声明:该公众号大部分文章来自作者日常学习笔记,也有少部分文章是经过原作者授权和其他公众号白名单转载,未经授权,严禁转载,如需转载,联系刘一手
请勿利用文章内的相关技术从事非法测试,如因此产生的一切不良后果与文章作者和本公众号无关。仅供学习研究
使用编程语言
python
使用到的模块
os,requests,lxml,re
爬取的网站首页如下
https://www.vmgirls.com
代码如下
import os
import requests
from lxml import etree
import re
# Default HTTP headers for the category-listing and gallery-page requests:
# a desktop browser user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59"
    ),
}
# Save a file
def xzwj(xz, head, path):
    """Download the resource at ``xz`` and write it into directory ``path``.

    The local file name is the last ``/``-separated segment of the URL.

    Args:
        xz: URL of the file to download.
        head: HTTP headers sent with the request.
        path: Directory the file is written into; it must already exist.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never saved under the file's name.
        requests.Timeout: If the server stops responding.
    """
    w_na = xz.split("/")[-1]
    # Announce the download before it starts; printing this only after the
    # bytes were already on disk made the message misleading.
    print("正在下载,请耐心等待。。。")
    w_resp = requests.get(xz, headers=head, timeout=30)
    w_resp.raise_for_status()
    with open(os.path.join(path, w_na), 'wb') as f:
        f.write(w_resp.content)
    print(w_na, "下载完成")
# Pattern for the JavaScript redirect ``window.location='<url>'``.  The
# pattern is double-quoted so the single quotes that delimit the URL do not
# terminate the Python string literal.
_REDIRECT_RE = re.compile(r"window\.location='(.*?)'")


# Resolve the archive URL
def rar_xz(rar_url, head):
    """Return the URL assigned to ``window.location`` on an intermediate page.

    Args:
        rar_url: URL of the page that is expected to contain the redirect.
        head: HTTP headers sent with the request.

    Returns:
        The first URL found in a ``window.location='...'`` assignment.

    Raises:
        ValueError: If the page contains no such assignment.
        requests.Timeout: If the server stops responding.
    """
    xz_res = requests.get(rar_url, headers=head, timeout=30).text
    found = _REDIRECT_RE.search(xz_res)
    if found is None:
        raise ValueError(f"no window.location redirect found at {rar_url}")
    return found.group(1)
def xzxz(xx):
    """Download the preview images, then the archive, of one listed gallery.

    Reads the module-level ``pna`` (gallery names), ``pli`` (gallery page
    URLs) and ``headers``, asks the user for a destination directory and
    stores everything in a sub-directory named after the first image title.

    Args:
        xx: 1-based position of the gallery in ``pna`` / ``pli``.

    Raises:
        IndexError: If ``xx`` is outside the listed range, or the gallery
            page has no image carrying a ``title`` attribute.
    """
    # Reject 0 and negative numbers explicitly: ``pli[xx - 1]`` would
    # otherwise wrap around and silently pick an entry from the end.
    if not 1 <= xx <= min(len(pna), len(pli)):
        raise IndexError(f"selection {xx} is out of range")
    page_url = pli[xx - 1]
    print("要下载的内容和网址是:", pna[xx - 1], page_url)
    print("---------开始下载展示图片------------")
    r_resp = requests.get(page_url, headers=headers, timeout=30)
    r_tree = etree.HTML(r_resp.text)
    r_imgs = r_tree.xpath('//div/div/p/img/@src')
    r_nas = r_tree.xpath('//div/div/p/img/@title')
    # Keep the raw list: a missing archive link must not stop the images
    # from being downloaded.
    rar_links = r_tree.xpath('//div/div[@class="pay-box"]/a/@href')
    if not r_nas:
        raise IndexError(f"no titled images found at {page_url}")
    title = r_nas[0]

    gg = input('请输入保存的路径(默认c://,注意:需改为://):')
    if gg == '':
        gg = 'c://'
    # join() inserts a separator when the user's path lacks a trailing one;
    # makedirs() also creates missing parents and tolerates an existing dir.
    path = os.path.join(gg, title)
    os.makedirs(path, exist_ok=True)

    # Same headers as the page request plus a referer naming the gallery page.
    head = dict(headers, referer=page_url)
    for n, img in enumerate(r_imgs, 1):
        xzwj(img, head, path)
        # The progress line shows the title without its last two characters.
        print(f"{title[:-2]},第{n}张下载完成---")
    print("共", len(r_imgs), "张下载完成")

    if not rar_links:
        print("未找到压缩文件下载链接,已跳过")
        return
    print("==============开始下载压缩文件============")
    xz = rar_xz(rar_links[0], head)
    xzwj(xz, head, path)
def main_xz():
    """Ask for a category, crawl its listing pages and print the galleries.

    Uses the module-level ``headers`` for every request.

    Returns:
        A ``(pna, pli)`` tuple: the gallery names and the gallery page URLs,
        both collected in page order.

    Raises:
        ValueError: If the answer is not one of the offered category
            numbers, or the detected page count is not a number.
    """
    categories = {
        1: "https://dimgw.us/xinggan",
        2: "https://dimgw.us/qc",
        3: "https://dimgw.us/yj",
        4: "https://dimgw.us/zf",
    }
    print("1.性感美女 2.清纯可爱 3.性感御姐 4.制服诱惑")
    ms = input("请选择分类:")
    try:
        url = categories[int(ms)]
    except (ValueError, KeyError):
        # Previously an out-of-range number left ``url`` unbound and failed
        # later with an unrelated error.
        raise ValueError(f"unknown category: {ms!r}") from None

    resp = requests.get(url, headers=headers, timeout=30)
    page_links = re.findall(r'<a class="page-numbers" href="(.*?)</a>', resp.text)
    # The text of the last pagination link is taken as the page count; when
    # the listing shows no pagination links, fall back to a single page.
    page = page_links[-1].split(">")[-1] if page_links else "1"
    print("此类共", page, "页!")
    print("*" * 50)

    pna = []
    pli = []
    # NOTE(review): names and links come from two separate queries and are
    # assumed to line up one-to-one -- confirm against the site markup.
    for p in range(1, int(page) + 1):
        purl = url + f"/page/{p}"
        presp = requests.get(purl, headers=headers, timeout=30)
        tree = etree.HTML(presp.text)
        rw_li = tree.xpath('//div[@class="row posts-wrapper"]//div/a[@target="_blank"]/@href')
        rw_na = tree.xpath('//div[@class="row posts-wrapper"]//div/a[@target="_blank"]/img/@alt')
        pna.extend(rw_na)
        pli.extend(rw_li)

    print("-" * 50, "\n", "共有以下美女可选:")
    # enumerate() numbers duplicate names correctly and avoids the repeated
    # list scan that ``pna.index(na)`` performed for every entry.
    for idx, na in enumerate(pna, 1):
        print(idx, na)
    return pna, pli
# Script entry point: list the galleries once, then download selections
# until the user enters 0.
if __name__ == "__main__":
    # Assigned at module level because xzxz() reads ``pna`` and ``pli`` as
    # globals.
    pna, pli = main_xz()
    # Loop until the user quits.  The former ``var != len(pna)`` counter
    # never ran the loop when exactly one gallery was listed and otherwise
    # stopped after ``len(pna) - 1`` selections.
    while True:
        try:
            xx = int(input("请输入要下载的序号(0退出):"))
        except ValueError:
            print("请输入数字序号")
            continue
        if xx == 0:
            print("退出")
            break
        if not 1 <= xx <= len(pna):
            print("序号超出范围")
            continue
        xzxz(xx)
运行效果
“有所激有所逼”而成!只要你想,你就可以!
原文始发于微信公众号(鹏组安全):python爬取漂亮小姐姐图片
免责声明:文章中涉及的程序(方法)可能带有攻击性,仅供安全研究与教学之用,读者将其信息做其他用途,由读者承担全部法律及连带责任,本站不承担任何法律及连带责任;如有问题可邮件联系(建议使用企业邮箱或有效邮箱,避免邮件被拦截,联系方式见首页),望知悉。
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论