本文来源于吾爱破解论坛
import os
import string
import urllib
import urllib.parse

import requests
from lxml import etree
# Crawl the paginated photo listing that starts at `url`, saving every image
# found on every page (including the last one) into SAVE_DIR.

# First listing page; later pages are discovered through the pager links.
url = "http://www.xiaohuar.com/hua/"
# Image `src` attributes are resolved against the site root.
SITE_ROOT = 'http://www.xiaohuar.com/'
# Folder that receives the downloaded images.
SAVE_DIR = r'I:\Pchong\pc_image\flowers'
# Text of the pager link that leads to the following page.
NEXT_PAGE_TEXT = '下一页'
# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 10

# Optional proxy configuration; empty means a direct connection.
proxies = {
    # 'http':'http://183.196.170.247:9000/',
    # "http": "111.29.3.190:80"
}
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}

# Characters that must not appear in a file name (the Windows-reserved set,
# which also covers "/" on other platforms).
_UNSAFE_NAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def absolute_url(base, link):
    """Resolve *link* against *base* and percent-encode non-ASCII characters.

    Already-absolute links are returned unchanged apart from the encoding
    step; printable ASCII (including existing ``%`` escapes) is left as is.
    """
    joined = urllib.parse.urljoin(base, link)
    return urllib.parse.quote(joined, safe=string.printable)


def build_image_name(alt_text, src):
    """Return a file name made of the image's alt text and the URL's extension.

    Falls back to the URL's own base name when the alt text is missing or
    blank, ignores any query string when picking the extension, and replaces
    characters that are not allowed in file names with ``_``.
    """
    basename = urllib.parse.urlparse(src).path.rsplit('/', 1)[-1]
    url_stem, dot, ext = basename.rpartition('.')
    if dot:
        suffix = '.' + ext
    else:
        # No dot at all: the whole base name is the stem, no extension.
        url_stem, suffix = basename, ''
    stem = (alt_text or '').strip() or url_stem or 'image'
    return stem.translate(_UNSAFE_NAME_CHARS) + suffix


def find_next_page(pager_links):
    """Return the href of the "next page" link, or None when there is none.

    *pager_links* is an iterable of ``(text, href)`` pairs. Searching by link
    text avoids depending on the link's position in the pager.
    """
    for text, href in pager_links:
        if text and text.strip() == NEXT_PAGE_TEXT and href:
            return href
    return None


def fetch_page(page_url):
    """Download a listing page and return its parsed HTML tree."""
    rep = requests.get(page_url, headers=headers, proxies=proxies,
                       timeout=REQUEST_TIMEOUT)
    rep.raise_for_status()
    # gbk is a superset of gb2312; undecodable bytes are replaced rather than
    # aborting the whole crawl.
    html_content = rep.content.decode('gbk', errors='replace')
    return etree.HTML(html_content)


def extract_images(dom):
    """Return ``(src, alt)`` pairs for every listed image.

    Both attributes are read from the same ``<img>`` element so that a name
    can never be paired with the wrong picture.
    """
    images = []
    for img in dom.xpath('//div[@id="list_img"]//div[@class="img"]/a/img'):
        src = img.get('src')
        if src:
            images.append((src, img.get('alt')))
    return images


def extract_pager_links(dom):
    """Return ``(text, href)`` pairs for every link in the pager."""
    anchors = dom.xpath('//body//div[@id="list_img"]//div[@id="page"]/div/a')
    return [(''.join(a.itertext()), a.get('href')) for a in anchors]


def save_image(image_url, img_name):
    """Download *image_url* into SAVE_DIR as *img_name*.

    Returns True on success and False when the download fails, so one broken
    image does not stop the crawl.
    """
    try:
        rep = requests.get(image_url, headers=headers, proxies=proxies,
                           timeout=REQUEST_TIMEOUT)
        rep.raise_for_status()
    except requests.RequestException as exc:
        print('图片下载失败:', image_url, exc)
        return False
    with open(os.path.join(SAVE_DIR, img_name), 'wb') as f:
        f.write(rep.content)
    return True


def crawl(start_url):
    """Walk the listing from *start_url*, saving the images of every page."""
    os.makedirs(SAVE_DIR, exist_ok=True)
    visited = set()
    page_url = start_url
    i = 1
    # The visited set stops the crawl if the pager ever links back to a page
    # that was already processed.
    while page_url not in visited:
        visited.add(page_url)
        try:
            dom = fetch_page(page_url)
        except requests.RequestException as exc:
            print('页面下载失败:', page_url, exc)
            break
        if dom is None:
            break

        # Download this page's images before looking for the next page, so
        # the final page is processed as well.
        for src, alt_info in extract_images(dom):
            img_name = build_image_name(alt_info, src)
            print(img_name)
            if save_image(absolute_url(SITE_ROOT, src), img_name):
                print('图片保存完毕!!')
                print('-------第' + str(i) + '图片爬取完毕---------')
                i += 1

        next_href = find_next_page(extract_pager_links(dom))
        if next_href is None:
            break
        # The pager href may be relative; resolve it against the current page.
        page_url = absolute_url(page_url, next_href)
        print("下一页地址:", page_url)


if __name__ == "__main__":
    crawl(url)
1582338980(1).jpg (145.18 KB, 下载次数: 1)
下载附件 保存到相册
image.png (1010 Bytes, 下载次数: 0)
下载附件 保存到相册
版权声明:
本站所有资源均为站长或网友整理自互联网,或由站长购买自互联网。站长无法分辨资源版权出自何处,因此不承担任何版权及其他问题带来的法律责任。如有侵权或其他问题,请联系站长删除!站长QQ:754403226,谢谢。
- 上一篇: MobaXterm算法注册机
- 下一篇: 利用selenium爬取bilibili评论