本文来源吾爱破解论坛
本帖最后由 null119 于 2019-7-10 20:15 编辑 QQ截图20190709154934.png (63.52 KB, 下载次数: 0)
下载附件
保存到相册
图片镇楼
获取豆瓣高分电影(500部)并写入MySQL数据库:
[Python] 纯文本查看 复制代码
#-*-coding:UTF-8-*-
import json

import pymysql
import requests
from lxml import etree

# XPath of the text node holding the number of ratings on a movie detail page.
_VOTES_XPATH = '//*[@id="interest_sectl"]/div[1]/div[2]/div/div[2]/a/span/text()'


def _composite_score(votes, rate):
    """Return the composite score: 0.5 per 10,000 votes plus twice the rating."""
    return round(float(votes / 10000 * 0.5)) + round(float(rate) * 2)


def GetWriteData(url, num):
    """Scrape movie listings page by page and store them in the MySQL table ``db``.

    The table ``db`` in database ``mydata`` is dropped and recreated on every
    call. For each listed movie the detail page is fetched, the vote count is
    read from it, and a row (id, title, rating, vote count, composite score)
    is inserted. All rows are committed once, after the last page.

    Args:
        url: Listing URL prefix; the page offset is appended to it as text.
        num: Upper bound for the page offset. Offsets 0, 20, 40, ... are
            requested while the offset is below this value.
    """
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                           passwd='root', db='mydata')
    try:
        with conn.cursor() as cursor:
            # Drop the table if it already exists, then recreate it empty.
            cursor.execute("DROP TABLE IF EXISTS db")
            cursor.execute('create table db ( id int(5) primary key, pname varchar(255), pf varchar(10) ,plnum int(10),zhpf int(10))')

            # The original INSERT used an undefined name for the primary key;
            # a running 1-based counter is used instead.
            row_id = 0
            i = 0
            while i < num:
                html = requests.get(url + str(i))
                if '异常请求' in html.text:
                    print('请求过于频繁,请稍后再试!')
                    break
                # The slice drops a 12-character prefix and the final character
                # before parsing; presumably this strips a {"subjects": ... }
                # wrapper around the list - TODO confirm against a live response.
                text = json.loads(html.text[12:-1])
                i += 20
                print(i, round(i / 20))

                for j in text:
                    try:
                        s = requests.get(j['url'])
                    except (requests.RequestException, KeyError):
                        print('访问异常')
                        continue

                    ehtml = etree.HTML(s.text)
                    raw_votes = ''
                    if ehtml is not None:
                        raw_votes = ''.join(ehtml.xpath(_VOTES_XPATH))
                    try:
                        nu = int(raw_votes)
                    except ValueError:
                        # No vote count found on the page: skip this movie
                        # instead of aborting the whole run.
                        print('访问异常')
                        continue

                    zhpf = _composite_score(nu, j['rate'])
                    row_id += 1
                    cursor.execute('INSERT INTO db(id,pname,pf,plnum,zhpf) values (%s, %s, %s ,%s,%s) ',
                                   [row_id, j['title'], j['rate'], nu, zhpf])

            conn.commit()
    finally:
        # Always release the connection, even when scraping fails midway.
        conn.close()
    print('任务结束。')


if __name__ == '__main__':
    url = 'https://movie.douban.com/j/search_subjects?type=movie&tag=豆瓣高分&sort=recommend&page_limit=20&page_start='
    print('开始获取,请稍候...')
    GetWriteData(url, 500)  # 500 is the number of movies to fetch
matplotlib简单可视化:
[Python] 纯文本查看 复制代码
import pymysql
import matplotlib.pyplot as plt

# Read every row of mydata.db, highest composite score first. DictCursor is
# requested so each row can be indexed by column name below.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='mydata',
                       cursorclass=pymysql.cursors.DictCursor)
cursor = conn.cursor()
cursor.execute('select * from mydata.db order by zhpf desc;')
values = cursor.fetchall()
cursor.close()
conn.close()

# Only the first 21 rows of the result are plotted.
top_rows = values[:21]
namedata = [row['pname'] for row in top_rows]
zhpf = [row['zhpf'] for row in top_rows]

# Use SimHei for sans-serif text so the Chinese labels can render, and keep
# the minus sign as a plain ASCII character.
plt.rcParams["font.sans-serif"] = ["SimHei"]
plt.rcParams["axes.unicode_minus"] = False

plt.barh(y=namedata, width=zhpf, label='综合评分', color='steelblue', alpha=0.8, height=0.8)

# Annotate each bar with its score, placed 2 units past the bar's end.
for bar_index, score in enumerate(zhpf):
    plt.text(score + 2, bar_index - 0.4, '%s' % score, ha='center', va='bottom')

plt.xlabel("分值")
plt.ylabel("片名")
plt.legend()
plt.show()
版权声明:
本站所有资源均为站长或网友整理自互联网,或由站长购买自互联网。站长无法分辨资源版权出自何处,因此不承担任何版权以及其他问题带来的法律责任。如有侵权或者其他问题,请联系站长删除!站长QQ:754403226,谢谢。
- 上一篇: 神经网络-手写数字识别学习笔记
- 下一篇: 【分享】Scrapy采集网站的全部小说