本文来源吾爱破解论坛
本帖最后由 麦田孤望者 于 2020-1-28 18:45 编辑 Figure_5.png (130.07 KB, 下载次数: 0)
下载附件
保存到相册
Figure_4.png (129.92 KB, 下载次数: 0)
下载附件
保存到相册
这几天实在是太闲了...于是码了一个爬弹幕的程序
用到的第三方库有:requests、PySimpleGUI、matplotlib
版本:Python 必须是 3.8 及以上,因为我在第42行用了海象操作符(:=)...当然那里改成return (cid_tuple,part,re.findall(re.compile('aid=\d{8}',re.S),url)[0][-8:])应该也是没问题的
生成效果
上代码
[Python] 纯文本查看 复制代码
# Bilibili danmaku (bullet comment) density plotter.
#
# Asks for a video URL in a small PySimpleGUI window, resolves the URL to a
# comment id (cid), downloads the comment XML and plots how many comments
# were posted in each time bucket of the video.
import json
import os
import re
import sys
import time
from collections import Counter
from fnmatch import fnmatch

import matplotlib.pyplot as plt
import PySimpleGUI as sg
import requests
from pylab import mpl

# The figure uses Chinese labels; SimHei is selected so they can be rendered.
mpl.rcParams['font.sans-serif'] = ['SimHei']

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10

# One danmaku entry in the comment XML: <d p="attr,attr,...">text</d>.
_DANMAKU_RE = re.compile(r'<d p="(.*?)">(.*?)</d>', re.S)


def findall(tx1, tx2, tx3):
    """Return every (non-greedy) substring of ``tx3`` found between two markers.

    ``tx1`` and ``tx2`` are inserted into the pattern verbatim, i.e. they are
    regular-expression fragments, not literal text. Callers that pass literal
    text must escape regex metacharacters themselves.
    """
    return re.findall(f'{tx1}(.*?){tx2}', tx3, re.S)


def _find_first(tx1, tx2, tx3):
    """Return the first ``findall`` match, or raise ``ValueError`` if none."""
    matches = findall(tx1, tx2, tx3)
    if not matches:
        raise ValueError('页面结构无法解析,未找到标记: {}'.format(tx1))
    return matches[0]


def _get(url):
    """GET ``url`` with a timeout and raise on HTTP error statuses."""
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return res


def parse_1(url):
    """Resolve an ordinary ``/video/av<id>`` URL to the cid of its first page.

    Raises:
        ValueError: if no ``av<digits>`` id can be found in ``url``.
    """
    # Accept av ids of any length, with or without a query string or a
    # trailing slash, instead of assuming exactly eight digits.
    match = re.search(r'/video/av(\d+)', url)
    if match is None:
        raise ValueError('无法从链接中解析出 av 号: {}'.format(url))
    aid = match.group(1)
    res = _get('https://api.bilibili.com/x/player/pagelist?aid={}&jsonp=jsonp'.format(aid))
    return res.json()['data'][0]['cid']


def parse_2(url):
    """Resolve a ``/blackboard/`` player URL.

    Returns:
        ``(cid_list, part_title_list, aid)`` taken from the JSON embedded in
        the page and from the ``aid=`` query parameter of ``url``.

    Raises:
        ValueError: if ``url`` has no ``aid=<digits>`` parameter or the page
            does not contain the expected embedded JSON.
    """
    if (match := re.search(r'aid=(\d+)', url)) is None:
        raise ValueError('无法从链接中解析出 aid: {}'.format(url))
    res = _get(url)
    html = _find_first('window.__kanzaki_ranko=', '</script>', res.text)
    pages = json.loads(html)['main']['pages']
    cids = [page['cid'] for page in pages]
    parts = [page['part'] for page in pages]
    return (cids, parts, match.group(1))


def parse_3(url):
    """Resolve a ``/bangumi/`` URL.

    Returns:
        ``(cid_list, episode_title_list, '')`` built from the ``epList`` of
        the ``__INITIAL_STATE__`` JSON embedded in the page.

    Raises:
        ValueError: if the page does not contain the expected embedded JSON.
    """
    page = _get(url).text
    html = _find_first(r'.__INITIAL_STATE__=', r';\(function\(\)', page)
    episodes = json.loads(html)['epList']
    cids = [ep['cid'] for ep in episodes]
    titles = [ep['titleFormat'] + ' ' + ep['longTitle'] for ep in episodes]
    return (cids, titles, '')


def check(url, mode=''):
    """Dispatch ``url`` to the parser matching its bilibili.com sub-path.

    ``mode`` is accepted for backward compatibility and is not used.
    Prints a message and exits when the URL is not a supported address
    (previously an unsupported bilibili path silently returned ``None``).
    """
    if url.startswith('https://www.bilibili.com/video/'):
        return parse_1(url)
    if url.startswith('https://www.bilibili.com/blackboard/'):
        return parse_2(url)
    if url.startswith('https://www.bilibili.com/bangumi/'):
        return parse_3(url)
    print('请输入正确视频地址!')
    sys.exit()


def get_comment(cid):
    """Download the danmaku XML for ``cid``.

    Returns:
        ``(params_list, text_list)``: for every ``<d>`` element, the list of
        comma-separated fields of its ``p`` attribute and its text, in
        document order.
    """
    res = _get('http://comment.bilibili.com/{}.xml'.format(cid))
    res.encoding = 'utf-8'
    params_list = []
    texts = []
    # A single pattern captures attribute and text together, so the
    # attribute string is never re-used as a regular expression.
    for params, text in _DANMAKU_RE.findall(res.text):
        params_list.append(params.split(','))
        texts.append(text)
    return (params_list, texts)


def check_again(tup, url):
    """Turn the result of ``check`` into ``(cid_list, title)``.

    When ``tup`` is a tuple (multi-part video or bangumi) a window with one
    button per part is shown and the chosen part is returned. Otherwise
    ``tup`` is a single cid and the title is read from the video page.
    Exits if the chooser window is closed without a selection.

    Raises:
        ValueError: if there are no parts to choose from or the video page
            cannot be parsed.
    """
    if isinstance(tup, tuple):
        cids, titles = tup[0], tup[1]
        if not titles:
            raise ValueError('没有找到可选择的分集')
        layout = [[sg.Button(name, font='宋体.ttf')] for name in titles]
        window2 = sg.Window('请确认第几话', layout=layout)
        event, _values = window2.Read()
        window2.close()
        if event not in titles:
            # Window closed without clicking a part: do not guess one.
            sys.exit()
        index = titles.index(event)
        return ([str(cids[index])], str(titles[index]))

    res = _get(url)
    html = _find_first(
        '<div id="viewbox_report" class="video-info report-wrap-module report-scroll-module">',
        'class="video-title">',
        res.text,
    )
    title = _find_first('title="', '"', html)
    return ([tup], title)


def main():
    """Ask for a video URL and fetch its danmaku.

    Returns:
        ``((params_list, text_list), title)``.

    Side effects:
        Stores the entered URL in the module-level ``video_url``.
    """
    global video_url
    label = sg.Text('视频链接:', font='宋体.ttf')
    url_input = sg.Input('')
    confirm = sg.Button('确定', font='宋体.ttf')
    window = sg.Window('哔哩哔哩弹幕', layout=[[label, url_input, confirm]])
    event, values = window.Read()
    window.close()
    if event is None or not values or not values[0]:
        # Window closed or nothing entered.
        print('请输入正确视频地址!')
        sys.exit()
    video_url = values[0].strip()

    cids, title = check_again(check(video_url), video_url)
    # Only the last cid's comments were ever kept, so fetch just that one.
    comments = get_comment(int(cids[-1]))
    return (comments, title)


def bubbleSort(arr):
    """Sort ``arr`` in place by each item's first element and return it.

    Kept under its historical name. The built-in stable sort gives the same
    ordering as a bubble sort in O(n log n).
    """
    arr.sort(key=lambda item: item[0])
    return arr


def _bucket_width(max_time):
    """Pick the bucket width in seconds for a video lasting ``max_time`` s."""
    # Test the largest threshold first so that every branch is reachable.
    if max_time > 3000:
        return 300
    if max_time > 1000:
        return 120
    if max_time > 500:
        return 30
    return 10


def _bucket_counts(times, width):
    """Count ``times`` per half-open bucket ``[i*width, (i+1)*width)``.

    Returns:
        ``(labels, counts)`` where ``labels[i]`` is the start second of
        bucket ``i``. Buckets run from 0 up to the one holding ``max(times)``.
    """
    per_bucket = Counter(t // width for t in times)
    last_bucket = max(times) // width
    labels = [i * width for i in range(last_bucket + 1)]
    counts = [per_bucket[i] for i in range(last_bucket + 1)]
    return labels, counts


def draw_():
    """Run the whole flow and show the danmaku-count-over-time line chart.

    Raises:
        ValueError: if the video has no danmaku at all.
    """
    (params_list, _texts), title = main()
    if not params_list:
        raise ValueError('没有获取到任何弹幕')

    # The first field of each comment's attributes is parsed as its
    # timestamp in seconds.
    times = [int(float(params[0])) for params in params_list]
    labels, counts = _bucket_counts(times, _bucket_width(max(times)))

    positions = range(len(labels))
    # Never let the figure width drop to zero or below for short videos.
    plt.figure(figsize=(max(len(labels) - 6, 6), 6.5))
    plt.plot(positions, counts, marker='o', mec='r', mfc='w', label=u'弹幕数量曲线图')
    plt.legend()  # show the legend
    plt.xticks(positions, labels, rotation=20)
    plt.margins(0)
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel(u"时间/s ")  # x-axis label
    plt.ylabel("数量")  # y-axis label
    plt.title(title)  # chart title
    plt.show()


if __name__ == '__main__':
    try:
        draw_()
    except Exception as e:
        # Report ordinary failures; SystemExit and KeyboardInterrupt pass through.
        print('ERROR:', e)
不写注释真的不是好习惯...还有...不要在意我的变量名...没想到合适的就用的abcde
看不懂的可以评论区问我
还有...求评分
Figure_5.png (130.07 KB, 下载次数: 0)
下载附件 保存到相册
2020-1-28 18:44 上传
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。