本文来源吾爱破解论坛
刚学Python,正好朋友想把“乐队我做东”下载来看,就写个程序练练
[Python] 纯文本查看 复制代码
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r'''
===========================
* 乐队我做东 m3u8 *
===========================
'''
# NOTE: the module docstring above is printed as the start-up banner
# (see ``print(__doc__)`` below), so it is runtime text, not documentation.
#
# What this script does:
#   1. Fetches the table-of-contents page and lists every episode found there.
#   2. Asks which episode to download (or all of them).
#   3. For the chosen episode, locates the m3u8 playlist URL in the episode
#      page, follows nested playlists, downloads every .ts segment and
#      concatenates the segments into a single .ts file.

import os
import random
import re
import shutil
import sys
import urllib.parse
from time import sleep

import requests
from lxml import etree


def getWorkdir(subdir):
    """Return the directory downloads are written to, creating it if needed.

    The base directory depends on ``os.name``: a fixed drive path on Windows,
    the Android shared download folder on POSIX, and the current working
    directory otherwise. When the platform-specific base does not exist
    (e.g. a POSIX machine that is not a phone) the current working directory
    is used instead of failing.

    Args:
        subdir: Name of the sub-directory to create below the base directory.

    Returns:
        The path of the (now existing) working directory.
    """
    if os.name == 'nt':  # desktop PC
        base = 'F:\\BaiduNetdiskDownload'
    elif os.name == 'posix':  # phone
        base = '/storage/emulated/0/Download'
    else:
        base = os.getcwd()
    if not os.path.isdir(base):
        base = os.getcwd()
    workdir = os.path.join(base, subdir)
    os.makedirs(workdir, exist_ok=True)
    return workdir


def getM3U8(p_title):
    """Download the episode called *p_title*.

    Looks the title up in the module-level ``phase_titles`` / ``phase_urls``
    lists, fetches the episode page, extracts the m3u8 URL from the embedded
    ``cms_player`` object and hands the parsed playlist to ``getTS``.
    Failures are reported on stdout and the function returns without raising.

    Args:
        p_title: Episode title exactly as listed in ``phase_titles``.
    """
    print('\n%s' % p_title)
    try:
        page_url = host_toc + phase_urls[phase_titles.index(p_title)]
    except (ValueError, IndexError):
        print(' ... unknown phase ... %s' % p_title)
        return

    # Parse the episode page to obtain the m3u8 link.
    try:
        resp = requests.get(page_url, headers=header, timeout=30)
    except requests.RequestException:
        print(' ... failed opening phase page ... %s' % page_url)
        return
    p_content = resp.content.decode('utf-8', errors='ignore')

    found = re.findall(r'cms_player = {"yun":true,"url":"(.*?)"', p_content, re.S)
    if not found:
        print(' ... failed locating m3u8 link ... %s' % page_url)
        return
    # The URL is embedded in JSON, where "/" is escaped as "\/".
    url_m3u8 = found[0].replace('\\/', '/')

    parsed_flag, m3u8_items, host_m3u8 = reParser_m3u8(url_m3u8)
    if parsed_flag == 'Y':
        getTS(p_title, m3u8_items, host_m3u8)
    else:
        print(' ... failed parsing m3u8 content ... %s' % url_m3u8)


def reParser_m3u8(url, max_depth=5):
    """Fetch an m3u8 playlist and return its list of .ts entries.

    If the playlist refers to another playlist (a line ending in ``m3u8``),
    the first such entry is followed recursively.

    Args:
        url: URL of the playlist to fetch.
        max_depth: How many nested playlists may still be followed; guards
            against a playlist that (directly or indirectly) refers to itself.

    Returns:
        A tuple ``(flag, items, base_url)``. ``flag`` is ``'Y'`` on success
        and ``'N'`` on failure; ``items`` are the ``.ts`` entries as written
        in the playlist; ``base_url`` is the URL (ending in ``/``) that
        relative entries are resolved against. On failure ``items`` is empty
        and ``base_url`` is ``''``.
    """
    if max_depth < 0:
        return 'N', [], ''
    try:
        resp = requests.get(url, headers=header, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return 'N', [], ''
    m3u8_content = resp.content.decode('utf-8', errors='ignore')

    nested = re.findall(r'^(\S+m3u8)$', m3u8_content, re.M)
    if nested:
        # Nested playlist: resolve it against the current one and descend.
        return reParser_m3u8(urllib.parse.urljoin(url, nested[0]), max_depth - 1)

    # Leaf playlist: collect the sequence of ts files.
    items = re.findall(r'^(\S+ts)$', m3u8_content, re.M)
    if not items:
        return 'N', [], ''
    return 'Y', items, urllib.parse.urljoin(url, '.')


def getTS(p_title, m3u8_items, host_m3u8):
    """Download every segment of an episode and merge them into one file.

    Writes ``<p_title>.txt`` (one segment URL per line) into the module-level
    ``workdir``, saves each segment into ``workdir/<p_title>/`` and, if every
    segment was fetched and saved, concatenates them in playlist order into
    ``workdir/<p_title>/<p_title>.ts``. Progress and failures are printed.

    Args:
        p_title: Episode title; used for the playlist, directory and output
            file names.
        m3u8_items: Segment entries as returned by ``reParser_m3u8``.
        host_m3u8: Base URL that relative segment entries are resolved against.
    """
    ts_urls = [urllib.parse.urljoin(host_m3u8, ts) for ts in m3u8_items]
    with open(os.path.join(workdir, '%s.txt' % p_title), 'w', encoding='utf-8') as f:
        f.writelines(ts_url + '\n' for ts_url in ts_urls)
    print(' ... playlist fetched')

    # Download the ts files.
    print(' ... downloading ts ... %d files' % len(m3u8_items))
    p_dir = os.path.join(workdir, p_title)
    os.makedirs(p_dir, exist_ok=True)
    saved_paths = []
    flag_combine = True
    for ts, ts_url in zip(m3u8_items, ts_urls):
        ts_name = os.path.basename(ts)
        try:
            resp = requests.get(ts_url, headers=header, timeout=30)
            # Without this an HTTP error page would be saved as video data.
            resp.raise_for_status()
        except requests.RequestException:
            print(' ... failed reading ts ... %s' % ts_url)
            flag_combine = False
            continue
        ts_path = os.path.join(p_dir, ts_name)
        try:
            with open(ts_path, 'wb') as f:
                f.write(resp.content)
        except OSError:
            print(' ... failed saving ts ... %s' % ts_name)
            flag_combine = False
        else:
            saved_paths.append(ts_path)

    # Merge the ts files.
    if not flag_combine:
        print(' ... pls check failed ts & manually combine\n ... done')
        return
    print(' ... combining ts')
    combined = os.path.join(p_dir, '%s.ts' % p_title)
    try:
        # Concatenate in playlist order; done in Python so it works on every
        # platform and never passes the scraped title to a shell.
        with open(combined, 'wb') as out:
            for ts_path in saved_paths:
                with open(ts_path, 'rb') as seg:
                    shutil.copyfileobj(seg, out)
    except OSError:
        print(' ... failed combining ts ... %s' % combined)
        return
    print(' ... done')


print(__doc__)

subdir = 'Band'
workdir = getWorkdir(subdir)

# Pool of User-Agent strings; one is picked at random for this run.
# Every entry must be Latin-1 encodable, otherwise it cannot be sent as an
# HTTP header.
headerpool = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; Trident/5.0)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
    'Mozilla/5.0 (iPad; CPU OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1',
    'Opera/8.0 (Windows NT 5.1; U; en)',
]
header = {'user-agent': random.choice(headerpool)}

# Table-of-contents page.
url_toc = 'https://www.116bt.com/vodshow/4193.html'
_toc_parts = urllib.parse.urlsplit(url_toc)
host_toc = _toc_parts.scheme + '://' + _toc_parts.netloc

# Parse the table-of-contents page to get each episode's title and link.
try:
    toc = requests.get(url_toc, headers=header, timeout=30).content.decode('utf-8', errors='ignore')
except requests.RequestException as err:
    sys.exit(' ... failed opening contents page ... %s (%s)' % (url_toc, err))
toc_html = etree.HTML(toc)
if toc_html is None:
    sys.exit(' ... failed parsing contents page ... %s' % url_toc)
phase_urls = toc_html.xpath('//ul[@class="detail-play-list clearfix tab-pane ff-playurl ff-playurl-tab-2 fade"]/li/a/@href')
phase_titles = toc_html.xpath('//ul[@class="detail-play-list clearfix tab-pane ff-playurl ff-playurl-tab-2 fade"]/li/a/@title')
for p_title in phase_titles:
    print(p_title)

# The menu commands are kept apart from the real titles so that "(a)ll"
# only ever iterates over actual episodes.
valid_choices = set(phase_titles) | {'q', 'a'}

# Choose which episode to download.
while True:
    phase_chosen = ''
    while phase_chosen not in valid_choices:
        phase_chosen = input('\nWhich one to get? (a)ll? (q)uit? ... ')
    if phase_chosen == 'q':
        sys.exit()
    elif phase_chosen == 'a':
        for p_title in phase_titles:
            getM3U8(p_title)
            sleep(2)
    else:
        getM3U8(phase_chosen)
        sleep(2)
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。
- 上一篇: 文泉学堂所有书bid
- 下一篇: python爬虫,爬取58同城数据