本文来源吾爱破解论坛
本帖最后由 天空宫阙 于 2020-1-11 22:30 编辑
目标站点
https://www.tingchina.com/
上一个版本链接
https://www.52pojie.cn/thread-1089351-1-1.html
更新内容
1.上个版本只支持有声书,这个版本理论上支持听中国上的所有音频(包括有声书,评书,相声等),但未全部测试,大概率会有不能下载的情况
py文件和exe文件下载链接
https://www.lanzous.com/i8m05xi
使用方法
1.输入目录页链接,如https://www.tingchina.com/pingshu/disp_1742.htm
小技巧:开启cmd快速编辑右键粘贴(看方法一即可):https://jingyan.baidu.com/article/c85b7a64618eb3003bac95d1.html
2.输入起始下载集数(从第一集开始下载则直接回车)
如果觉得可以免费评下分!
源码
[Python] 纯文本查看 复制代码
"""Command-line downloader for audio series listed on https://www.tingchina.com/.

The script asks for a catalogue ("disp") page URL and a starting episode
number, then downloads every episode from that number onwards into a
directory named after the page title.  Episodes that fail are reported on
stdout and appended to ``log.txt``.
"""
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# User-Agent sent with page requests (catalogue, player and key pages).
PAGE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
# User-Agent sent with the audio file request itself.
DOWNLOAD_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
# Seconds to wait for a connection / for the next bytes before giving up, so
# that a stalled server cannot hang the whole run.
REQUEST_TIMEOUT = 30
# Characters that cannot appear in a file or directory name on Windows.
_INVALID_PATH_CHARS = re.compile(r'[\\/:*?"<>|]')


class TingChina:
    """Download the episodes of one tingchina.com series, one after another."""

    def __init__(self, category, id, strat_num):
        """Store the series coordinates and the first episode to fetch.

        Args:
            category: URL path segment of the section (e.g. ``'yousheng'``).
            id: Numeric series id taken from the ``disp_<id>.htm`` URL.  The
                name shadows the builtin but is kept for compatibility with
                existing callers.
            strat_num: 1-based number of the first episode to download
                (parameter name kept as-is for compatibility).
        """
        self.base_url = 'https://www.tingchina.com'
        self.category = category
        self.id = id
        # Player pages are numbered from 0, saved files from 1.
        self.num = int(strat_num) - 1
        self.name_num = int(strat_num)
        self.Referer = ''
        self.host1 = "http://t44.tingchina.com"
        # Not referenced anywhere else in this script.
        self.host2 = "http://t33.tingchina.com"
        self.book_name = ''

    def get_total_episode(self):
        """Fetch the catalogue page and return ``(title, episode_count)``.

        The count is the highest ``play_..._<n>.htm`` index found among the
        last three list entries, plus one.

        Raises:
            RuntimeError: If the page cannot be fetched or no episode link is
                found in it.
        """
        url = 'https://www.tingchina.com/{}/disp_{}.htm'.format(self.category, str(self.id))
        print(url)
        headers = {'User-Agent': PAGE_USER_AGENT}
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            raise RuntimeError('catalogue page returned HTTP {}: {}'.format(response.status_code, url))
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')

        episode_lists = soup.select('div.list > ul')
        if not episode_lists:
            raise RuntimeError('no episode list found on {}'.format(url))
        items = episode_lists[0].select('li')

        last_index = None
        # Walk backwards over the last three entries and take the first one
        # that links to a player page.
        for item in reversed(items[-3:]):
            matched = re.search(r'play.*?_(\d+)\.htm', str(item))
            if matched:
                last_index = int(matched.group(1))
                break
        if last_index is None:
            raise RuntimeError('no episode link found on {}'.format(url))

        title_tags = soup.select('title')
        name = title_tags[0].string if title_tags else None
        return name, last_index + 1

    def get_flash_url(self):
        """Fetch the player page of the current episode.

        Two URL layouts are tried in order; the first that answers with
        HTTP 200 wins.

        Returns:
            ``(html_text, page_url)`` of the player page.

        Raises:
            RuntimeError: If neither URL answers with HTTP 200.
        """
        headers = {'User-Agent': PAGE_USER_AGENT}
        candidates = (
            'https://www.tingchina.com/{}/{}/play_{}_{}.htm'.format(self.category, str(self.id), str(self.id), str(self.num)),
            'https://www.tingchina.com/{}/play/play_{}_{}.htm'.format(self.category, str(self.id), str(self.num)),
        )
        for url in candidates:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            if response.status_code == 200:
                response.encoding = 'gbk'
                return response.text, url
        raise RuntimeError('player page not found for episode index {}'.format(self.num))

    def _build_real_address(self, src):
        """Build the audio file URL from the player iframe's query string.

        The path is ``host1/category[/flei][/bookname]/filename``; ``flei``
        and ``bookname`` are included only when present in ``src``.

        Raises:
            ValueError: If ``src`` carries no ``filename=`` parameter.
        """
        info = {}
        for key in ('flei', 'bookname', 'filename'):
            matched = re.search(key + '=(.*?)&', src)
            if matched:
                info[key] = matched.group(1)
        if 'filename' not in info:
            raise ValueError('no filename parameter in player URL: {}'.format(src))
        parts = [self.category]
        parts.extend(info[key] for key in ('flei', 'bookname') if key in info)
        parts.append(info['filename'])
        return self.host1 + '/' + '/'.join(parts)

    def parse_flash_url(self):
        """Locate the player iframe and derive the audio file address.

        Also records the player page URL in ``self.Referer``.

        Returns:
            ``(iframe_src, player_page_url, real_address)``.
        """
        html, url = self.get_flash_url()
        soup = BeautifulSoup(html, 'lxml')
        src = soup.select('#playdiv')[0].iframe['src']
        self.Referer = url
        real_address = self._build_real_address(src)
        # print('real_address',real_address)
        return src, url, real_address

    def get_audio(self):
        """Fetch the key and join it to real_address to form a usable URL.

        Returns:
            The audio URL including its ``key=...`` query string, or ``None``
            when the key page does not answer with HTTP 200 or contains no key.
        """
        temp_url, Referer, real_address = self.parse_flash_url()
        url = self.base_url + temp_url
        headers = {
            'User-Agent': PAGE_USER_AGENT,
            'Referer': Referer,
        }
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        # print(response.apparent_encoding)
        response.encoding = 'utf-8'
        matched = re.search(r'url\[3\]= ".*?(key=.*?)";', response.text, re.S)
        if not matched:
            return None
        # print(matched.group(1))
        return real_address + '?' + matched.group(1)

    def download(self):
        """Download the current episode into ``self.book_name``.

        Raises:
            RuntimeError: If no audio URL could be resolved, so that the
                caller's error handling records the episode as failed
                instead of silently skipping it.
        """
        url = self.get_audio()
        print(url)
        if not url:
            raise RuntimeError('could not resolve the audio address')
        target = os.path.join(self.book_name, str(self.name_num).zfill(4) + '.mp3')
        downloadFILE(url, target, self.Referer)

    def run(self):
        """Download every episode from the starting number to the last one.

        A failing episode is printed, appended to ``log.txt`` and skipped.
        """
        name, total_episode = self.get_total_episode()
        print('书名:', name, '集数:', total_episode)
        # The page title is used as the directory name; replace characters
        # the file system may reject and fall back to category/id when the
        # title is missing.
        folder = _INVALID_PATH_CHARS.sub('_', name) if name else ''
        if not folder:
            folder = '{}_{}'.format(self.category, self.id)
        self.book_name = folder
        os.makedirs(folder, exist_ok=True)

        while self.name_num <= total_episode:
            try:
                self.download()
            except Exception as e:  # keep going: one bad episode must not stop the run
                print(self.name_num, e)
                with open('log.txt', 'a', encoding='utf-8') as f:
                    # A space keeps the episode number apart from the message
                    # (otherwise "1" + "404 ..." reads as episode 1404).
                    f.write(str(self.name_num) + ' ' + str(e) + '\n')
            self.num += 1
            self.name_num += 1
        print('all assignments done!')


def downloadFILE(url, name, Referer):
    """Stream ``url`` to the file ``name`` while showing a progress bar.

    Args:
        url: Address of the audio file.
        name: Destination path; also used as the progress bar label.
        Referer: Value sent as the ``Referer`` request header.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never saved as audio.
    """
    headers = {
        'User-Agent': DOWNLOAD_USER_AGENT,
        'Referer': Referer,
    }
    with requests.get(url=url, stream=True, headers=headers, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        # Content-Length is optional; without it the bar runs with no total.
        length = resp.headers.get('Content-Length')
        content_size = int(length) // 1024 if length is not None else None
        size_label = content_size if content_size is not None else 'unknown'
        with open(name, "wb") as f:
            print("Pkg total size is:", size_label, 'k,start...')
            for data in tqdm(iterable=resp.iter_content(1024), total=content_size, unit='k', desc=name):
                f.write(data)
    print(name, "download finished!")


def main():
    """Prompt for the catalogue URL and start episode, then run the download."""
    # disp_url = 'https://www.tingchina.com/yousheng/disp_21501.htm'
    disp_url = input('请输入目录页链接如:https://www.tingchina.com/yousheng/disp_21501.htm:')
    matched = re.search(r'tingchina\.com/(\w+)/disp_(\d+)\.htm', disp_url)
    book_id = int(matched.group(2)) if matched else 0
    if not matched or not book_id:
        print('输入的链接无法解析')
        return
    category = matched.group(1)

    start_num = input('请输入开始下载的集数(直接回车从第一集开始下载)').strip()
    if not start_num:
        first_episode = 1
    else:
        try:
            first_episode = int(start_num)
        except ValueError:
            first_episode = 0
        if first_episode < 1:
            print('集数必须是大于0的整数')
            return

    t = TingChina(category, book_id, first_episode)
    t.run()


if __name__ == "__main__":
    main()

# Build commands:
# pyinstaller --onefile --windowed --icon=bitbug_favicon.ico tingchina_v0.3.py
# pyinstaller -F -i bitbug_favicon.ico tingchina_v0.3.py
#
# Example invocations:
# t = TingChina('yousheng',21501,143)
# t = TingChina('pingshu',1660,126)
# t = TingChina('xiangsheng',12567,1)
# t = TingChina('erge',433,12)
# t = TingChina('xiaohua',233,248)
# t.run()
点评 妇女之友 没有32位的吗 发表于 2020-1-12 16:51
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。