本文来源吾爱破解论坛
本帖最后由 天空宫阙 于 2020-1-11 22:40 编辑
目标站点
https://www.tingchina.com/
直接上源码吧
[Python] 纯文本查看 复制代码
import os
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Seconds to wait on the server before giving up on a request, so that a
# stalled connection cannot hang the download loop forever.
REQUEST_TIMEOUT = 30

# Browser-like User-Agent sent with every page (non-download) request.
PAGE_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
)


class TingChina():
    """Download the episodes of one audiobook from www.tingchina.com.

    The audio address is assembled from two sources: the path components
    (category, book name, file name) are parsed out of the player iframe URL
    on the episode page, and the access key is scraped from the flash player
    endpoint's response.
    """

    def __init__(self, id, strat_num):
        """Create a downloader.

        Args:
            id: Audiobook id as it appears in the site's URLs.
            strat_num: One-based number of the first episode to download.
        """
        self.base_url = 'https://www.tingchina.com'
        self.id = id
        # Index used in the play-page URL; it is one less than the episode number.
        self.num = int(strat_num) - 1
        # One-based episode number, used for the output file name.
        self.name_num = int(strat_num)
        self.Referer = ''
        self.host1 = "http://t44.tingchina.com"
        self.host2 = "http://t33.tingchina.com"
        self.book_name = ''

    def get_total_episode(self):
        """Fetch the book page and return ``(book_name, episode_count)``.

        Raises:
            RuntimeError: If the book page does not answer with HTTP 200.
            ValueError: If the expected elements are missing from the page.
        """
        url = 'https://www.tingchina.com/yousheng/disp_{}.htm'.format(str(self.id))
        response = requests.get(
            url,
            headers={'User-Agent': PAGE_USER_AGENT},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(
                'book page {} returned HTTP {}'.format(url, response.status_code)
            )
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')
        episode_lists = soup.select('div.list > ul')
        titles = soup.select(
            'body > div.wrap03.clearfix > div:nth-child(5) > div.main03 > '
            'div:nth-child(2) > div.book01 > ul > li:nth-child(1) > span > strong'
        )
        if not episode_lists or not titles:
            raise ValueError('unexpected page layout at {}'.format(url))
        lis = episode_lists[0].select('li')
        name = titles[0].string
        # NOTE(review): the "- 3" presumably discounts three non-episode <li>
        # entries in the list -- confirm against the live page.
        return name, len(lis) - 3

    def get_flash_url(self):
        """Fetch the current episode's play page and locate its audio path.

        Also stores the play-page URL in ``self.Referer``.

        Returns:
            A tuple ``(src, url, real_address)``: the player iframe ``src``,
            the play-page URL, and the audio address without its access key.

        Raises:
            RuntimeError: If the play page does not answer with HTTP 200.
            ValueError: If the player element or its parameters are missing.
        """
        url = 'https://www.tingchina.com/yousheng/{}/play_{}_{}.htm'.format(
            str(self.id), str(self.id), str(self.num)
        )
        response = requests.get(
            url,
            headers={'User-Agent': PAGE_USER_AGENT},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(
                'play page {} returned HTTP {}'.format(url, response.status_code)
            )
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')
        players = soup.select('#playmedia')
        if not players:
            raise ValueError('no #playmedia element found at {}'.format(url))
        src = players[0]['src']
        self.Referer = url
        # Example src:
        # https://www.tingchina.com/play/yousheng/flash.asp?id=30391&inum=2&flei=都市言情&bookname=江湖岁月&filename=002_大闯我哥.mp3&rand=16&nexturl=play_30391_2.htm
        pattern_params = r'id=(\d+)&inum=(\d+)&flei=(.*?)&bookname=(.*?)&filename=(.*?)&'
        match_params = re.search(pattern_params, src)
        if not match_params:
            # Previously this fell through to an unbound local variable.
            raise ValueError('cannot parse player parameters from {!r}'.format(src))
        info = {
            'id': match_params.group(1),
            'inum': match_params.group(2),
            'flei': match_params.group(3),
            'bookname': match_params.group(4),
            'filename': match_params.group(5),
        }
        real_address = self.host1 + '/yousheng/{}/{}/{}'.format(
            info['flei'], info['bookname'], info['filename']
        )
        return src, url, real_address

    def get_audio(self):
        """Return the downloadable audio URL for the current episode.

        The access key is read from the flash player endpoint and appended to
        the address built by ``get_flash_url``.

        Returns:
            The full audio URL, or ``None`` when the endpoint does not answer
            with HTTP 200 or no key is found in its response.
        """
        temp_url, Referer, real_address = self.get_flash_url()
        # Resolve against the play page so that root-relative, page-relative
        # and absolute iframe sources all produce a valid URL.
        url = urljoin(Referer, temp_url)
        headers = {
            'User-Agent': PAGE_USER_AGENT,
            'Referer': Referer,
        }
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            return None
        response.encoding = 'utf-8'
        matched = re.search(r'url\[3\]= ".*?(key=.*?)";', response.text, re.S)
        if not matched:
            return None
        return real_address + '?' + matched.group(1)

    def download(self):
        """Download the current episode into the book's directory, if available."""
        url = self.get_audio()
        print(url)
        if url:
            target = os.path.join(self.book_name, str(self.name_num).zfill(4) + '.mp3')
            downloadFILE(url, target, self.Referer)

    def run(self):
        """Download every episode from the starting one through the last.

        A failure on one episode is printed and appended to ``log.txt``; the
        loop then moves on to the next episode.
        """
        name, total_episode = self.get_total_episode()
        print('书名:',name,'集数:',total_episode)
        self.book_name = name
        os.makedirs(name, exist_ok=True)
        while self.name_num <= total_episode:
            try:
                self.download()
            except Exception as e:
                print(e)
                with open('log.txt', 'a', encoding='utf-8') as f:
                    f.write(str(self.name_num)+str(e)+'\n')
            self.num += 1
            self.name_num += 1
        print('已经下载完成','all assignments done!')


def downloadFILE(url, name, Referer):
    """Stream ``url`` to the file ``name`` while showing a progress bar.

    Args:
        url: Address of the audio file.
        name: Path of the file to write.
        Referer: Value sent in the ``Referer`` request header.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as an audio file.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'Referer': Referer
    }
    # The context manager releases the streamed connection even on failure.
    with requests.get(url=url, stream=True, headers=headers,
                      timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        # Content-Length is optional (e.g. chunked responses); without it the
        # progress bar simply has no total.
        length = resp.headers.get('Content-Length')
        content_size = int(length) // 1024 if length is not None else None
        shown_size = content_size if content_size is not None else 'unknown'
        with open(name, "wb") as f:
            print("Pkg total size is:", shown_size, 'k,start...')
            for data in tqdm(iterable=resp.iter_content(1024), total=content_size,
                             unit='k', desc=name):
                f.write(data)
    print(name , "download finished!")


def main():
    """Prompt for a book id and a starting episode, then run the downloader."""
    print('例如:20459 1','下载id为20459的有声书从第1集开始下载')
    fields = input('请输入id和起始下载集数用空格隔开').split()
    try:
        # A wrong number of fields and a non-numeric field both raise ValueError.
        book_id, start_num = (int(field) for field in fields)
    except ValueError:
        print('请输入正确的id和起始下载集数用空格隔开')
        return
    if start_num < 1:
        print('请输入正确的id和起始下载集数用空格隔开')
        return
    # TingChina(audiobook id, first episode to download; 1 starts at episode one)
    t = TingChina(book_id, start_num)
    t.run()


if __name__ == "__main__":
    main()
思考
1.总的思路是从flash播放的接口中获得音频的真实地址
https://www.tingchina.com/play/yousheng/flash.asp?id=30255&inum=958。。。
总思路.jpg (97 KB, 下载次数: 0)
下载附件 保存到相册
url1或url2和url3拼接就是真实地址
2.推测由于网站编码为GBK,通过这个接口传入参数为中文时返回的url3中有乱码
于是从这个接口仅获取key值,其他的参数自行拼接
real_address = self.host1+'/yousheng/{}/{}/{}'.format(info['flei'],info['bookname'],info['filename'])
real_address+key即为可以访问的音频地址
3.尝试使用python的类来写,代码可能稍微有点乱
下载地址
py文件和封装后的exe文件:https://www.lanzous.com/i8lqsmf ;新版本链接:https://www.52pojie.cn/thread-1089526-1-1.html
使用方法
1.输入有声书的id和起始下载的集数用空格隔开
例如:输入 20459 1,即下载id为20459的有声书,从第1集开始下载
id如下图
id.jpg (3.21 KB, 下载次数: 0)
下载附件 保存到相册
id和起始下载的集数
输入2.jpg (32.01 KB, 下载次数: 0)
下载附件 保存到相册
如果觉得可以免费评下分!
本帖被以下淘专辑推荐: · 实用工具|主题: 132, 订阅: 39 · 值得学习|主题: 97, 订阅: 21
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。
- 上一篇: 小说爬虫
- 下一篇: 【分享】python-字符串去重的5种方法