本文来源吾爱破解论坛
本帖最后由 wushaominkk 于 2019-6-6 21:17 编辑
【请勿用于商业用途】
虎牙直播源m3u8
新手可以学习一下
一开始是直接通过网页获取房间id,
但找不到翻页的办法, 然后换了思路, 改为从接口
上获取直播间的id
这个代码也可以爬其他板块, 爬其他板块的时候,
可能会遇到报错, 稍微改一下就行了
导出txt文档,代码已补充
[Python] 纯文本查看 复制代码
import json
import re

import requests
from lxml import etree


class Huya_live:
    """Collect m3u8 stream URLs for Huya live rooms and append them to a text file.

    Typical use: call ``get_romId()`` to fill ``self.id`` with room ids taken
    from the live-list API, then ``get_m3u8()`` to resolve each room's stream
    URL, print it and record it through ``put_write()``.
    """

    # Rooms for which the stream JSON is the 2nd "{...}" match on the page
    # instead of the 3rd. The list was hard-coded by the original author.
    _SECOND_MATCH_ROOMS = frozenset({
        "523923", "627859", "880219", "11602041", "11352941",
        "11352969", "880203", "11342439", "17089810",
    })

    # Matches any "{...}" run on a single line of the page.
    _JSON_LIKE = re.compile(r"{.*}")

    # Seconds to wait for a response, so a stalled connection cannot hang the crawl.
    _REQUEST_TIMEOUT = 10

    def __init__(self):
        # Room ids collected by get_romId() and consumed by get_m3u8().
        self.id = []
        self.headers = {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        }

    def get_m3u8(self):
        """Resolve, print and record the m3u8 URL of every room in ``self.id``.

        A room whose page cannot be fetched or parsed is reported and skipped;
        it never aborts the remaining rooms.
        """
        for room_id in self.id:
            room = str(room_id)
            url = "https://www.huya.com/" + room
            try:
                html = requests.get(
                    url, headers=self.headers, timeout=self._REQUEST_TIMEOUT
                ).text
                title = self._room_title(html)
                # Index 1 of gameStreamInfoList is the entry the original script used.
                stream = self._stream_info(room, html)['data'][0]['gameStreamInfoList'][1]
                stream_name = "/" + stream['sStreamName']
                m3u8 = stream['sHlsUrl'] + stream_name + ".m3u8"
            except (requests.RequestException, etree.LxmlError, AttributeError,
                    IndexError, KeyError, TypeError, ValueError):
                # Network failure, missing title element, missing JSON match,
                # invalid JSON or an unexpected JSON shape.
                print(url, "获取失败")
                continue
            print(title, "\t\t\t\t", m3u8)
            self.put_write(m3u8, title)

    def _room_title(self, html):
        """Return the room title from the page's ``h1#J_roomTitle`` element."""
        title = etree.HTML(html).xpath('//h1[@id="J_roomTitle"]/text()')[0]
        return re.sub("<title>|</title>|\r|\n", "", title)

    def _stream_info(self, room, html):
        """Return the decoded stream JSON embedded in a room page."""
        candidates = self._JSON_LIKE.findall(html)
        index = 1 if room in self._SECOND_MATCH_ROOMS else 2
        # NOTE(review): str.replace is literal, so this only strips the exact
        # text "(|)" and is not a regex for parentheses. Kept as in the original.
        # The trailing character of the match is dropped before decoding.
        raw = candidates[index].replace("(|)", "")[:-1]
        return json.loads(raw)

    def get_romId(self, game_id=2135, pages=4):
        """Append the ``profileRoom`` id of every listed room to ``self.id``.

        The method name is kept as-is for backward compatibility.

        Args:
            game_id: Value of the ``gameId`` query parameter (the board to
                crawl). The original author also noted 1663 as an alternative.
            pages: Number of list pages to fetch, starting from page 1.
        """
        api = ("https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage"
               "&gameId=%s&tagAll=0&page=%d")
        for page in range(1, pages + 1):
            response = requests.get(
                api % (game_id, page),
                headers=self.headers,
                timeout=self._REQUEST_TIMEOUT,
            )
            rooms = json.loads(response.text)['data']['datas']
            self.id.extend(room['profileRoom'] for room in rooms)

    def put_write(self, m3u8, title):
        """Append one ``title***...***m3u8`` line to the output text file."""
        line = title + "***" * 20 + m3u8 + "\n"
        # Explicit UTF-8 so Chinese titles do not depend on the platform default.
        with open("./虎牙直播源.txt", 'a', encoding="utf-8") as f:
            f.write(line)


if __name__ == '__main__':
    huya = Huya_live()
    huya.get_romId()
    huya.get_m3u8()
更新后的代码: 其实只改了一下正则表达式, 改动很小, 应该不会再报错了
[Python] 纯文本查看 复制代码
import json
import re

import requests
from lxml import etree


class Huya_live:
    """Collect m3u8 stream URLs for Huya live rooms and append them to a text file.

    Typical use: call ``get_romId()`` to fill ``self.id`` with room ids taken
    from the live-list API, then ``get_m3u8()`` to resolve each room's stream
    URL, print it and record it through ``put_write()``.
    """

    # Captures everything between `{"status"` and the last `};` on the same line.
    _STREAM_JSON = re.compile(r"{\"status\"(.*)};")

    # Seconds to wait for a response, so a stalled connection cannot hang the crawl.
    _REQUEST_TIMEOUT = 10

    def __init__(self):
        # Room ids collected by get_romId() and consumed by get_m3u8().
        self.id = []
        self.headers = {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        }

    def get_m3u8(self):
        """Resolve, print and record the m3u8 URL of every room in ``self.id``.

        A room whose page cannot be fetched or parsed (for example because
        nobody is streaming) is reported and skipped; it never aborts the
        remaining rooms.
        """
        for room_id in self.id:
            url = "https://www.huya.com/" + str(room_id)
            try:
                html = requests.get(
                    url, headers=self.headers, timeout=self._REQUEST_TIMEOUT
                ).text
                title = self._room_title(html)
                # Index 1 of gameStreamInfoList is the entry the original script used.
                stream = self._stream_info(html)['data'][0]['gameStreamInfoList'][1]
                stream_name = "/" + stream['sStreamName']
                m3u8 = stream['sHlsUrl'] + stream_name + ".m3u8"
            except (requests.RequestException, etree.LxmlError, AttributeError,
                    IndexError, KeyError, TypeError, ValueError):
                # Network failure, missing title element, no regex match,
                # invalid JSON or an unexpected JSON shape. All are reported
                # with the original "nobody is live" message.
                print(url, "直播间没有人直播噢, 不信你去看看 网址为%s" % url)
                continue
            print(title, "\t\t\t\t", m3u8)
            self.put_write(m3u8, title)

    def _room_title(self, html):
        """Return the room title from the page's ``h1#J_roomTitle`` element."""
        title = etree.HTML(html).xpath('//h1[@id="J_roomTitle"]/text()')[0]
        return re.sub("<title>|</title>|\r|\n", "", title)

    def _stream_info(self, html):
        """Return the decoded stream JSON embedded in a room page."""
        captured = self._STREAM_JSON.findall(html)[0]
        # The capture starts right after `{"status"`. Its first five characters
        # are dropped and the object is re-opened with "{".
        # NOTE(review): presumably those five characters are the `:NNN,` status
        # value; confirm against a current page.
        return json.loads("{" + captured[5:])

    def get_romId(self, game_id=2135, pages=4):
        """Append the ``profileRoom`` id of every listed room to ``self.id``.

        The method name is kept as-is for backward compatibility.

        Args:
            game_id: Value of the ``gameId`` query parameter (the board to
                crawl). The original author also noted 1663 as an alternative.
            pages: Number of list pages to fetch, starting from page 1.
        """
        api = ("https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage"
               "&gameId=%s&tagAll=0&page=%d")
        for page in range(1, pages + 1):
            response = requests.get(
                api % (game_id, page),
                headers=self.headers,
                timeout=self._REQUEST_TIMEOUT,
            )
            rooms = json.loads(response.text)['data']['datas']
            self.id.extend(room['profileRoom'] for room in rooms)

    def put_write(self, m3u8, title):
        """Append one ``title***...***m3u8`` line to the output text file."""
        line = title + "***" * 20 + m3u8 + "\n"
        # Explicit UTF-8 so Chinese titles do not depend on the platform default.
        with open("./虎牙直播源.txt", 'a', encoding="utf-8") as f:
            f.write(line)


if __name__ == '__main__':
    huya = Huya_live()
    huya.get_romId()
    huya.get_m3u8()
求点各位的
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。