本文来源吾爱破解论坛
本帖最后由 mikeee 于 2020-2-4 15:55 编辑
[Python] 纯文本查看 复制代码
'''
Download page images of a book from wqxuetang (文泉学堂).

Written for python 3.6 / 3.7.  Dependencies:

    pip install httpx loguru pyjwt tqdm

Example book id: 3208943 ('Python+TensorFlow机器学习实战', '248')

Usage:

    python fetch_png.py

main() is interactive: it prompts for a book id and does not read any
command-line arguments.  Pages are saved in the current directory as
``<bookid>-<page>.png``; files that already exist are not downloaded again.
'''
from pathlib import Path
from time import time
import json

import httpx
import jwt
from tqdm import trange
from loguru import logger

# Secret used to sign the HS256 token sent with every page-image request.
JWT_SECRET = 'g0NnWdSE8qEjdMD8a1aq12qEYphwErKctvfd3IktWHWiOBpVsgkecur38aBRPn2w'

SESS = httpx.Client()
URL = 'https://lib-nuanxin.wqxuetang.com'

# Hit the landing page once with the shared client.
# NOTE(review): presumably this is to pick up session cookies -- confirm.
SESS.get(URL)


def _json_payload(resp):
    '''Return the JSON body of resp as a dict, or {} if it is not one.'''
    try:
        jdata = resp.json()
    except Exception as exc:
        logger.warning(exc)
        return {}
    return jdata if isinstance(jdata, dict) else {}


def _is_yes(answer, yes_prefixes=('是', 'y', 'qui', '对', '好')):
    '''Return True if answer (case-insensitive) starts with a "yes" prefix.'''
    return answer.lower().startswith(tuple(yes_prefixes))


def gen_jwt_key(bookid):
    '''Fetch the value of the 'data' field of /v1/read/k for bookid.

    Raises:
        Exception: if the request fails (the original httpx error is
            re-raised) or the response carries no 'data' field.
    '''
    url = f'{URL}/v1/read/k?bid={bookid}'
    try:
        resp = SESS.get(url)
        resp.raise_for_status()
    except Exception as exc:
        logger.warning(exc)
        # Returning the error text here would get it signed into the token
        # as if it were a key, so propagate the failure instead.
        raise

    res = _json_payload(resp).get('data')
    if res is None:
        raise Exception('returned None, something is not right...')
    return res


def gen_jwt_token(bookid, page=1):
    '''Build the signed HS256 token for one page of a book.

    Args:
        bookid: book id; placed in the payload as a string.
        page: page number placed in the payload.

    Returns:
        The encoded token as a str.

    Raises:
        Exception: whatever gen_jwt_key raises.
    '''
    cur_time = time()
    jwtkey = gen_jwt_key(bookid)
    jwttoken = jwt.encode(
        {
            "p": page,
            "t": int(cur_time) * 1000,
            "b": str(bookid),
            "w": 1000,
            "k": json.dumps(jwtkey),
            "iat": int(cur_time),
        },
        JWT_SECRET,
        algorithm='HS256',
    )
    # PyJWT 1.x returns bytes, PyJWT 2.x returns str; accept both.
    if isinstance(jwttoken, bytes):
        jwttoken = jwttoken.decode('ascii')
    return jwttoken


def bookinfo(bookid):
    '''Look up a book via /v1/read/initread.

    The parsed JSON response is also stored on the function object as
    ``bookinfo.jdata``.

    Args:
        bookid: book id; must be convertible to an int >= 1.

    Returns:
        Tuple of the 'name' and 'canreadpages' fields of the response data.

    Raises:
        Exception: if bookid is invalid, or the response carries no 'data'
            field (including when the request itself failed).
    '''
    url = f'{URL}/v1/read/initread?bid={bookid}'
    try:
        bookid = int(bookid)
    except Exception as exc:
        logger.warning(f'error: {exc}, invalid bookid')
        raise
    if bookid < 1:
        raise Exception(' bookid must be bigger than zero')

    try:
        resp = SESS.get(url)
        resp.raise_for_status()
        jdata = _json_payload(resp)
    except Exception as exc:
        logger.warning(exc)
        # A failed request is treated like a response without data below.
        jdata = {}

    data = jdata.get('data')
    if data is None:
        raise Exception(
            'returned None, something is not right...可能无此书号,也有可能是网络有问题或IP被限制……'  # noqa
        )

    bookinfo.jdata = jdata
    return data.get('name'), data.get('canreadpages')


def fetch_png(bookid, page=1):
    '''Download the image of one page and return its raw bytes.

    The request is tried up to 4 times.  The last response (or, on failure,
    a synthetic 499 response carrying the error text) is stored on the
    function object as ``fetch_png.resp``.

    Args:
        bookid: book id.
        page: page number.

    Returns:
        The response body as bytes.

    Raises:
        Exception: if the token cannot be generated or all attempts fail.
    '''
    token = gen_jwt_token(bookid, page)
    url = f'{URL}/page/img/{bookid}/{page}?k={token}'
    headers = {
        'referer': f'{URL}/read/pdf/{bookid}',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',  # noqa
    }
    req = httpx.Request('GET', URL)

    for _ in range(4):
        try:
            resp = SESS.get(url, headers=headers)
            resp.raise_for_status()
        except Exception as exc:
            logger.warning(exc)
            fetch_png.resp = httpx.Response(
                status_code=499, request=req, content=str(exc).encode()
            )
        else:
            fetch_png.resp = resp
            return resp.content

    # Give up loudly: returning the error body would make the caller save
    # the error text to disk as if it were a page image.
    logger.warning(' We tried hard (4 times), giving up')
    raise Exception('Failed... ')


def fetch_book(bookid, page1=1, page2=None):
    '''Download pages page1..page2 (inclusive) of a book to the current dir.

    Each page is written to ``<bookid>-<page>.png``.  Existing files are
    skipped.  A page that still fails after 3 attempts is logged and
    skipped so that the remaining pages are still downloaded.

    Args:
        bookid: book id.
        page1: first page to fetch.
        page2: last page to fetch; defaults to the 'canreadpages' value
            reported by bookinfo.

    Raises:
        Exception: if the book info cannot be obtained or its page count
            cannot be converted to int.
    '''
    try:
        last_page = int(bookinfo(bookid)[1])
    except Exception as exc:
        logger.error(exc)
        raise

    if page2 is None:
        page2 = last_page

    for elm in trange(page1, page2 + 1):
        filename = f'{bookid}-{elm:03d}.png'
        if Path(filename).exists():
            logger.info(f'{filename} already exists, sipping....')
            continue

        logger.info(f' Fetching {filename}...')
        for _ in range(3):
            try:
                png = fetch_png(bookid, elm)
                Path(filename).write_bytes(png)
                break
            except Exception as exc:
                logger.warning(exc)
        else:
            # Reached only when every attempt above failed.
            logger.warning(f' Page {elm} probbaly missing')


def main():  # pylint: disable=too-many-branches
    '''Interactive entry point.

    Repeatedly prompts for a book id, shows the book info, asks for
    confirmation and downloads the book.  Entering something that starts
    with 'q' or 'x' at the book id prompt quits.
    '''
    logger.info(
        r'''
        访问 https://lib-nuanxin.wqxuetang.com/
        搜索点击感兴趣的书,例如 《计算机网络基础》
        https://lib-nuanxin.wqxuetang.com/#/Book/2175744
        网址尾部的数字2175744 即为书号 bookid。将bookid拷至
        系统剪贴板或记住该数字。
        '''
    )

    while 1:
        bookid_str = input('输入书号bookid:(例如 123, 退出输入 q 或 x) ')
        if bookid_str.lower().startswith(('q', 'x')):
            break
        if not bookid_str.strip():
            continue

        try:
            bookid = int(bookid_str)
        except ValueError:
            logger.warning(f'\n\t 无效书号:{bookid_str}, 重新输入')
            continue
        if bookid < 1:
            logger.warning(f'\n\t 无效书号:{bookid_str}, 重新输入')
            continue

        # Look the book up, letting the user decide whether to retry.
        info = None
        count = 0
        while info is None:
            try:
                logger.info('\n\t\t diggin...')
                info = bookinfo(bookid)
            except Exception as exc:
                logger.error(exc)
                count += 1
                if count > 3:
                    logger.info('\n\t 事不过三,还是算了吧')
                    break
                cont_or_not = input('再试一次?确认输入 y, 输入其他重输书号 ')
                # Empty input or a "yes" answer retries; anything else
                # goes back to the book id prompt.
                if cont_or_not.strip() and not _is_yes(cont_or_not):
                    break

        if info is None:
            continue

        logger.info(bookinfo.jdata)
        logger.info(f'\n\t\t 下载 {info}? ')
        yes_no = input('确认输入 y, 输入其他重输书号 ')
        if _is_yes(yes_no):
            logger.info(
                f'''
                开始下载 {info}...
                一般情况下(例如网络正常、服务器不太忙,IP未被限制)
                平均每100页约需15-20分钟)。
                终断(ctrl-C或ctrl-brea)程序后,已下载的页不会被覆盖。
                因此,如发现下载的页有问题或有些页未成功下载,可以删掉
                再用相同的 bookid 运行一次程序 python fetch_png.py
                '''
            )
            fetch_book(bookid)


if __name__ == '__main__':
    main()
Happy downloading, reading and learning!
搜到网上高人分享的资源,做了点简化。希望对网友有点用。(友好提示:可以
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。