本文来源吾爱破解论坛
本帖最后由 858043016 于 2020-2-20 12:22 编辑
1.获取书名和目录
网页分别是
名称
https://lib-nuanxin.wqxuetang.com/v1/book/initbook?bid=3206295
目录
https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid=3206295
返回格式是json
2.pymupdf
先用img2pdf把图片合成PDF,再用PyMuPDF(fitz)给PDF添加目录
3.代码
3.1只生成目录,结合FreePic2PDF使用。脚本输出的FreePic2Pdf_bkmk.txt已经是utf-16编码,直接复制整个文件就可以使用
[Python] 纯文本查看 复制代码
import requests as req
import json
import os


def _flatten_cata(nodes, depth=0):
    """Turn a catalogue tree into FreePic2Pdf bookmark lines.

    Each node yields one line of the form ``<tabs>label<TAB>pnum<NEWLINE>``
    where the number of leading tabs equals the nesting depth. Children are
    visited recursively so catalogues deeper than two levels are kept.

    Args:
        nodes: list of catalogue dicts carrying ``label``, ``pnum`` and an
            optional ``children`` list.
        depth: nesting depth of ``nodes`` (0 for top-level entries).

    Returns:
        A list of bookmark lines in document order.
    """
    lines = []
    for node in nodes:
        lines.append('\t' * depth + str(node['label']) + '\t' + str(node['pnum']) + '\n')
        # A missing or null ``children`` entry simply means "leaf node".
        lines.extend(_flatten_cata(node.get('children') or [], depth + 1))
    return lines


def get_cata(book_id):
    """Fetch the catalogue of a book and return it as bookmark text.

    Args:
        book_id: the ``bid`` value of the book.

    Returns:
        A string with one bookmark per line (title, tab, page number),
        nested entries being indented with tabs.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        KeyError: if the JSON reply lacks the expected fields.
    """
    url_name = "https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid={}".format(book_id)
    response = req.post(url=url_name, headers=headers, timeout=30)
    response.raise_for_status()
    book_cata = json.loads(response.text)
    return ''.join(_flatten_cata(book_cata['data']))


# Read the cookies; strip the trailing newline most editors append, because a
# header value containing a line break is rejected when the request is sent.
with open('Cookies.txt', 'r') as f:
    cookies = f.read().strip()

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # "br" is deliberately not advertised: requests only decodes Brotli when
    # an optional Brotli library is installed.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': cookies,
    'Host': 'lib-nuanxin.wqxuetang.com',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

book_id = input('输入书的id:\n').strip()

# Fetch first, write second: a failed request must not leave behind an empty
# (truncated) bookmark file.
cata_text = get_cata(book_id)
with open('FreePic2Pdf_bkmk.txt', 'w', encoding='utf-16') as f:
    f.write(cata_text)
3.2生成PDF和目录
[Python] 纯文本查看 复制代码
import fitz
import requests as req
import json
import os
import img2pdf

# Characters Windows forbids in file names; each is replaced by "_" so a book
# title such as "A: B" cannot make open() fail.
_ILLEGAL_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def _post_json(url):
    """POST to ``url`` with the module-level headers and return parsed JSON."""
    response = req.post(url=url, headers=headers, timeout=30)
    response.raise_for_status()
    return json.loads(response.text)


def get_name(book_id):
    """Return ``(name, pages)`` of the book as reported by the initbook API.

    Args:
        book_id: the ``bid`` value of the book.

    Raises:
        requests.RequestException: on network failure or HTTP error status.
        KeyError: if the JSON reply lacks the expected fields.
    """
    url_name = "https://lib-nuanxin.wqxuetang.com/v1/book/initbook?bid={}".format(book_id)
    book_more = _post_json(url_name)
    return (book_more['data']['name'], book_more['data']['pages'])


def _flatten_toc(nodes):
    """Flatten a catalogue tree into ``[level, title, page]`` rows, depth-first."""
    toc = []
    for node in nodes:
        toc.append([int(node['level']), node['label'], int(node['pnum'])])
        # Recurse so that entries nested deeper than two levels are kept.
        toc.extend(_flatten_toc(node.get('children') or []))
    return toc


def get_cata(book_id):
    """Fetch the catalogue of a book as a list of ``[level, title, page]`` rows.

    Args:
        book_id: the ``bid`` value of the book.

    Returns:
        Rows in document order, in the shape passed on to PyMuPDF's TOC setter.
    """
    url_name = "https://lib-nuanxin.wqxuetang.com/v1/book/catatree?bid={}".format(book_id)
    book_cata = _post_json(url_name)
    return _flatten_toc(book_cata['data'])


def _page_number(file_name):
    """Return the integer a file name stem spells, or None if it is not a number."""
    stem = os.path.splitext(file_name)[0]
    try:
        return int(stem)
    except ValueError:
        return None


def _set_toc(doc, toc):
    """Set the table of contents using whichever method name PyMuPDF offers.

    Newer PyMuPDF releases name the method ``set_toc``; older ones ``setToC``.
    """
    setter = getattr(doc, 'set_toc', None) or doc.setToC
    setter(toc)


def pic2pdf(book_id):
    """Merge the numbered page images in ``path_raw`` into a PDF with bookmarks.

    The images are first combined into a temporary PDF under
    ``<path_raw>/temp``; the catalogue is then added and the result saved as
    ``<name>_<book_id>_P<pages>.pdf`` inside ``path_raw``.

    Args:
        book_id: the ``bid`` value of the book.

    Raises:
        ValueError: if ``path_raw`` contains no files named by a page number.
    """
    name, page_all = get_name(book_id)
    print("开始进行{}_{},一共{}页".format(book_id, name, page_all))

    pdf_name = "{}_{}_P{}.pdf".format(name, book_id, page_all).translate(_ILLEGAL_FILENAME_CHARS)
    temp_dir = os.path.join(path_raw, "temp")
    path_tem = os.path.join(temp_dir, pdf_name)
    path_final = os.path.join(path_raw, pdf_name)

    # Keep only regular files whose stem is a page number. The folder may also
    # hold the "temp" directory or a PDF produced by an earlier run, which
    # must not be fed to the sort key or to img2pdf.
    numbered = []
    for entry in os.listdir(path_raw):
        number = _page_number(entry)
        if number is not None and os.path.isfile(os.path.join(path_raw, entry)):
            numbered.append((number, entry))
    numbered.sort(key=lambda pair: pair[0])
    img_all = [os.path.join(path_raw, entry) for _, entry in numbered]
    print("获取了{}页图片".format(len(img_all)))
    if not img_all:
        raise ValueError("no page images (files named <number>.<ext>) found in {}".format(path_raw))

    # NOTE(review): with_pdfrw is kept from the original call; confirm it is
    # still accepted by the installed img2pdf version.
    pfn_bytes = img2pdf.convert(img_all, with_pdfrw=False)

    # Make sure the temporary directory exists.
    os.makedirs(temp_dir, exist_ok=True)
    with open(path_tem, "wb") as f:
        f.write(pfn_bytes)
    print("准备生成目录...")

    # Add the table of contents.
    try:
        doc = fitz.open(path_tem)
        try:
            _set_toc(doc, get_cata(book_id))
            doc.save(path_final)  # write the final PDF
        finally:
            doc.close()
    except Exception as err:
        print("添加目录错误,检查页码是否正确")
        print(err)  # show the real cause instead of hiding it
        return

    print("保存成功{}".format(path_final))
    os.remove(path_tem)
    # The temp entry is a directory, so it needs rmdir rather than remove;
    # leave it alone if it still holds other files.
    try:
        os.rmdir(temp_dir)
    except OSError:
        pass


# Read the cookies; strip the trailing newline most editors append, because a
# header value containing a line break is rejected when the request is sent.
with open('Cookies.txt', 'r') as f:
    cookies = f.read().strip()

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # "br" is deliberately not advertised: requests only decodes Brotli when
    # an optional Brotli library is installed.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': cookies,
    'Host': 'lib-nuanxin.wqxuetang.com',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
}

book_id = input('输入书的id:\n').strip()
# Dragging a folder into a Windows console wraps the path in double quotes.
path_raw = input('输入放着图片的文件夹的全部路径:\n').strip().strip('"')
pic2pdf(book_id)
3.3 20200220更新,不需要cookies了,代码直接复制就能用
[Python] 纯文本查看 复制代码
import json
import os

import fitz
import img2pdf
import requests as req

# Characters Windows forbids in file names; each is replaced by "_" so a book
# title such as "A: B" cannot make open() fail.
_ILLEGAL_FILENAME_CHARS = str.maketrans({c: '_' for c in '\\/:*?"<>|'})


def _post_json(url):
    """POST to ``url`` with the module-level headers and return parsed JSON."""
    response = req.post(url=url, headers=headers, timeout=30)
    response.raise_for_status()
    return json.loads(response.text)


def get_name(book_id):
    """Return ``(name, pages)`` of the book as reported by the initbook API.

    Args:
        book_id: the ``bid`` value of the book.

    Raises:
        requests.RequestException: on network failure or HTTP error status.
        KeyError: if the JSON reply lacks the expected fields.
    """
    url_name = 'https://www.wqxuetang.com/v1/book/initbook?bid={}'.format(book_id)
    book_more = _post_json(url_name)
    return (book_more['data']['name'], book_more['data']['pages'])


def _flatten_toc(nodes):
    """Flatten a catalogue tree into ``[level, title, page]`` rows, depth-first."""
    toc = []
    for node in nodes:
        toc.append([int(node['level']), node['label'], int(node['pnum'])])
        # Recurse so that entries nested deeper than two levels are kept.
        toc.extend(_flatten_toc(node.get('children') or []))
    return toc


def get_cata(book_id):
    """Fetch the catalogue of a book as a list of ``[level, title, page]`` rows.

    Args:
        book_id: the ``bid`` value of the book.

    Returns:
        Rows in document order, in the shape passed on to PyMuPDF's TOC setter.
    """
    url_name = 'https://www.wqxuetang.com/v1/book/catatree?bid={}'.format(book_id)
    book_cata = _post_json(url_name)
    return _flatten_toc(book_cata['data'])


# Header names must not contain spaces; the spaced variants in the original
# post ('Accept - Encoding', 'User - Agent', ...) are not the standard headers.
headers = {
    'Accept': 'application/json,text/plain,*/*',
    # "br" is deliberately not advertised: requests only decodes Brotli when
    # an optional Brotli library is installed.
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'BA': 'bapkg/com.bookask.wqxuetang,baver/0.0.1',
    'Connection': 'keep-alive',
    'Cookie': '',
    'Host': 'www.wqxuetang.com',
    'Referer': 'https://www.wqxuetang.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}


def _page_number(file_name):
    """Return the integer a file name stem spells, or None if it is not a number."""
    stem = os.path.splitext(file_name)[0]
    try:
        return int(stem)
    except ValueError:
        return None


def _set_toc(doc, toc):
    """Set the table of contents using whichever method name PyMuPDF offers.

    Newer PyMuPDF releases name the method ``set_toc``; older ones ``setToC``.
    """
    setter = getattr(doc, 'set_toc', None) or doc.setToC
    setter(toc)


def _add_toc_in_place(pdf_path, book_id):
    """Add the book's catalogue to an existing PDF, replacing the file.

    PyMuPDF refuses a plain ``save`` onto the file a document was opened
    from, so the result is written to a sibling temporary file which then
    replaces the original once the document has been closed.
    """
    tmp_path = pdf_path + ".toc_tmp"
    doc = fitz.open(pdf_path)
    try:
        _set_toc(doc, get_cata(book_id))
        doc.save(tmp_path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    finally:
        doc.close()
    os.replace(tmp_path, pdf_path)


def pic2pdf(book_id):
    """Merge the numbered page images in ``path_raw`` into a PDF with bookmarks.

    The images are first combined into a temporary PDF under
    ``<path_raw>/temp``; the catalogue is then added and the result saved as
    ``<name>_<book_id>_P<pages>.pdf`` inside ``path_raw``.

    Args:
        book_id: the ``bid`` value of the book.

    Raises:
        ValueError: if ``path_raw`` contains no files named by a page number.
    """
    name, page_all = get_name(book_id)
    print("开始进行{}_{},一共{}页".format(book_id, name, page_all))

    pdf_name = "{}_{}_P{}.pdf".format(name, book_id, page_all).translate(_ILLEGAL_FILENAME_CHARS)
    temp_dir = os.path.join(path_raw, "temp")
    path_tem = os.path.join(temp_dir, pdf_name)
    path_final = os.path.join(path_raw, pdf_name)

    # Keep only regular files whose stem is a page number. The folder may also
    # hold the "temp" directory or a PDF produced by an earlier run, which
    # must not be fed to the sort key or to img2pdf.
    numbered = []
    for entry in os.listdir(path_raw):
        number = _page_number(entry)
        if number is not None and os.path.isfile(os.path.join(path_raw, entry)):
            numbered.append((number, entry))
    numbered.sort(key=lambda pair: pair[0])
    img_all = [os.path.join(path_raw, entry) for _, entry in numbered]
    print("获取了{}页图片".format(len(img_all)))
    if not img_all:
        raise ValueError("no page images (files named <number>.<ext>) found in {}".format(path_raw))

    # NOTE(review): with_pdfrw is kept from the original call; confirm it is
    # still accepted by the installed img2pdf version.
    pfn_bytes = img2pdf.convert(img_all, with_pdfrw=False)

    # Make sure the temporary directory exists.
    os.makedirs(temp_dir, exist_ok=True)
    with open(path_tem, "wb") as f:
        f.write(pfn_bytes)
    print("准备生成目录...")

    # Add the table of contents.
    try:
        doc = fitz.open(path_tem)
        try:
            _set_toc(doc, get_cata(book_id))
            doc.save(path_final)  # write the final PDF
        finally:
            doc.close()
    except Exception as err:
        print("添加目录错误,检查页码是否正确")
        print(err)  # show the real cause instead of hiding it
        return

    print("保存成功{}".format(path_final))
    os.remove(path_tem)


flag = input("单独添加目录输入1\n")
if flag == '1':
    print("-进入单独添加目录-")
    try:
        book_id = input('输入书的id:\n').strip()
        # Dragging a file into a Windows console wraps the path in quotes.
        path = input("输入文件目录包括pdf名称").strip().strip('"')
        _add_toc_in_place(path, book_id)
        print("添加成功{}".format(path))
    except Exception as err:
        print("添加目录失败")
        print(err)  # show the real cause instead of hiding it
else:
    print("-进入PDF合成及目录添加-")
    book_id = input('输入书的id:\n').strip()
    path_raw = input('输入放着图片的文件夹的全部路径:\n').strip().strip('"')
    pic2pdf(book_id)
4.参考文献
https://zhuanlan.zhihu.com/p/88618967
https://www.jb51.net/article/160622.htm
点个热心吧
5.软件下载地址
https://www.52pojie.cn/thread-1103527-1-1.html
版权声明:
本站所有资源均为站长或网友整理自互联网或站长购买自互联网,站长无法分辨资源版权出自何处,所以不承担任何版权以及其他问题带来的法律责任,如有侵权或者其他问题请联系站长删除!站长QQ754403226 谢谢。
- 上一篇: 日常作品分享------模拟登录豆瓣并根据电影名称发表评论
- 下一篇: python 读写xlsx