@@ -11,13 +11,14 @@ import os, sys, platform
1111from functools import partial
1212import threading , psutil , tempfile , pyperclip
1313import base64 , json , re , requests
14+ from pypdf import PdfReader , PdfWriter
1415
1516os_name = platform .system () # 获取操作系统类型
1617
1718if os_name == "Windows" : # 在 Windows 操作系统下,导入 Windows 相关库
1819 import win32print , win32gui , win32con , win32api , ctypes , winreg
1920
20- def parse (url : str ) -> tuple [str , str , str ] | tuple [None , None , None ]: # 解析 URL
21+ def parse (url : str ) -> tuple [str , str , str , list ] | tuple [None , None , None , None ]:
2122 try :
2223 content_id , content_type , resource_url = None , None , None
2324
@@ -27,7 +28,7 @@ def parse(url: str) -> tuple[str, str, str] | tuple[None, None, None]: # 解析
2728 content_id = q .split ("=" )[1 ]
2829 break
2930 if not content_id :
30- return None , None , None
31+ return None , None , None , None
3132
3233 for q in url [url .find ("?" ) + 1 :].split ("&" ):
3334 if q .split ("=" )[0 ] == "contentType" :
@@ -66,8 +67,96 @@ def parse(url: str) -> tuple[str, str, str] | tuple[None, None, None]: # 解析
6667 response = session .get (f"https://s-file-1.ykt.cbern.com.cn/zxx/ndrs/special_edu/resources/details/{ content_id } .json" )
6768 else : # 对普通电子课本的解析
6869 response = session .get (f"https://s-file-1.ykt.cbern.com.cn/zxx/ndrv2/resources/tch_material/details/{ content_id } .json" )
69-
70+
7071 data = response .json ()
72+ title = data .get ("title" , "未知教材" )
73+
74+ # 3. 获取章节目录 (核心修改部分)
75+ chapters = data .get ("chapters" , [])
76+
77+ # 如果主接口没目录,尝试通过 ebook_mapping + tree 接口组合获取
78+ if not chapters :
79+ mapping_url = None
80+ for item in data .get ("ti_items" , []):
81+ if item .get ("ti_file_flag" ) == "ebook_mapping" :
82+ mapping_url = item ["ti_storages" ][0 ]
83+ break
84+
85+ if mapping_url :
86+ try :
87+ if not access_token :
88+ mapping_url = re .sub (
89+ r"^https?://(?:.+).ykt.cbern.com.cn/(.+)/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}).pkg/(.+)$" ,
90+ r"https://c1.ykt.cbern.com.cn/\1/\2.pkg/\3" ,
91+ mapping_url
92+ )
93+
94+ # A. 下载 mapping 文件获取页码和 ebook_id
95+ map_resp = session .get (mapping_url )
96+ map_resp .encoding = 'utf-8'
97+ map_data = map_resp .json ()
98+
99+ ebook_id = map_data .get ("ebook_id" )
100+
101+ # 构建 nodeId 到 pageNumber 的映射字典
102+ # 格式: { "node_id_1": 5, "node_id_2": 10 }
103+ page_map = {}
104+ if "mappings" in map_data :
105+ for m in map_data ["mappings" ]:
106+ page_map [m ["node_id" ]] = m .get ("page_number" , 1 )
107+
108+ # B. 如果有 ebook_id,去下载完整的目录树 (Tree API)
109+ if ebook_id :
110+ tree_url = f"https://s-file-1.ykt.cbern.com.cn/zxx/ndrv2/national_lesson/trees/{ ebook_id } .json"
111+ tree_resp = session .get (tree_url )
112+
113+ if tree_resp .status_code == 200 :
114+ tree_data = tree_resp .json ()
115+
116+ # 递归函数:合并 Tree的标题 和 Mapping的页码
117+ def process_tree_nodes (nodes ):
118+ result = []
119+ for node in nodes :
120+ # 从 page_map 中找页码,找不到为none
121+ page_num = page_map .get (node ["id" ], None )
122+
123+ chapter_item = {
124+ "title" : node ["title" ],
125+ "page_index" : page_num
126+ }
127+
128+ # 如果有子节点,递归处理
129+ if node .get ("child_nodes" ):
130+ chapter_item ["children" ] = process_tree_nodes (node ["child_nodes" ])
131+
132+ result .append (chapter_item )
133+ return result
134+
135+ # 开始解析
136+ if isinstance (tree_data , list ):
137+ chapters = process_tree_nodes (tree_data )
138+ elif isinstance (tree_data , dict ) and "child_nodes" in tree_data :
139+ chapters = process_tree_nodes (tree_data ["child_nodes" ])
140+
141+ # print(f"成功获取完整目录: {len(chapters)} 个顶级章节")
142+
143+ # C. 兜底方案:如果获取 Tree 失败,仅使用 mapping 生成纯页码索引
144+ if not chapters and "mappings" in map_data :
145+ temp_chapters = []
146+ mappings = map_data ["mappings" ]
147+ mappings .sort (key = lambda x : x ["page_number" ])
148+ for i , m in enumerate (mappings ):
149+ temp_chapters .append ({
150+ "title" : f"第 { i + 1 } 节 (P{ m ['page_number' ]} )" ,
151+ "page_index" : m ['page_number' ]
152+ })
153+ chapters = temp_chapters
154+
155+ except Exception as e :
156+ print (f"目录解析异常: { e } " )
157+
158+ # 4. 获取 PDF 下载链接 (保持不变)
159+
71160 for item in list (data ["ti_items" ]):
72161 if item ["lc_ti_format" ] == "pdf" : # 寻找存有 PDF 链接列表的项
73162 resource_url : str = item ["ti_storages" ][0 ] # 获取并构造 PDF 的 URL
@@ -88,15 +177,64 @@ def parse(url: str) -> tuple[str, str, str] | tuple[None, None, None]: # 解析
88177 resource_url = re .sub (r"^https?://(?:.+).ykt.cbern.com.cn/(.+)/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}).pkg/(.+)\.pdf$" , r"https://c1.ykt.cbern.com.cn/\1/\2.pkg/\3.pdf" , resource_url )
89178 break
90179 if not resource_url :
91- return None , None , None
180+ return None , None , None , None
92181 else :
93- return None , None , None
94-
95- return resource_url , content_id , data ["title" ]
96- except Exception : # 解析失败时返回 None
97- return None , None , None
182+ return None , None , None , None
183+
184+ return resource_url , content_id , title , chapters
185+ except Exception :
186+ return None , None , None , None
187+
def add_bookmarks(pdf_path: str, chapters: list) -> None:
    """Add outline entries (bookmarks) to the PDF at ``pdf_path``, in place.

    Args:
        pdf_path: Path of an existing PDF file; it is replaced by a copy
            that carries the generated outline.
        chapters: List of dicts. Each dict may hold ``"title"`` (bookmark
            text), ``"page_index"`` (1-based page number) and ``"children"``
            (a nested list of the same shape). An empty or ``None`` value
            makes the function a no-op.

    Returns:
        None. This is best-effort: any failure is reported on stderr and the
        original file is left untouched.
    """
    if not chapters:
        return

    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()
        writer.append_pages_from_reader(reader)

        page_count = len(writer.pages)
        if page_count == 0:
            # No page to point a bookmark at; keep the file as it is.
            return

        def _add_chapter(chapter_list, parent=None):
            """Recursively add ``chapter_list`` as outline items under ``parent``."""
            for chapter in chapter_list:
                title = chapter.get("title", "未知章节")
                p_index = chapter.get("page_index")

                if p_index is None:
                    # NOTE(review): the children of a chapter without a page
                    # are skipped as well, exactly as before — confirm that
                    # this is the intended behaviour.
                    # print() is used instead of sys.stderr.write() because
                    # sys.stderr can be None in windowed (console-less) runs.
                    print(f"[!!]跳过章节“{title}”的书签,原因:未指定页码", file=sys.stderr)
                    continue

                # Convert the 1-based page number to a 0-based page index;
                # an unparsable value falls back to the first page.
                try:
                    page_num = int(p_index) - 1
                except (ValueError, TypeError):
                    page_num = 0

                # Clamp into the valid page range of the document.
                page_num = max(0, min(page_num, page_count - 1))

                # ``parent`` is the enclosing outline item (None at top level).
                bookmark = writer.add_outline_item(title, page_num, parent=parent)

                children = chapter.get("children")
                if children:
                    _add_chapter(children, parent=bookmark)

        _add_chapter(chapters)

        # Write to a sibling temporary file and swap it in afterwards, so a
        # failure while writing never truncates the already downloaded PDF.
        tmp_path = f"{pdf_path}.bookmark.tmp"
        try:
            with open(tmp_path, "wb") as f:
                writer.write(f)
            os.replace(tmp_path, pdf_path)
        except Exception:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # Temporary file was never created or is already gone.
            raise

    except Exception as e:
        print(f"添加书签失败: {e}", file=sys.stderr)
98236
99- def download_file (url : str , save_path : str ) -> None : # 下载文件
237+ def download_file (url : str , save_path : str , chapters : list = None ) -> None : # 下载文件
100238 global download_states
101239 current_state = { "download_url" : url , "save_path" : save_path , "downloaded_size" : 0 , "total_size" : 0 , "finished" : False , "failed_reason" : None }
102240 download_states .append (current_state )
@@ -123,7 +261,9 @@ def download_file(url: str, save_path: str) -> None: # 下载文件
123261 download_progress = (all_downloaded_size / all_total_size ) * 100
124262 download_progress_bar ["value" ] = download_progress # 更新进度条
125263 progress_label .config (text = f"{ format_bytes (all_downloaded_size )} /{ format_bytes (all_total_size )} ({ download_progress :.2f} %) 已下载 { downloaded_number } /{ total_number } " ) # 更新标签以显示当前下载进度
126-
264+ if chapters :
265+ progress_label .config (text = f"添加书签" )
266+ add_bookmarks (save_path , chapters )
127267 current_state ["downloaded_size" ] = current_state ["total_size" ]
128268 current_state ["finished" ] = True
129269
@@ -188,7 +328,8 @@ def download() -> None: # 下载资源文件
188328 dir_path = None
189329
190330 for url in urls :
191- resource_url , content_id , title = parse (url )
331+ # resource_url, content_id, title = parse(url)
332+ resource_url , content_id , title , chapters = parse (url )
192333 if not resource_url :
193334 failed_links .append (url ) # 添加到失败链接
194335 continue
@@ -205,7 +346,8 @@ def download() -> None: # 下载资源文件
205346 if os_name == "Windows" :
206347 save_path = save_path .replace ("/" , "\\ " )
207348
208- thread_it (download_file , (resource_url , save_path )) # 开始下载(多线程,防止窗口卡死)
349+ # thread_it(download_file, (resource_url, save_path)) # 开始下载(多线程,防止窗口卡死)
350+ thread_it (download_file , (resource_url , save_path , chapters )) # 开始下载(多线程,防止窗口卡死)
209351
210352 if failed_links :
211353 messagebox .showwarning ("警告" , "以下 “行” 无法解析:\n " + "\n " .join (failed_links )) # 显示警告对话框
0 commit comments