@@ -23,27 +23,25 @@ def __init__(self, book_json_path, output_dir="build"):
def extract_precise_toc(self, doc, offset):
    """Return the bookmarks of *doc*, re-based for insertion into the merged book.

    Every entry reported by ``doc.get_toc()`` is pushed two levels down
    (a source level 1 becomes level 3) and its page number is shifted by
    *offset*. The entry's source page is then searched for text equal to
    the bookmark title (case-insensitive, surrounding whitespace ignored)
    so the bookmark can jump to that y coordinate instead of the page top.

    Args:
        doc: The opened source document. It must provide ``get_toc()``,
            ``len()`` and page indexing.
        offset: Number of pages that precede *doc* in the merged document.

    Returns:
        A list of ``[level, title, page, dest]`` entries with 1-based
        pages, where ``dest`` is a GOTO destination dictionary. Entries
        whose source page is below 1 are returned as ``[level, title, -1]``.
    """

    def locate_title_y(page, title):
        """Return the top y coordinate of *title* on *page*, or None."""
        target = title.strip().lower()
        # Blocks without a "lines" key carry no text and are skipped.
        lines = [
            line
            for block in page.get_text("dict")["blocks"]
            for line in block.get("lines", [])
        ]

        # First choice: a single span whose text is exactly the title.
        for line in lines:
            for span in line["spans"]:
                if span["text"].strip().lower() == target:
                    return span["bbox"][1]

        # Fallback: the title is split across several spans of one line
        # (for instance a number and the heading text in different fonts).
        for line in lines:
            spans = line["spans"]
            if not spans:
                continue
            joined = "".join(span["text"] for span in spans)
            if joined.strip().lower() == target:
                return min(span["bbox"][1] for span in spans)

        return None

    refined_toc = []

    for entry in doc.get_toc():
        lvl, title, page_1 = entry[0], entry[1], entry[2]

        # Level 1 of the source PDF becomes level 3 in the merged book.
        new_lvl = lvl + 2

        if page_1 < 1:
            # The bookmark has no page inside this document. Adding the
            # offset would silently point it at an unrelated page, so the
            # "no target" marker is kept instead.
            refined_toc.append([new_lvl, title, -1])
            continue

        new_page_1 = page_1 + offset
        # Default destination: top of the target page.
        dest = {
            "kind": fitz.LINK_GOTO,
            "page": new_page_1 - 1,
            "to": fitz.Point(0, 0),
        }

        page_0 = page_1 - 1
        if page_0 < len(doc):
            found_y = locate_title_y(doc[page_0], title)
            if found_y is not None:
                dest["to"] = fitz.Point(0, found_y)

        refined_toc.append([new_lvl, title, new_page_1, dest])

    return refined_toc
6864
def get_english_filename(self, nav_path=None):
    """Return the output filename configured for this book in nav.json.

    The navigation file is expected to hold a list of objects. The first
    object whose ``"title"`` equals the book title supplies the name via
    ``item["export"]["filename"]``.

    Args:
        nav_path: Optional location of the navigation file. When omitted,
            the historical default ``D:/Github/blog/whk/config/nav.json``
            is used.

    Returns:
        The configured filename, or ``"<book title>.pdf"`` when the file
        is missing, unreadable, malformed, has no matching entry, or the
        matching entry has no usable filename.
    """
    title = self.book_data['title']
    fallback = f"{title}.pdf"

    if nav_path is None:
        nav_path = Path("D:/Github/blog/whk/config/nav.json")
    else:
        nav_path = Path(nav_path)

    if not nav_path.exists():
        return fallback

    try:
        with open(nav_path, "r", encoding="utf-8") as f:
            nav_data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"Error reading nav.json: {e}")
        return fallback

    if not isinstance(nav_data, list):
        print("Error reading nav.json: expected a list of entries")
        return fallback

    for item in nav_data:
        if not isinstance(item, dict) or item.get("title") != title:
            continue
        export = item.get("export")
        filename = export.get("filename") if isinstance(export, dict) else None
        # An empty or missing name would make the output path unusable.
        return filename or fallback

    return fallback
78+
def process(self):
    """Assemble the final book PDF, write its bookmarks, save it and clean up.

    Steps, in order:
      1. Append the cover, frontispiece and table-of-contents PDFs found
         in the output directory (one level 1 bookmark each).
      2. For every section, append its opener PDF if present and add a
         level 1 bookmark. Then append each sub-section's content PDF with
         a level 2 bookmark followed by that document's own headings.
      3. Append the back cover if present.
      4. Install the outline and save under the name returned by
         ``get_english_filename()``.
      5. Delete every PDF that was merged (the content PDFs included),
         all ``*.tex`` files in the output directory and ``tex_tasks.txt``.

    Missing input files are skipped; missing content is reported on
    stdout. Cleanup is best-effort: a file that cannot be removed is
    reported and does not abort the run.
    """
    book_title = self.book_data['title']
    print(f"Processing Book: {book_title}")
    temp_files = []

    def append_pdf(path):
        """Append *path* to the final document and return its 1-based start page."""
        start_page = self.page_offset + 1
        src = fitz.open(path)
        try:
            self.final_doc.insert_pdf(src)
            self.page_offset += len(src)
        finally:
            # Release the source file even when the insert fails.
            src.close()
        temp_files.append(path)
        return start_page

    def normalize_toc(entries, page_count):
        """Return a copy of *entries* whose levels and pages form a valid outline."""
        normalized = []
        open_levels = []
        for entry in entries:
            level = entry[0]
            # Close every branch at the same or a deeper level, then use the
            # resulting depth. An outline that is already valid is unchanged;
            # a skipped level (2 followed by 4) is pulled up to the next
            # allowed depth while siblings stay siblings.
            while open_levels and open_levels[-1] >= level:
                open_levels.pop()
            open_levels.append(level)

            fixed = [len(open_levels)] + list(entry[1:])
            # A section that contributed no pages at the very end of the
            # book would point past the last page; aim it at the last page.
            if page_count > 0 and fixed[2] > page_count:
                fixed[2] = page_count
            normalized.append(fixed)
        return normalized

    # 1. Cover and decorative pages.
    decorative_pages = [
        (f"{book_title}_cover.pdf", "封面"),
        (f"{book_title}_frontispiece.pdf", "扉页"),
        (f"{book_title}_toc.pdf", "目录"),
    ]
    for fname, label in decorative_pages:
        p = self.output_dir / fname
        if p.exists():
            self.toc_data.append([1, label, append_pdf(p)])

    # 2. Sections.
    for section in self.book_data["sections"]:
        sec_title = section['title']
        print(f" Inserting Section: {sec_title}")

        # Section opener (level 1). Without an opener the bookmark points
        # at the first page that follows.
        opener_path = self.output_dir / f"opener_{sec_title}.pdf"
        if opener_path.exists():
            section_page = append_pdf(opener_path)
        else:
            section_page = self.page_offset + 1
        self.toc_data.append([1, sec_title, section_page])

        # Content documents (level 2).
        for sub in section["sections"]:
            sub_title = sub['title']
            content_path = self.book_json_path.parent / sub["path"]
            if not content_path.exists():
                content_path = Path("site/build") / sub["path"]

            if not content_path.exists():
                print(f" Warning: Content not found at {content_path}")
                continue

            doc = fitz.open(content_path)
            try:
                # The document title becomes the level 2 bookmark.
                self.toc_data.append([2, sub_title, self.page_offset + 1])

                # The document's own headings follow at level 3 and deeper.
                chapter_headings = self.extract_precise_toc(doc, self.page_offset)
                self.toc_data.extend(chapter_headings)

                self.final_doc.insert_pdf(doc)
                self.page_offset += len(doc)
            finally:
                doc.close()
            temp_files.append(content_path)

    # 3. Back cover.
    back_path = self.output_dir / f"{book_title}_backcover.pdf"
    if back_path.exists():
        self.toc_data.append([1, "封底", append_pdf(back_path)])

    # 4. Install the outline and save.
    self.final_doc.set_toc(normalize_toc(self.toc_data, len(self.final_doc)))

    final_filename = self.get_english_filename()
    output_file = self.output_dir / final_filename
    self.final_doc.save(output_file, deflate=True, garbage=4)
    self.final_doc.close()

    print(f"Final PDF saved to {output_file}")

    # 5. Cleanup: merged PDFs, TeX sources and the TeX task list.
    print("Cleaning up temporary files...")
    leftovers = list(temp_files)
    leftovers.extend(self.output_dir.glob("*.tex"))
    leftovers.append(self.output_dir / "tex_tasks.txt")
    for f in leftovers:
        # Never delete the file that was just written.
        if f == output_file or not f.exists():
            continue
        try:
            f.unlink()
        except OSError as e:
            print(f" Warning: Could not remove {f}: {e}")
124171
125172if __name__ == "__main__" :
126173 import argparse
@@ -135,31 +182,52 @@ def process(self):
135182 # 仅生成 TeX 模板供后续容器编译
136183 print ("Rendering TeX templates..." )
137184 generated_tex_files = []
185+ book_title = processor .book_data .get ('title' , 'Unknown' )
138186
139- # 渲染封面
140- cover_filename = f"{ processor .book_data .get ('title' , 'Unknown' )} _cover.tex"
141- cover_tex = processor .jinja_env .get_template ("cover.tex.j2" ).render (
142- title = processor .book_data .get ("title" , "Unknown" ),
143- subtitle = processor .book_data .get ("subtitle" , "" ),
144- authors = processor .book_data .get ("authors" , [])
145- )
146- cover_path = processor .output_dir / cover_filename
147- with open (cover_path , "w" , encoding = "utf-8" ) as f :
148- f .write (cover_tex )
187+ common_data = {
188+ "title" : book_title ,
189+ "subtitle" : processor .book_data .get ("subtitle" , "" ),
190+ "authors" : processor .book_data .get ("authors" , []),
191+ "info" : processor .book_data .get ("info" , {})
192+ }
193+
194+ # 1. 封面
195+ cover_tex = processor .jinja_env .get_template ("cover.tex.j2" ).render (** common_data )
196+ cover_path = processor .output_dir / f"{ book_title } _cover.tex"
197+ with open (cover_path , "w" , encoding = "utf-8" ) as f : f .write (cover_tex )
149198 generated_tex_files .append (str (cover_path ))
150199
151- # 渲染章首页
200+ # 2. 扉页
201+ front_tex = processor .jinja_env .get_template ("frontispiece.tex.j2" ).render (** common_data )
202+ front_path = processor .output_dir / f"{ book_title } _frontispiece.tex"
203+ with open (front_path , "w" , encoding = "utf-8" ) as f : f .write (front_tex )
204+ generated_tex_files .append (str (front_path ))
205+
206+ # 3. 目录页 (简版概要)
207+ toc_outline = []
208+ for sec in processor .book_data ["sections" ]:
209+ toc_outline .append ({"title" : sec ['title' ], "page" : "?" }) # 物理页码在 plan 阶段未知,通常填 ? 或略过
210+ toc_tex = processor .jinja_env .get_template ("toc.tex.j2" ).render (toc_outline = toc_outline , ** common_data )
211+ toc_path = processor .output_dir / f"{ book_title } _toc.tex"
212+ with open (toc_path , "w" , encoding = "utf-8" ) as f : f .write (toc_tex )
213+ generated_tex_files .append (str (toc_path ))
214+
215+ # 4. 章首页
152216 for idx , section in enumerate (processor .book_data ["sections" ], 1 ):
153- opener_filename = f"opener_{ section ['title' ]} .tex"
154217 opener_tex = processor .jinja_env .get_template ("opener.tex.j2" ).render (
155218 chapter_num = idx ,
156219 chapter_title = section ["title" ]
157220 )
158- opener_path = processor .output_dir / opener_filename
159- with open (opener_path , "w" , encoding = "utf-8" ) as f :
160- f .write (opener_tex )
221+ opener_path = processor .output_dir / f"opener_{ section ['title' ]} .tex"
222+ with open (opener_path , "w" , encoding = "utf-8" ) as f : f .write (opener_tex )
161223 generated_tex_files .append (str (opener_path ))
162224
225+ # 5. 封底
226+ back_tex = processor .jinja_env .get_template ("backcover.tex.j2" ).render (** common_data )
227+ back_path = processor .output_dir / f"{ book_title } _backcover.tex"
228+ with open (back_path , "w" , encoding = "utf-8" ) as f : f .write (back_tex )
229+ generated_tex_files .append (str (back_path ))
230+
163231 # 写入任务列表供 CI 循环调用
164232 with open (processor .output_dir / "tex_tasks.txt" , "w" , encoding = "utf-8" ) as f :
165233 for tf in generated_tex_files :
0 commit comments