@@ -13,38 +13,28 @@ def __init__(self, book_json_path, output_dir="build"):
1313 with open (self .book_json_path , "r" , encoding = "utf-8" ) as f :
1414 self .book_data = json .load (f )
1515
16- # 锚定模板目录到镜像内的绝对路径
1716 template_dir = os .environ .get ("TEMPLATES_DIR" , "/app/templates" )
1817 self .jinja_env = Environment (loader = FileSystemLoader (template_dir ))
1918 self .final_doc = fitz .open ()
2019 self .page_offset = 0
21- self .toc_data = []
20+ self .toc_data = [] # [[lvl, title, page, dest]]
21+ self .skip_decoration_pages = set () # 1-based
22+ self .book_meta = {}
2223
2324 def extract_precise_toc (self , doc , offset ):
24- """
25- 根据 get_toc() 返回的初步目录,在对应页码进行文本定位,获取 y 坐标并偏移。
26- 调整层级以适应整体书籍结构 (Headings 设为 Level 3+)。
27- """
2825 raw_toc = doc .get_toc ()
2926 refined_toc = []
30-
3127 for entry in raw_toc :
32- lvl = entry [0 ]
33- title = entry [1 ]
34- page_1 = entry [2 ]
35-
36- # 原始 PDF 的 H1 (lvl 1) 在合集中应设为 Level 3
28+ lvl , title , page_1 = entry [0 ], entry [1 ], entry [2 ]
3729 new_lvl = lvl + 2
3830 new_page_1 = page_1 + offset
3931 dest = {"kind" : fitz .LINK_GOTO , "page" : new_page_1 - 1 , "to" : fitz .Point (0 , 0 )}
40-
4132 page_0 = page_1 - 1
4233 if 0 <= page_0 < len (doc ):
4334 found_y = None
4435 page_obj = doc [page_0 ]
4536 blocks = page_obj .get_text ("dict" )["blocks" ]
4637 target_title_norm = title .strip ().lower ()
47-
4838 for b in blocks :
4939 if "lines" in b :
5040 for line in b ["lines" ]:
@@ -53,121 +43,153 @@ def extract_precise_toc(self, doc, offset):
5343 found_y = s ["bbox" ][1 ]
5444 break
5545 if found_y is not None : break
46+ if found_y is not None : break
5647 if found_y is not None : break
57-
5848 if found_y is not None :
5949 dest ["to" ] = fitz .Point (0 , found_y )
60-
6150 refined_toc .append ([new_lvl , title , new_page_1 , dest ])
62-
6351 return refined_toc
6452
def add_toc_links(self, toc_page_num):
    """Overlay clickable GOTO links on the rendered table-of-contents page.

    For every level-1 and level-2 entry in ``self.toc_data`` the first text
    block on the TOC page whose text contains the entry title receives a
    link annotation that jumps to the entry's target page.

    Args:
        toc_page_num: 1-based page number of the TOC page inside
            ``self.final_doc``. Numbers outside ``1..len(final_doc)`` are
            ignored.

    Returns:
        None. Links are inserted into ``self.final_doc`` in place.
    """
    # Reject zero/negative numbers as well as numbers past the end: a
    # non-positive value would become a negative index and silently select
    # a page counted from the end of the document.
    if not 1 <= toc_page_num <= len(self.final_doc):
        return

    page = self.final_doc[toc_page_num - 1]
    # Each block is a tuple whose first four items are the bounding box and
    # whose fifth item is the block text.
    blocks = page.get_text("blocks")

    for entry in self.toc_data:
        level, title, target_page = entry[0], entry[1], entry[2]
        if level > 2:
            continue
        # An empty title is a substring of every block and would attach a
        # bogus link to the first block on the page.
        if not title or not title.strip():
            continue
        for block in blocks:
            if title in block[4]:
                page.insert_link({
                    "kind": fitz.LINK_GOTO,
                    "page": target_page - 1,  # link targets are 0-based
                    "from": fitz.Rect(block[:4]),
                })
                break
65+
def ensure_parity(self, target_parity):
    """Pad the output with one blank A4 page so the next page has the wanted parity.

    Args:
        target_parity: Desired parity of the next 1-based page number:
            an odd value (``1``/``True``) for an odd page, an even value
            (``0``/``False``) for an even page.

    Returns:
        True if a blank page was appended, False if the next page already
        had the requested parity.
    """
    next_page = self.page_offset + 1
    # Reduce both sides modulo 2 so any integer describes a parity; comparing
    # against the raw argument would insert a page on every call for values
    # other than 0 and 1.
    if next_page % 2 == target_parity % 2:
        return False

    width, height = fitz.paper_size("a4")
    self.final_doc.new_page(width=width, height=height)
    self.page_offset += 1
    # The filler page is recorded so it is excluded from header/footer decoration.
    self.skip_decoration_pages.add(self.page_offset)
    return True
74+
def draw_decorations(self, doc, start_page_num, book_title, section_title):
    """Stamp a running header, a rule and a page-number footer on the pages of ``doc``.

    Odd pages get ``section_title`` right-aligned in the header, even pages
    get ``book_title`` left-aligned. Every decorated page also gets a thin
    rule under the header and its absolute page number centred in the footer.
    Pages whose absolute number is in ``self.skip_decoration_pages`` are
    left untouched.

    Args:
        doc: The PDF whose pages are decorated in place.
        start_page_num: 1-based page number the first page of ``doc`` will
            have in the assembled book.
        book_title: Header text for even pages.
        section_title: Header text for odd pages.

    Returns:
        None.
    """
    # "china-ss" is used for the titles, presumably because they may contain
    # CJK text; the numeric footer uses plain Helvetica.
    header_font = "china-ss"
    footer_font = "helv"
    font_size = 9
    margin = 40
    header_y = 35
    rule_y = 45
    header_color = (0.5, 0.5, 0.5)
    footer_color = (0.4, 0.4, 0.4)
    rule_color = (0.8, 0.8, 0.8)

    for abs_page, page in enumerate(doc, start=start_page_num):
        if abs_page in self.skip_decoration_pages:
            continue

        page_width = page.rect.width

        # Centre the page number using its measured width; a fixed offset
        # drifts to the right as the number gains digits.
        footer_text = f"{abs_page}"
        footer_width = fitz.get_text_length(
            footer_text, fontname=footer_font, fontsize=font_size
        )
        page.insert_text(
            ((page_width - footer_width) / 2, page.rect.height - 30),
            footer_text,
            fontsize=font_size,
            fontname=footer_font,
            color=footer_color,
        )

        if abs_page % 2 != 0:
            # Odd page: section title flush with the right margin.
            text_width = fitz.get_text_length(
                section_title, fontname=header_font, fontsize=font_size
            )
            page.insert_text(
                (page_width - text_width - margin, header_y),
                section_title,
                fontsize=font_size,
                fontname=header_font,
                color=header_color,
            )
        else:
            # Even page: book title flush with the left margin.
            page.insert_text(
                (margin, header_y),
                book_title,
                fontsize=font_size,
                fontname=header_font,
                color=header_color,
            )

        page.draw_line(
            (margin, rule_y),
            (page_width - margin, rule_y),
            color=rule_color,
            width=0.4,
        )
99+
def get_english_filename(self, nav_paths=None):
    """Return the output PDF file name configured for this book in ``nav.json``.

    The first readable candidate file that parses as a JSON list is
    searched for an item whose ``"title"`` equals the book title; that
    item's ``export.filename`` is returned when it is a non-empty string.

    Args:
        nav_paths: Optional iterable of candidate ``nav.json`` locations,
            tried in order. Defaults to the built-in list of known
            locations (developer checkout, working directory, container
            path, sibling repository).

    Returns:
        The configured file name, or ``"<book title>.pdf"`` when no
        candidate file yields a usable name.

    Raises:
        KeyError: If ``self.book_data`` has no ``"title"`` key.
    """
    title = self.book_data["title"]
    fallback = f"{title}.pdf"

    if nav_paths is None:
        nav_paths = [
            Path("D:/Github/blog/whk/config/nav.json"),
            Path("config/nav.json"),
            Path("/app/config/nav.json"),
            Path("../../whk/config/nav.json"),
        ]

    for nav_path in nav_paths:
        try:
            with open(nav_path, "r", encoding="utf-8") as f:
                nav_data = json.load(f)
        except (OSError, ValueError):
            # Missing, unreadable, badly encoded or malformed file: try the
            # next candidate. (JSON and Unicode decode errors are ValueErrors.)
            continue

        if not isinstance(nav_data, list):
            continue

        for item in nav_data:
            if not isinstance(item, dict) or item.get("title") != title:
                continue
            export = item.get("export")
            filename = export.get("filename") if isinstance(export, dict) else None
            # A null or non-string entry must not leak out: the caller joins
            # the result onto a directory path.
            if isinstance(filename, str) and filename:
                return filename
            return fallback

    return fallback
78112
def process(self):
    """Assemble the final book PDF from pre-rendered parts and clean up.

    Order of assembly: cover, frontispiece and table of contents; then for
    each section its opener page followed by the section's content PDFs
    (decorated with headers/footers); finally the back cover. Blank filler
    pages are inserted via ``ensure_parity`` so each part starts on the
    intended odd/even page. The outline is written with ``set_toc``, links
    are added to the TOC page, the result is saved under the name returned
    by ``get_english_filename`` and the consumed part files, ``*.tex`` files
    and ``tex_tasks.txt`` in the output directory are deleted.

    Returns:
        None.

    Raises:
        KeyError: If the book description lacks a required key
            (``title``, ``sections``, ``path``).
        Exception: Errors raised by PyMuPDF while opening, merging or
            saving propagate; ``self.final_doc`` is closed either way and
            no cleanup is performed in that case.
    """
    book_title = self.book_data['title']
    temp_files = []
    output_file = self.output_dir / self.get_english_filename()

    def append_standalone(pdf_path, bookmark_title):
        """Append an undecorated part; return its 1-based start page, or None if absent."""
        if not pdf_path.exists():
            return None
        start_page = self.page_offset + 1
        # The context manager closes the part even when the merge fails.
        with fitz.open(pdf_path) as part:
            self.skip_decoration_pages.add(start_page)
            self.final_doc.insert_pdf(part)
            self.page_offset += len(part)
        self.toc_data.append([1, bookmark_title, start_page])
        temp_files.append(pdf_path)
        return start_page

    def remove_quietly(path):
        """Delete ``path``; cleanup is best-effort, so filesystem errors are ignored."""
        try:
            path.unlink()
        except OSError:
            pass

    try:
        # (key, file name, bookmark label, parity of the page it must start on)
        decorative_pages = [
            ("cover", f"{book_title}_cover.pdf", "封面", 1),
            ("frontispiece", f"{book_title}_frontispiece.pdf", "扉页", 0),
            ("toc", f"{book_title}_toc.pdf", "目录", 1),
        ]
        toc_page_num = 0
        for key, fname, label, target_parity in decorative_pages:
            # Parity is enforced before the existence check, as before.
            self.ensure_parity(target_parity)
            start_page = append_standalone(self.output_dir / fname, label)
            if key == "toc" and start_page is not None:
                toc_page_num = start_page

        for section in self.book_data["sections"]:
            sec_title = section['title']
            self.ensure_parity(1)
            opener_path = self.output_dir / f"opener_{sec_title}.pdf"
            if append_standalone(opener_path, sec_title) is None:
                # No opener page: the section bookmark points at whatever comes next.
                self.toc_data.append([1, sec_title, self.page_offset + 1])

            for sub in section["sections"]:
                sub_title = sub['title']
                self.ensure_parity(0)
                content_path = self.book_json_path.parent / sub["path"]
                if not content_path.exists():
                    content_path = Path("site/build") / sub["path"]
                if not content_path.exists():
                    # A missing chapter leaves a hole in the book; say so
                    # instead of skipping it silently.
                    print(f"Warning: content not found at {content_path}")
                    continue

                with fitz.open(content_path) as doc:
                    chapter_headings = self.extract_precise_toc(doc, self.page_offset)
                    main_title_norm = sub_title.strip().lower()
                    if chapter_headings and chapter_headings[0][1].strip().lower() == main_title_norm:
                        # The document's own first heading repeats the page
                        # title: promote it to level 2 instead of adding a
                        # duplicate bookmark.
                        chapter_headings[0][0] = 2
                    else:
                        self.toc_data.append([2, sub_title, self.page_offset + 1])
                    self.toc_data.extend(chapter_headings)
                    self.draw_decorations(doc, self.page_offset + 1, book_title, sec_title)
                    self.final_doc.insert_pdf(doc)
                    self.page_offset += len(doc)
                temp_files.append(content_path)

        self.ensure_parity(0)
        append_standalone(self.output_dir / f"{book_title}_backcover.pdf", "封底")

        # PyMuPDF's set_toc rejects an outline whose level rises by more than
        # one between consecutive entries (e.g. a promoted level-2 title
        # followed directly by a level-4 heading), so cap every jump at +1.
        previous_level = 0
        for entry in self.toc_data:
            entry[0] = min(entry[0], previous_level + 1)
            previous_level = entry[0]

        self.final_doc.set_toc(self.toc_data)
        if toc_page_num > 0:
            self.add_toc_links(toc_page_num)
        self.final_doc.save(output_file, deflate=True, garbage=4)
    finally:
        # Release the merged document even when assembling or saving failed.
        self.final_doc.close()

    print(f"Final PDF saved to {output_file}")

    # Remove consumed inputs, never the file that was just written.
    resolved_output = output_file.resolve()
    for part_path in temp_files:
        try:
            is_output = part_path.resolve() == resolved_output
        except (OSError, RuntimeError):
            continue
        if not is_output:
            remove_quietly(part_path)
    for tex_path in self.output_dir.glob("*.tex"):
        remove_quietly(tex_path)
    remove_quietly(self.output_dir / "tex_tasks.txt")
171193
172194if __name__ == "__main__" :
173195 import argparse
@@ -176,63 +198,48 @@ def process(self):
176198 parser .add_argument ("--plan-only" , action = "store_true" )
177199 parser .add_argument ("--merge" , action = "store_true" )
178200 args = parser .parse_args ()
179-
180201 processor = PDFProcessor (args .book_json )
181202 if args .plan_only :
182- # 仅生成 TeX 模板供后续容器编译
183- print ("Rendering TeX templates..." )
184203 generated_tex_files = []
185204 book_title = processor .book_data .get ('title' , 'Unknown' )
186-
187- common_data = {
188- "title" : book_title ,
189- "subtitle" : processor .book_data .get ("subtitle" , "" ),
190- "authors" : processor .book_data .get ("authors" , []),
191- "info" : processor .book_data .get ("info" , {})
192- }
193-
194- # 1. 封面
195- cover_tex = processor .jinja_env .get_template ("cover.tex.j2" ).render (** common_data )
205+ est_offset = 3
206+ common_data = {"title" : book_title , "subtitle" : processor .book_data .get ("subtitle" , "" ), "authors" : processor .book_data .get ("authors" , []), "info" : processor .book_data .get ("info" , {})}
196207 cover_path = processor .output_dir / f"{ book_title } _cover.tex"
197- with open (cover_path , "w" , encoding = "utf-8" ) as f : f .write (cover_tex )
208+ with open (cover_path , "w" , encoding = "utf-8" ) as f : f .write (processor . jinja_env . get_template ( "cover.tex.j2" ). render ( ** common_data ) )
198209 generated_tex_files .append (str (cover_path ))
199-
200- # 2. 扉页
201- front_tex = processor .jinja_env .get_template ("frontispiece.tex.j2" ).render (** common_data )
202210 front_path = processor .output_dir / f"{ book_title } _frontispiece.tex"
203- with open (front_path , "w" , encoding = "utf-8" ) as f : f .write (front_tex )
211+ with open (front_path , "w" , encoding = "utf-8" ) as f : f .write (processor . jinja_env . get_template ( "frontispiece.tex.j2" ). render ( ** common_data ) )
204212 generated_tex_files .append (str (front_path ))
205-
206- # 3. 目录页 (简版概要)
207213 toc_outline = []
208- for sec in processor .book_data ["sections" ]:
209- toc_outline .append ({"title" : sec ['title' ], "page" : "?" }) # 物理页码在 plan 阶段未知,通常填 ? 或略过
214+ running_page = est_offset + 1
215+ for section in processor .book_data ["sections" ]:
216+ if running_page % 2 == 0 : running_page += 1
217+ entry = {"title" : section ['title' ], "page" : running_page , "children" : []}
218+ running_page += 1
219+ for sub in section ["sections" ]:
220+ if running_page % 2 != 0 : running_page += 1
221+ content_path = processor .book_json_path .parent / sub ["path" ]
222+ if not content_path .exists (): content_path = Path ("site/build" ) / sub ["path" ]
223+ content_page_count = 0
224+ if content_path .exists ():
225+ try :
226+ with fitz .open (content_path ) as doc : content_page_count = len (doc )
227+ except Exception : pass
228+ entry ["children" ].append ({"title" : sub ['title' ], "page" : running_page })
229+ running_page += content_page_count
230+ toc_outline .append (entry )
210231 toc_tex = processor .jinja_env .get_template ("toc.tex.j2" ).render (toc_outline = toc_outline , ** common_data )
211232 toc_path = processor .output_dir / f"{ book_title } _toc.tex"
212233 with open (toc_path , "w" , encoding = "utf-8" ) as f : f .write (toc_tex )
213234 generated_tex_files .append (str (toc_path ))
214-
215- # 4. 章首页
216235 for idx , section in enumerate (processor .book_data ["sections" ], 1 ):
217- opener_tex = processor .jinja_env .get_template ("opener.tex.j2" ).render (
218- chapter_num = idx ,
219- chapter_title = section ["title" ]
220- )
221236 opener_path = processor .output_dir / f"opener_{ section ['title' ]} .tex"
222- with open (opener_path , "w" , encoding = "utf-8" ) as f : f .write (opener_tex )
237+ with open (opener_path , "w" , encoding = "utf-8" ) as f : f .write (processor . jinja_env . get_template ( "opener.tex.j2" ). render ( chapter_num = idx , chapter_title = section [ "title" ]) )
223238 generated_tex_files .append (str (opener_path ))
224-
225- # 5. 封底
226- back_tex = processor .jinja_env .get_template ("backcover.tex.j2" ).render (** common_data )
227239 back_path = processor .output_dir / f"{ book_title } _backcover.tex"
228- with open (back_path , "w" , encoding = "utf-8" ) as f : f .write (back_tex )
240+ with open (back_path , "w" , encoding = "utf-8" ) as f : f .write (processor . jinja_env . get_template ( "backcover.tex.j2" ). render ( ** common_data ) )
229241 generated_tex_files .append (str (back_path ))
230-
231- # 写入任务列表供 CI 循环调用
232242 with open (processor .output_dir / "tex_tasks.txt" , "w" , encoding = "utf-8" ) as f :
233- for tf in generated_tex_files :
234- f .write (f"{ tf } \n " )
235- print (f"Generated { len (generated_tex_files )} TeX files. List saved to { processor .output_dir / 'tex_tasks.txt' } " )
243+ for tf in generated_tex_files : f .write (f"{ tf } \n " )
236244 if args .merge :
237- # 执行最终的 PDF 合体
238245 processor .process ()
0 commit comments