Skip to content

Commit 95a690e

Browse files
committed
Update
1 parent 5b2708e commit 95a690e

File tree

4 files changed

+178
-159
lines changed

4 files changed

+178
-159
lines changed

images/exporter-build/scripts/processor.py

Lines changed: 122 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -13,38 +13,28 @@ def __init__(self, book_json_path, output_dir="build"):
1313
with open(self.book_json_path, "r", encoding="utf-8") as f:
1414
self.book_data = json.load(f)
1515

16-
# 锚定模板目录到镜像内的绝对路径
1716
template_dir = os.environ.get("TEMPLATES_DIR", "/app/templates")
1817
self.jinja_env = Environment(loader=FileSystemLoader(template_dir))
1918
self.final_doc = fitz.open()
2019
self.page_offset = 0
21-
self.toc_data = []
20+
self.toc_data = [] # [[lvl, title, page, dest]]
21+
self.skip_decoration_pages = set() # 1-based
22+
self.book_meta = {}
2223

2324
def extract_precise_toc(self, doc, offset):
24-
"""
25-
根据 get_toc() 返回的初步目录,在对应页码进行文本定位,获取 y 坐标并偏移。
26-
调整层级以适应整体书籍结构 (Headings 设为 Level 3+)。
27-
"""
2825
raw_toc = doc.get_toc()
2926
refined_toc = []
30-
3127
for entry in raw_toc:
32-
lvl = entry[0]
33-
title = entry[1]
34-
page_1 = entry[2]
35-
36-
# 原始 PDF 的 H1 (lvl 1) 在合集中应设为 Level 3
28+
lvl, title, page_1 = entry[0], entry[1], entry[2]
3729
new_lvl = lvl + 2
3830
new_page_1 = page_1 + offset
3931
dest = {"kind": fitz.LINK_GOTO, "page": new_page_1 - 1, "to": fitz.Point(0, 0)}
40-
4132
page_0 = page_1 - 1
4233
if 0 <= page_0 < len(doc):
4334
found_y = None
4435
page_obj = doc[page_0]
4536
blocks = page_obj.get_text("dict")["blocks"]
4637
target_title_norm = title.strip().lower()
47-
4838
for b in blocks:
4939
if "lines" in b:
5040
for line in b["lines"]:
@@ -53,121 +43,153 @@ def extract_precise_toc(self, doc, offset):
5343
found_y = s["bbox"][1]
5444
break
5545
if found_y is not None: break
46+
if found_y is not None: break
5647
if found_y is not None: break
57-
5848
if found_y is not None:
5949
dest["to"] = fitz.Point(0, found_y)
60-
6150
refined_toc.append([new_lvl, title, new_page_1, dest])
62-
6351
return refined_toc
6452

53+
def add_toc_links(self, toc_page_num):
    """Overlay clickable GOTO links on the table-of-contents page.

    For every entry in ``self.toc_data`` with level 1 or 2, the first text
    block on the given page whose text contains the entry title gets a link
    pointing at the entry's target page.

    Only the single page ``toc_page_num`` is scanned; entries that are not
    found on that page are silently left without a link.

    Args:
        toc_page_num: 1-based page number of the table-of-contents page
            inside ``self.final_doc``.

    Returns:
        None. Does nothing when ``toc_page_num`` is outside
        ``1..len(self.final_doc)``.
    """
    # Reject anything outside the document. A value below 1 would otherwise
    # turn into a negative index and silently decorate a page counted from
    # the end of the document.
    if not 1 <= toc_page_num <= len(self.final_doc):
        return

    page = self.final_doc[toc_page_num - 1]
    # Each block is a tuple whose first four items are the bounding box and
    # whose fifth item (index 4) is the block text.
    blocks = page.get_text("blocks")

    for entry in self.toc_data:
        level, title, target_page = entry[0], entry[1], entry[2]
        # Deeper headings are not linked. An empty title is skipped because
        # "" is a substring of every block and would link the first block.
        if level > 2 or not title:
            continue
        for block in blocks:
            if title in block[4]:
                page.insert_link({
                    "kind": fitz.LINK_GOTO,
                    "page": target_page - 1,  # link targets are 0-based
                    "from": fitz.Rect(block[:4]),
                })
                break  # only the first matching block is linked
65+
66+
def ensure_parity(self, target_parity):
    """Pad the merged document so the next page number has the given parity.

    The next page number is ``self.page_offset + 1``. When its remainder
    modulo 2 differs from ``target_parity``, one A4-sized page is created in
    ``self.final_doc``, ``self.page_offset`` is advanced, and the new page
    number is recorded in ``self.skip_decoration_pages``.

    Args:
        target_parity: 1 if the next page must be odd, 0 if it must be even.

    Returns:
        True if a padding page was added, False if none was needed.
    """
    # Guard clause: the upcoming page already has the requested parity.
    if (self.page_offset + 1) % 2 == target_parity:
        return False

    a4_width = fitz.paper_size("a4")[0]
    a4_height = fitz.paper_size("a4")[1]
    self.final_doc.new_page(width=a4_width, height=a4_height)
    self.page_offset += 1
    # Register the padding page (1-based) in the skip set.
    self.skip_decoration_pages.add(self.page_offset)
    return True
74+
75+
def draw_decorations(self, doc, start_page_num, book_title, section_title):
    """Stamp a page number, running header and header rule on each page.

    Pages whose absolute number is in ``self.skip_decoration_pages`` are
    left untouched. On odd pages the header shows ``section_title``
    right-aligned; on even pages it shows ``book_title`` left-aligned.

    Args:
        doc: Document whose pages are decorated in place.
        start_page_num: Absolute 1-based page number of ``doc``'s first page.
        book_title: Header text for even pages.
        section_title: Header text for odd pages.

    Returns:
        None.
    """
    # Layout constants, identical for every page.
    header_font = "china-ss"
    footer_font = "helv"
    text_size = 9
    side_margin = 40
    header_baseline = 35
    rule_y = 45
    header_color = (0.5, 0.5, 0.5)

    for index in range(len(doc)):
        page = doc[index]
        page_number = start_page_num + index
        if page_number in self.skip_decoration_pages:
            continue

        page_width = page.rect.width

        # Footer: page number near the horizontal centre, 30 units above
        # the bottom edge.
        page.insert_text(
            (page_width / 2 - 5, page.rect.height - 30),
            f"{page_number}",
            fontsize=text_size,
            fontname=footer_font,
            color=(0.4, 0.4, 0.4),
        )

        # Header: section title flush right on odd pages, book title flush
        # left on even pages.
        if page_number % 2 != 0:
            label = section_title
            label_width = fitz.get_text_length(
                label, fontname=header_font, fontsize=text_size
            )
            header_x = page_width - label_width - side_margin
        else:
            label = book_title
            header_x = side_margin
        page.insert_text(
            (header_x, header_baseline),
            label,
            fontsize=text_size,
            fontname=header_font,
            color=header_color,
        )

        # Thin rule under the header, spanning margin to margin.
        page.draw_line(
            (side_margin, rule_y),
            (page_width - side_margin, rule_y),
            color=(0.8, 0.8, 0.8),
            width=0.4,
        )
99+
65100
def get_english_filename(self):
66-
"""从 nav.json 中查找对应的英文文件名"""
67-
nav_path = Path("D:/Github/blog/whk/config/nav.json")
68-
if nav_path.exists():
69-
try:
70-
with open(nav_path, "r", encoding="utf-8") as f:
71-
nav_data = json.load(f)
72-
for item in nav_data:
73-
if item.get("title") == self.book_data.get("title"):
74-
return item.get("export", {}).get("filename", f"{self.book_data['title']}.pdf")
75-
except Exception as e:
76-
print(f"Error reading nav.json: {e}")
101+
paths = [Path("D:/Github/blog/whk/config/nav.json"), Path("config/nav.json"), Path("/app/config/nav.json"), Path("../../whk/config/nav.json")]
102+
for p in paths:
103+
if p.exists():
104+
try:
105+
with open(p, "r", encoding="utf-8") as f:
106+
nav_data = json.load(f)
107+
for item in nav_data:
108+
if item.get("title") == self.book_data.get("title"):
109+
return item.get("export", {}).get("filename", f"{self.book_data['title']}.pdf")
110+
except Exception: pass
77111
return f"{self.book_data['title']}.pdf"
78112

79113
def process(self):
80114
book_title = self.book_data['title']
81-
print(f"Processing Book: {book_title}")
82115
temp_files = []
83-
84-
# 1. 插入封面与装饰页
85-
decorative_pages = [
86-
("cover", f"{book_title}_cover.pdf", "封面"),
87-
("frontispiece", f"{book_title}_frontispiece.pdf", "扉页"),
88-
("toc", f"{book_title}_toc.pdf", "目录")
89-
]
90-
91-
for key, fname, label in decorative_pages:
116+
output_file = self.output_dir / self.get_english_filename()
117+
decorative_pages = [("cover", f"{book_title}_cover.pdf", "封面", 1), ("frontispiece", f"{book_title}_frontispiece.pdf", "扉页", 0), ("toc", f"{book_title}_toc.pdf", "目录", 1)]
118+
toc_page_num = 0
119+
for key, fname, label, target_parity in decorative_pages:
120+
self.ensure_parity(target_parity)
92121
p = self.output_dir / fname
93122
if p.exists():
94123
doc = fitz.open(p)
124+
p_start = self.page_offset + 1
125+
if key == "toc": toc_page_num = p_start
126+
self.skip_decoration_pages.add(p_start)
95127
self.final_doc.insert_pdf(doc)
96128
self.page_offset += len(doc)
97-
self.toc_data.append([1, label, self.page_offset - len(doc) + 1])
129+
self.toc_data.append([1, label, p_start])
98130
doc.close()
99131
temp_files.append(p)
100-
101-
# 2. 遍历章节
102132
for section in self.book_data["sections"]:
103133
sec_title = section['title']
104-
print(f" Inserting Section: {sec_title}")
105-
106-
# 章节首页 (Level 1)
134+
self.ensure_parity(1)
107135
opener_path = self.output_dir / f"opener_{sec_title}.pdf"
108136
if opener_path.exists():
109137
opener_doc = fitz.open(opener_path)
138+
p_start = self.page_offset + 1
139+
self.skip_decoration_pages.add(p_start)
110140
self.final_doc.insert_pdf(opener_doc)
111141
self.page_offset += len(opener_doc)
112-
self.toc_data.append([1, sec_title, self.page_offset - len(opener_doc) + 1])
142+
self.toc_data.append([1, sec_title, p_start])
113143
opener_doc.close()
114144
temp_files.append(opener_path)
115145
else:
116146
self.toc_data.append([1, sec_title, self.page_offset + 1])
117-
118-
# 插入内容页 (Level 2)
119147
for sub in section["sections"]:
120148
sub_title = sub['title']
149+
self.ensure_parity(0)
121150
content_path = self.book_json_path.parent / sub["path"]
122-
if not content_path.exists():
123-
content_path = Path("site/build") / sub["path"]
124-
151+
if not content_path.exists(): content_path = Path("site/build") / sub["path"]
125152
if content_path.exists():
126153
doc = fitz.open(content_path)
127-
# 记录页面标题作为 Level 2 书签
128-
self.toc_data.append([2, sub_title, self.page_offset + 1])
129-
130-
# 提取并偏移章节内的 headings (Level 3+)
131154
chapter_headings = self.extract_precise_toc(doc, self.page_offset)
132-
self.toc_data.extend(chapter_headings)
133-
155+
main_title_norm = sub_title.strip().lower()
156+
if chapter_headings and chapter_headings[0][1].strip().lower() == main_title_norm:
157+
chapter_headings[0][0] = 2
158+
self.toc_data.extend(chapter_headings)
159+
else:
160+
self.toc_data.append([2, sub_title, self.page_offset + 1])
161+
self.toc_data.extend(chapter_headings)
162+
self.draw_decorations(doc, self.page_offset + 1, book_title, sec_title)
134163
self.final_doc.insert_pdf(doc)
135164
self.page_offset += len(doc)
136165
doc.close()
137166
temp_files.append(content_path)
138-
else:
139-
print(f" Warning: Content not found at {content_path}")
140-
141-
# 3. 封底
167+
self.ensure_parity(0)
142168
back_path = self.output_dir / f"{book_title}_backcover.pdf"
143169
if back_path.exists():
144170
doc = fitz.open(back_path)
171+
p_start = self.page_offset + 1
172+
self.skip_decoration_pages.add(p_start)
145173
self.final_doc.insert_pdf(doc)
146174
self.page_offset += len(doc)
147-
self.toc_data.append([1, "封底", self.page_offset - len(doc) + 1])
175+
self.toc_data.append([1, "封底", p_start])
148176
doc.close()
149177
temp_files.append(back_path)
150-
151-
# 4. 设置最终目录并保存
152178
self.final_doc.set_toc(self.toc_data)
153-
154-
final_filename = self.get_english_filename()
155-
output_file = self.output_dir / final_filename
179+
if toc_page_num > 0: self.add_toc_links(toc_page_num)
156180
self.final_doc.save(output_file, deflate=True, garbage=4)
157181
self.final_doc.close()
158-
159182
print(f"Final PDF saved to {output_file}")
160-
161-
# 5. 清理
162-
print("Cleaning up temporary files...")
183+
resolved_output = output_file.resolve()
163184
for f in temp_files:
164185
try:
165-
if f.exists() and f != output_file:
166-
f.unlink()
167-
except: pass
168-
# 清理 tex 文件
169-
for f in self.output_dir.glob("*.tex"): f.unlink()
170-
if (self.output_dir / "tex_tasks.txt").exists(): (self.output_dir / "tex_tasks.txt").unlink()
186+
if f.exists() and f.resolve() != resolved_output: f.unlink()
187+
except Exception: pass
188+
for f in self.output_dir.glob("*.tex"):
189+
try: f.unlink()
190+
except Exception: pass
191+
if (self.output_dir / "tex_tasks.txt").exists():
192+
(self.output_dir / "tex_tasks.txt").unlink()
171193

172194
if __name__ == "__main__":
173195
import argparse
@@ -176,63 +198,48 @@ def process(self):
176198
parser.add_argument("--plan-only", action="store_true")
177199
parser.add_argument("--merge", action="store_true")
178200
args = parser.parse_args()
179-
180201
processor = PDFProcessor(args.book_json)
181202
if args.plan_only:
182-
# 仅生成 TeX 模板供后续容器编译
183-
print("Rendering TeX templates...")
184203
generated_tex_files = []
185204
book_title = processor.book_data.get('title', 'Unknown')
186-
187-
common_data = {
188-
"title": book_title,
189-
"subtitle": processor.book_data.get("subtitle", ""),
190-
"authors": processor.book_data.get("authors", []),
191-
"info": processor.book_data.get("info", {})
192-
}
193-
194-
# 1. 封面
195-
cover_tex = processor.jinja_env.get_template("cover.tex.j2").render(**common_data)
205+
est_offset = 3
206+
common_data = {"title": book_title, "subtitle": processor.book_data.get("subtitle", ""), "authors": processor.book_data.get("authors", []), "info": processor.book_data.get("info", {})}
196207
cover_path = processor.output_dir / f"{book_title}_cover.tex"
197-
with open(cover_path, "w", encoding="utf-8") as f: f.write(cover_tex)
208+
with open(cover_path, "w", encoding="utf-8") as f: f.write(processor.jinja_env.get_template("cover.tex.j2").render(**common_data))
198209
generated_tex_files.append(str(cover_path))
199-
200-
# 2. 扉页
201-
front_tex = processor.jinja_env.get_template("frontispiece.tex.j2").render(**common_data)
202210
front_path = processor.output_dir / f"{book_title}_frontispiece.tex"
203-
with open(front_path, "w", encoding="utf-8") as f: f.write(front_tex)
211+
with open(front_path, "w", encoding="utf-8") as f: f.write(processor.jinja_env.get_template("frontispiece.tex.j2").render(**common_data))
204212
generated_tex_files.append(str(front_path))
205-
206-
# 3. 目录页 (简版概要)
207213
toc_outline = []
208-
for sec in processor.book_data["sections"]:
209-
toc_outline.append({"title": sec['title'], "page": "?"}) # 物理页码在 plan 阶段未知,通常填 ? 或略过
214+
running_page = est_offset + 1
215+
for section in processor.book_data["sections"]:
216+
if running_page % 2 == 0: running_page += 1
217+
entry = {"title": section['title'], "page": running_page, "children": []}
218+
running_page += 1
219+
for sub in section["sections"]:
220+
if running_page % 2 != 0: running_page += 1
221+
content_path = processor.book_json_path.parent / sub["path"]
222+
if not content_path.exists(): content_path = Path("site/build") / sub["path"]
223+
content_page_count = 0
224+
if content_path.exists():
225+
try:
226+
with fitz.open(content_path) as doc: content_page_count = len(doc)
227+
except Exception: pass
228+
entry["children"].append({"title": sub['title'], "page": running_page})
229+
running_page += content_page_count
230+
toc_outline.append(entry)
210231
toc_tex = processor.jinja_env.get_template("toc.tex.j2").render(toc_outline=toc_outline, **common_data)
211232
toc_path = processor.output_dir / f"{book_title}_toc.tex"
212233
with open(toc_path, "w", encoding="utf-8") as f: f.write(toc_tex)
213234
generated_tex_files.append(str(toc_path))
214-
215-
# 4. 章首页
216235
for idx, section in enumerate(processor.book_data["sections"], 1):
217-
opener_tex = processor.jinja_env.get_template("opener.tex.j2").render(
218-
chapter_num=idx,
219-
chapter_title=section["title"]
220-
)
221236
opener_path = processor.output_dir / f"opener_{section['title']}.tex"
222-
with open(opener_path, "w", encoding="utf-8") as f: f.write(opener_tex)
237+
with open(opener_path, "w", encoding="utf-8") as f: f.write(processor.jinja_env.get_template("opener.tex.j2").render(chapter_num=idx, chapter_title=section["title"]))
223238
generated_tex_files.append(str(opener_path))
224-
225-
# 5. 封底
226-
back_tex = processor.jinja_env.get_template("backcover.tex.j2").render(**common_data)
227239
back_path = processor.output_dir / f"{book_title}_backcover.tex"
228-
with open(back_path, "w", encoding="utf-8") as f: f.write(back_tex)
240+
with open(back_path, "w", encoding="utf-8") as f: f.write(processor.jinja_env.get_template("backcover.tex.j2").render(**common_data))
229241
generated_tex_files.append(str(back_path))
230-
231-
# 写入任务列表供 CI 循环调用
232242
with open(processor.output_dir / "tex_tasks.txt", "w", encoding="utf-8") as f:
233-
for tf in generated_tex_files:
234-
f.write(f"{tf}\n")
235-
print(f"Generated {len(generated_tex_files)} TeX files. List saved to {processor.output_dir / 'tex_tasks.txt'}")
243+
for tf in generated_tex_files: f.write(f"{tf}\n")
236244
if args.merge:
237-
# 执行最终的 PDF 合体
238245
processor.process()
Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,39 @@
11
\documentclass[12pt,a4paper]{article}
2-
\usepackage[utf8]{inputenc}
32
\usepackage{fontspec}
43
\usepackage{geometry}
5-
\geometry{a4paper, margin=1in}
4+
\usepackage{xcolor}
5+
\geometry{a4paper, margin=1.5in}
66

77
\setmainfont{Noto Serif CJK SC}
8+
\definecolor{graycolor}{RGB}{120, 120, 120}
89

910
\begin{document}
1011
\thispagestyle{empty}
1112
\vspace*{3cm}
1213
\begin{center}
1314
{\Huge \bfseries {{ title }}} \par
14-
\vspace{1cm}
15-
{\Large {{ subtitle }}} \par
16-
\vspace{2cm}
15+
\vspace{0.8cm}
16+
{\huge \itshape \color{graycolor} {{ subtitle }}} \par
17+
\vspace{3cm}
1718
{\large 作者:{{ authors | join(', ') }}} \par
19+
1820
\vfill
21+
1922
\begin{minipage}{0.8\textwidth}
20-
{\itshape
23+
\centering
24+
{\Large \textbf{丛书简介}} \par \vspace{1cm}
25+
{\large \setlength{\parindent}{2em}
2126
{% for line in info.abstract %}
22-
{{ line }} \par \vspace{0.5cm}
27+
{{ line }} \par \vspace{0.4cm}
2328
{% endfor %}
2429
}
2530
\end{minipage}
31+
2632
\vfill
33+
2734
{\large {{ info.publishing }}} \par
28-
{\large \today}
35+
\vspace{0.5cm}
36+
{# The backslash is doubled because Jinja string literals process escape sequences; a single backslash followed by "t" would render as a TAB instead of the LaTeX \today macro. -#}
{\large {{ info.date or "\\today" }}}
2937
\end{center}
3038
\newpage
3139
\end{document}

0 commit comments

Comments
 (0)