Skip to content

Commit 5b2708e

Browse files
committed
Update
1 parent 3877b36 commit 5b2708e

File tree

6 files changed

+196
-51
lines changed

6 files changed

+196
-51
lines changed

images/exporter-build/scripts/processor.py

Lines changed: 117 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,25 @@ def __init__(self, book_json_path, output_dir="build"):
2323
def extract_precise_toc(self, doc, offset):
2424
"""
2525
根据 get_toc() 返回的初步目录,在对应页码进行文本定位,获取 y 坐标并偏移。
26+
调整层级以适应整体书籍结构 (Headings 设为 Level 3+)。
2627
"""
27-
# PyMuPDF get_toc() 可能返回 3 或 4 个元素的列表: [lvl, title, page, (dest_dict)]
2828
raw_toc = doc.get_toc()
2929
refined_toc = []
3030

3131
for entry in raw_toc:
3232
lvl = entry[0]
3333
title = entry[1]
34-
page_1 = entry[2] # 1st-based page in current doc
34+
page_1 = entry[2]
3535

36-
# 默认目标 (整页跳转)
37-
# PyMuPDF set_toc 期待 dest 为字典,或者 None (默认跳转到页顶)
36+
# 原始 PDF 的 H1 (lvl 1) 在合集中应设为 Level 3
37+
new_lvl = lvl + 2
3838
new_page_1 = page_1 + offset
3939
dest = {"kind": fitz.LINK_GOTO, "page": new_page_1 - 1, "to": fitz.Point(0, 0)}
4040

41-
# 尝试在特定页面查找标题以获取精确 Y 坐标
4241
page_0 = page_1 - 1
4342
if 0 <= page_0 < len(doc):
4443
found_y = None
4544
page_obj = doc[page_0]
46-
# get_text("dict") 包含了文本块的边界框
4745
blocks = page_obj.get_text("dict")["blocks"]
4846
target_title_norm = title.strip().lower()
4947

@@ -52,75 +50,124 @@ def extract_precise_toc(self, doc, offset):
5250
for line in b["lines"]:
5351
for s in line["spans"]:
5452
if s["text"].strip().lower() == target_title_norm:
55-
found_y = s["bbox"][1] # y0 (top coordinate)
53+
found_y = s["bbox"][1]
5654
break
5755
if found_y is not None: break
5856
if found_y is not None: break
5957

6058
if found_y is not None:
6159
dest["to"] = fitz.Point(0, found_y)
62-
else:
63-
print(f" Note: Could not find precise position for '{title}' on page {page_1}, using page top.")
6460

65-
refined_toc.append([lvl, title, new_page_1, dest])
61+
refined_toc.append([new_lvl, title, new_page_1, dest])
6662

6763
return refined_toc
6864

65+
def get_english_filename(self):
66+
"""从 nav.json 中查找对应的英文文件名"""
67+
nav_path = Path("D:/Github/blog/whk/config/nav.json")
68+
if nav_path.exists():
69+
try:
70+
with open(nav_path, "r", encoding="utf-8") as f:
71+
nav_data = json.load(f)
72+
for item in nav_data:
73+
if item.get("title") == self.book_data.get("title"):
74+
return item.get("export", {}).get("filename", f"{self.book_data['title']}.pdf")
75+
except Exception as e:
76+
print(f"Error reading nav.json: {e}")
77+
return f"{self.book_data['title']}.pdf"
78+
6979
def process(self):
70-
print(f"Processing Book: {self.book_data['title']}")
80+
book_title = self.book_data['title']
81+
print(f"Processing Book: {book_title}")
82+
temp_files = []
7183

72-
# 1. 准备装饰页 (TeX)
73-
# TODO: 渲染模板并调用 xelatex (此处假设已有编译好的 PDF 或通过外部步骤完成)
84+
# 1. 插入封面与装饰页
85+
decorative_pages = [
86+
("cover", f"{book_title}_cover.pdf", "封面"),
87+
("frontispiece", f"{book_title}_frontispiece.pdf", "扉页"),
88+
("toc", f"{book_title}_toc.pdf", "目录")
89+
]
7490

75-
# 2. 合成逻辑
76-
# 插入封面 (假设名称为 cover.pdf)
77-
cover_path = self.output_dir / f"{self.book_data['title']}_cover.pdf"
78-
if cover_path.exists():
79-
cover_doc = fitz.open(cover_path)
80-
self.final_doc.insert_pdf(cover_doc)
81-
self.page_offset += len(cover_doc)
82-
cover_doc.close()
91+
for key, fname, label in decorative_pages:
92+
p = self.output_dir / fname
93+
if p.exists():
94+
doc = fitz.open(p)
95+
self.final_doc.insert_pdf(doc)
96+
self.page_offset += len(doc)
97+
self.toc_data.append([1, label, self.page_offset - len(doc) + 1])
98+
doc.close()
99+
temp_files.append(p)
83100

84-
# 遍历章节
101+
# 2. 遍历章节
85102
for section in self.book_data["sections"]:
86-
print(f" Inserting Section: {section['title']}")
103+
sec_title = section['title']
104+
print(f" Inserting Section: {sec_title}")
87105

88-
# 插入章首页 (TeX 产物)
89-
opener_path = self.output_dir / f"opener_{section['title']}.pdf"
106+
# 章节首页 (Level 1)
107+
opener_path = self.output_dir / f"opener_{sec_title}.pdf"
90108
if opener_path.exists():
91109
opener_doc = fitz.open(opener_path)
92110
self.final_doc.insert_pdf(opener_doc)
93111
self.page_offset += len(opener_doc)
112+
self.toc_data.append([1, sec_title, self.page_offset - len(opener_doc) + 1])
94113
opener_doc.close()
95-
self.toc_data.append([1, section['title'], self.page_offset]) # 章级目录
114+
temp_files.append(opener_path)
115+
else:
116+
self.toc_data.append([1, sec_title, self.page_offset + 1])
96117

97-
# 插入内容页
118+
# 插入内容页 (Level 2)
98119
for sub in section["sections"]:
99-
# 尝试从 JSON 所在目录查找,或者使用绝对 site/build 路径
120+
sub_title = sub['title']
100121
content_path = self.book_json_path.parent / sub["path"]
101122
if not content_path.exists():
102123
content_path = Path("site/build") / sub["path"]
103124

104125
if content_path.exists():
105126
doc = fitz.open(content_path)
106-
# 提取并偏移章节内的书签
107-
chapter_toc = self.extract_precise_toc(doc, self.page_offset)
108-
self.toc_data.extend(chapter_toc)
127+
# 记录页面标题作为 Level 2 书签
128+
self.toc_data.append([2, sub_title, self.page_offset + 1])
129+
130+
# 提取并偏移章节内的 headings (Level 3+)
131+
chapter_headings = self.extract_precise_toc(doc, self.page_offset)
132+
self.toc_data.extend(chapter_headings)
109133

110134
self.final_doc.insert_pdf(doc)
111135
self.page_offset += len(doc)
112136
doc.close()
137+
temp_files.append(content_path)
113138
else:
114139
print(f" Warning: Content not found at {content_path}")
115140

116-
# 3. 设置最终目录
141+
# 3. 封底
142+
back_path = self.output_dir / f"{book_title}_backcover.pdf"
143+
if back_path.exists():
144+
doc = fitz.open(back_path)
145+
self.final_doc.insert_pdf(doc)
146+
self.page_offset += len(doc)
147+
self.toc_data.append([1, "封底", self.page_offset - len(doc) + 1])
148+
doc.close()
149+
temp_files.append(back_path)
150+
151+
# 4. 设置最终目录并保存
117152
self.final_doc.set_toc(self.toc_data)
118153

119-
# 4. 保存
120-
output_file = self.output_dir / f"{self.book_data['title']}.pdf"
154+
final_filename = self.get_english_filename()
155+
output_file = self.output_dir / final_filename
121156
self.final_doc.save(output_file, deflate=True, garbage=4)
122157
self.final_doc.close()
158+
123159
print(f"Final PDF saved to {output_file}")
160+
161+
# 5. 清理
162+
print("Cleaning up temporary files...")
163+
for f in temp_files:
164+
try:
165+
if f.exists() and f != output_file:
166+
f.unlink()
167+
except: pass
168+
# 清理 tex 文件
169+
for f in self.output_dir.glob("*.tex"): f.unlink()
170+
if (self.output_dir / "tex_tasks.txt").exists(): (self.output_dir / "tex_tasks.txt").unlink()
124171

125172
if __name__ == "__main__":
126173
import argparse
@@ -135,31 +182,52 @@ def process(self):
135182
# 仅生成 TeX 模板供后续容器编译
136183
print("Rendering TeX templates...")
137184
generated_tex_files = []
185+
book_title = processor.book_data.get('title', 'Unknown')
138186

139-
# 渲染封面
140-
cover_filename = f"{processor.book_data.get('title', 'Unknown')}_cover.tex"
141-
cover_tex = processor.jinja_env.get_template("cover.tex.j2").render(
142-
title=processor.book_data.get("title", "Unknown"),
143-
subtitle=processor.book_data.get("subtitle", ""),
144-
authors=processor.book_data.get("authors", [])
145-
)
146-
cover_path = processor.output_dir / cover_filename
147-
with open(cover_path, "w", encoding="utf-8") as f:
148-
f.write(cover_tex)
187+
common_data = {
188+
"title": book_title,
189+
"subtitle": processor.book_data.get("subtitle", ""),
190+
"authors": processor.book_data.get("authors", []),
191+
"info": processor.book_data.get("info", {})
192+
}
193+
194+
# 1. 封面
195+
cover_tex = processor.jinja_env.get_template("cover.tex.j2").render(**common_data)
196+
cover_path = processor.output_dir / f"{book_title}_cover.tex"
197+
with open(cover_path, "w", encoding="utf-8") as f: f.write(cover_tex)
149198
generated_tex_files.append(str(cover_path))
150199

151-
# 渲染章首页
200+
# 2. 扉页
201+
front_tex = processor.jinja_env.get_template("frontispiece.tex.j2").render(**common_data)
202+
front_path = processor.output_dir / f"{book_title}_frontispiece.tex"
203+
with open(front_path, "w", encoding="utf-8") as f: f.write(front_tex)
204+
generated_tex_files.append(str(front_path))
205+
206+
# 3. 目录页 (简版概要)
207+
toc_outline = []
208+
for sec in processor.book_data["sections"]:
209+
toc_outline.append({"title": sec['title'], "page": "?"}) # 物理页码在 plan 阶段未知,通常填 ? 或略过
210+
toc_tex = processor.jinja_env.get_template("toc.tex.j2").render(toc_outline=toc_outline, **common_data)
211+
toc_path = processor.output_dir / f"{book_title}_toc.tex"
212+
with open(toc_path, "w", encoding="utf-8") as f: f.write(toc_tex)
213+
generated_tex_files.append(str(toc_path))
214+
215+
# 4. 章首页
152216
for idx, section in enumerate(processor.book_data["sections"], 1):
153-
opener_filename = f"opener_{section['title']}.tex"
154217
opener_tex = processor.jinja_env.get_template("opener.tex.j2").render(
155218
chapter_num=idx,
156219
chapter_title=section["title"]
157220
)
158-
opener_path = processor.output_dir / opener_filename
159-
with open(opener_path, "w", encoding="utf-8") as f:
160-
f.write(opener_tex)
221+
opener_path = processor.output_dir / f"opener_{section['title']}.tex"
222+
with open(opener_path, "w", encoding="utf-8") as f: f.write(opener_tex)
161223
generated_tex_files.append(str(opener_path))
162224

225+
# 5. 封底
226+
back_tex = processor.jinja_env.get_template("backcover.tex.j2").render(**common_data)
227+
back_path = processor.output_dir / f"{book_title}_backcover.tex"
228+
with open(back_path, "w", encoding="utf-8") as f: f.write(back_tex)
229+
generated_tex_files.append(str(back_path))
230+
163231
# 写入任务列表供 CI 循环调用
164232
with open(processor.output_dir / "tex_tasks.txt", "w", encoding="utf-8") as f:
165233
for tf in generated_tex_files:
images/exporter-build/templates/backcover.tex.j2

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
\documentclass[12pt,a4paper]{article}
2+
\usepackage[utf8]{inputenc}
3+
\usepackage{fontspec}
4+
\usepackage{geometry}
5+
\geometry{a4paper, margin=1in}
6+
7+
\setmainfont{Noto Serif CJK SC}
8+
9+
\begin{document}
10+
\thispagestyle{empty}
11+
\vspace*{15cm}
12+
\begin{center}
13+
{\large 版权所有 \textcopyright\ \the\year\ {{ authors | join(', ') }}} \par
14+
\vspace{0.5cm}
15+
{\small 采用 MkDocs Exporter 构建}
16+
\end{center}
17+
\newpage
18+
\end{document}

images/exporter-build/templates/cover.tex.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
\usepackage{geometry}
55
\geometry{a4paper, margin=1in}
66

7-
\setmainfont{Noto Sans CJK SC}
7+
\setmainfont{Noto Serif CJK SC}
88

99
\begin{document}
1010
\begin{titlepage}
images/exporter-build/templates/frontispiece.tex.j2

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
\documentclass[12pt,a4paper]{article}
2+
\usepackage[utf8]{inputenc}
3+
\usepackage{fontspec}
4+
\usepackage{geometry}
5+
\geometry{a4paper, margin=1in}
6+
7+
\setmainfont{Noto Serif CJK SC}
8+
9+
\begin{document}
10+
\thispagestyle{empty}
11+
\vspace*{3cm}
12+
\begin{center}
13+
{\Huge \bfseries {{ title }}} \par
14+
\vspace{1cm}
15+
{\Large {{ subtitle }}} \par
16+
\vspace{2cm}
17+
{\large 作者:{{ authors | join(', ') }}} \par
18+
\vfill
19+
\begin{minipage}{0.8\textwidth}
20+
{\itshape
21+
{% for line in info.abstract %}
22+
{{ line }} \par \vspace{0.5cm}
23+
{% endfor %}
24+
}
25+
\end{minipage}
26+
\vfill
27+
{\large {{ info.publishing }}} \par
28+
{\large \today}
29+
\end{center}
30+
\newpage
31+
\end{document}

images/exporter-build/templates/opener.tex.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
\usepackage{geometry}
55
\geometry{a4paper, margin=1in}
66

7-
\setmainfont{Noto Sans CJK SC}
7+
\setmainfont{Noto Serif CJK SC}
88

99
\begin{document}
1010
\vspace*{10cm}
images/exporter-build/templates/toc.tex.j2

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
\documentclass[12pt,a4paper]{article}
2+
\usepackage[utf8]{inputenc}
3+
\usepackage{fontspec}
4+
\usepackage{geometry}
5+
\geometry{a4paper, margin=1in}
6+
7+
\setmainfont{Noto Serif CJK SC}
8+
9+
\begin{document}
10+
\thispagestyle{empty}
11+
\section*{目录}
12+
\vspace{1cm}
13+
% 这里只是一个占位符,实际目录逻辑由 Python 处理器控制书签
14+
% 或者在此渲染更精美的目录
15+
\begin{enumerate}
16+
{% for section in toc_outline %}
17+
\item {{ section.title }} \dotfill {{ section.page }}
18+
{% if section.children %}
19+
\begin{itemize}
20+
{% for child in section.children %}
21+
\item {{ child.title }} \dotfill {{ child.page }}
22+
{% endfor %}
23+
\end{itemize}
24+
{% endif %}
25+
{% endfor %}
26+
\end{enumerate}
27+
\newpage
28+
\end{document}

0 commit comments

Comments
 (0)