Skip to content

Commit 9d6ec8b

Browse files
committed
Update
1 parent 162c69f commit 9d6ec8b

File tree

2 files changed

+47
-28
lines changed

2 files changed

+47
-28
lines changed

images/fragment/scripts/convert.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,12 @@ def auto_convert_file(extra_args, filters):
4848
f.write(fixed_content)
4949

5050
def main():
51-
filters = ['html-cleanup.lua']
52-
pdoc_args = ['--wrap=none']
51+
# 动态获取当前脚本所在目录,确保在 Docker 或本地运行都能找到过滤器
52+
script_dir = os.path.dirname(os.path.abspath(__file__))
53+
lua_filter_path = os.path.join(script_dir, 'html-cleanup.lua')
54+
55+
filters = [lua_filter_path]
56+
pdoc_args = ['--wrap=none', '--standalone']
5357
auto_convert_file(pdoc_args, filters)
5458

5559
if __name__ == "__main__":

images/fragment/scripts/html-cleanup.lua

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
-- html-cleanup-v2.lua
2-
-- HTML → commonmark_x 终极清理版(支持任意嵌套 callout + 完整反转义)
2+
-- HTML → commonmark_x 终极清理版（支持任意嵌套 callout + 完整反转义 + 元数据提取）
33

44
local function unescape_math(s)
55
if not s then return "" end
6-
s = pandoc.utils.stringify(s)
6+
if type(s) ~= "string" then
7+
s = pandoc.utils.stringify(s)
8+
end
79
-- HTML entities 完整解码
810
s = s:gsub("&amp;", "&")
911
:gsub("&lt;", "<")
@@ -36,7 +38,37 @@ local function max_fence_length(str)
3638
return maxn
3739
end
3840

39-
-- 1. Inline arithmatex: [\$F_1\'\$] → $F_1'$
41+
-- 转换元数据(如果需要作为 frontmatter 写入)
42+
--- Document-level filter: lift fragment metadata into the document meta and
-- drop the raw HTML wrapper blocks that carried it.
--
-- Every raw HTML block is scanned for `data-title="..."` and `data-url="..."`
-- attributes. A match is stored as `meta.title` / `meta.url`; when several
-- blocks carry the attribute, the last one wins. Raw HTML blocks whose text
-- contains `mkdocs-fragment` or `</article>` are removed from the block list.
-- All other blocks are kept in their original order.
--
-- @param doc the document being filtered
-- @return the same document, with updated `meta` and `blocks`
function Pandoc(doc)
  local meta = doc.meta
  local kept = pandoc.List()

  for _, el in ipairs(doc.blocks) do
    local is_wrapper = false

    if el.t == "RawBlock" and el.format == "html" then
      local text = el.text

      -- Heuristic metadata extraction from the attributes of the raw tag.
      local title = text:match('data%-title="([^"]+)"')
      local url = text:match('data%-url="([^"]+)"')
      if title then meta.title = title end
      if url then meta.url = url end

      -- The fragment wrapper (opening and closing tag) is dropped from the
      -- output once its metadata has been read.
      if text:match('mkdocs%-fragment') or text:match('</article>') then
        is_wrapper = true
      end
    end

    if not is_wrapper then
      kept:insert(el)
    end
  end

  -- Write the meta table back explicitly instead of relying on in-place
  -- mutation. NOTE(review): pandoc versions that expose the document as
  -- userdata appear to return a fresh table on each `doc.meta` access, in
  -- which case the mutation above would otherwise be lost - confirm against
  -- the pandoc version in the Docker image.
  doc.meta = meta
  doc.blocks = kept
  return doc
end
70+
71+
-- 2. Inline arithmatex: [\$F_1\'\$] → $F_1'$
4072
function Span(el)
4173
if el.classes and el.classes:includes("arithmatex") then
4274
local text = pandoc.utils.stringify(el.content)
@@ -46,7 +78,7 @@ function Span(el)
4678
end
4779
end
4880

49-
-- 2. Block arithmatex + Admonition + details
81+
-- 3. Block arithmatex + Admonition
5082
function Div(el)
5183
-- arithmatex display math
5284
if el.classes and el.classes:includes("arithmatex") then
@@ -64,12 +96,12 @@ function Div(el)
6496
break
6597
end
6698
end
67-
if not callout_type and el.classes and el.classes:includes("admonition") then
99+
if not callout_type and el.classes and (el.classes:includes("admonition") or el.classes:includes("details")) then
68100
callout_type = "note"
69101
end
70102

71103
if callout_type then
72-
-- 提取标题(admonition-title / Header / <summary>)
104+
-- 提取标题
73105
local title = nil
74106
local body = pandoc.List{}
75107
for _, blk in ipairs(el.content) do
@@ -92,10 +124,7 @@ function Div(el)
92124
end
93125
end
94126

95-
-- 写入内部内容(已经过 filter 处理)
96127
local inner_md = pandoc.write(pandoc.Pandoc(body), "commonmark_x")
97-
98-
-- 动态 fence 长度:比内部最大多 1,保证嵌套绝对安全
99128
local fence_len = math.max(3, max_fence_length(inner_md) + 1)
100129
local fence = string.rep(":", fence_len)
101130

@@ -111,22 +140,8 @@ function Div(el)
111140
return el
112141
end
113142

114-
-- 3. 清除所有标题的 {#_1} {#_2} 等 id
115-
function Header(el)
116-
el.identifier = ""
117-
return el
118-
end
119-
120-
-- 4. 处理被 skipped 的 <details>
121-
function RawBlock(el)
122-
if el.format == "html" and el.text:match("^%s*<details") then
123-
return pandoc.Div(pandoc.List{}, pandoc.Attr("", {"details"}))
124-
end
125-
end
126-
127143
return {
144+
{Pandoc = Pandoc},
128145
{Span = Span},
129-
{Div = Div},
130-
{Header = Header},
131-
{RawBlock = RawBlock}
132-
}
146+
{Div = Div}
147+
}

0 commit comments

Comments
 (0)