Skip to content

Commit 362337e

Browse files
Documentation: clean page titles
1 parent 2db013e commit 362337e

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

Doc/md_filter.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,24 @@ def repl(match):
8080
# Replace all Markdown image references in the text.
8181
return re.sub(pattern, repl, text)
8282

83+
def clean_header_title(text):
    """
    Reduce a Markdown header to plain title text.

    HTML tags and Markdown images are dropped entirely. Markdown links are
    replaced by their visible label, so a linked title keeps its text while
    badge links (an image wrapped in a link) disappear completely. Runs of
    whitespace, including the gaps left behind by removed elements, are
    collapsed to a single space and the result is trimmed.

    For example:
        "Diligent Core [![Tweet](https://...)](https://...) <img src="media/diligentgraphics-logo.png" ...>"
    becomes:
        "Diligent Core"
    and:
        "[Diligent Core](https://...)"
    also becomes:
        "Diligent Core"

    Args:
        text: Raw header text as captured from the Markdown source.

    Returns:
        The cleaned title; an empty string if nothing but markup was present.
    """
    # Remove HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Remove markdown images: ![...](...)
    # This must run before link handling so that a badge such as
    # [![alt](img)](url) is reduced to a link with an empty label.
    text = re.sub(r'!\[[^\]]*\]\([^\)]*\)', '', text)
    # Unwrap markdown links: [label](url) -> label
    text = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'\1', text)
    # Collapse internal whitespace runs and trim both ends.
    return ' '.join(text.split())
83101

84102
def process_content(input_filepath, lines):
85103
root_dir = get_project_root()
@@ -103,7 +121,7 @@ def process_content(input_filepath, lines):
103121
if not header_replaced and line.strip():
104122
match = header_regex.match(line)
105123
if match:
106-
header_title = match.group(2)
124+
header_title = clean_header_title(match.group(2))
107125
# Replace the header with the \page command.
108126
output_lines.append(f"\\page {page_id} {header_title}\n")
109127
header_replaced = True

0 commit comments

Comments
 (0)