@@ -80,6 +80,24 @@ def repl(match):
8080 # Replace all Markdown image references in the text.
8181 return re .sub (pattern , repl , text )
8282
def clean_header_title(text):
    """
    Reduce a Markdown header line to its plain-text title.

    The following are stripped, in this order:
      1. HTML tags (anything of the form <...>).
      2. Markdown images ![alt](url), removed entirely, alt text included.
      3. Markdown links [label](url), replaced by their label so the
         visible text of the link is kept.
      4. Surplus whitespace: leading/trailing whitespace is removed and
         every internal run of whitespace is collapsed to a single space.

    For example:
        'Diligent Core [![Tweet](https://...)](https://...) <img src="logo.png">'
    becomes:
        'Diligent Core'
    and:
        '[Diligent Core](https://example.com)'
    becomes:
        'Diligent Core'

    Args:
        text: The raw header text.

    Returns:
        The cleaned title; an empty string if nothing but markup remains.
    """
    # Remove HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Remove markdown images: ![...](...). This must run before the link
    # pass so that an image wrapped in a link, [![alt](img)](url), is left
    # as an empty-label link [](url), which the next step then erases.
    text = re.sub(r'!\[[^\]]*\]\([^\)]*\)', '', text)
    # Unwrap markdown links: [label](url) -> label. Dropping the label too
    # would blank out any header whose title is itself a link.
    text = re.sub(r'\[([^\]]*)\]\([^\)]*\)', r'\1', text)
    # Removing markup from the middle of the title leaves doubled spaces
    # behind; split/join both trims the ends and collapses those runs.
    return ' '.join(text.split())
83101
84102def process_content (input_filepath , lines ):
85103 root_dir = get_project_root ()
@@ -103,7 +121,7 @@ def process_content(input_filepath, lines):
103121 if not header_replaced and line .strip ():
104122 match = header_regex .match (line )
105123 if match :
106- header_title = match .group (2 )
124+ header_title = clean_header_title ( match .group (2 ) )
107125 # Replace the header with the \page command.
108126 output_lines .append (f"\\ page { page_id } { header_title } \n " )
109127 header_replaced = True
0 commit comments