Skip to content

Commit dba583e

Browse files
authored
Merge pull request #9248 from wazuh/enhancement/idr580-turn-html-references-into-md-references-4.9
Add function to convert .html links to .md in Markdown generated files 4.9
2 parents aeb53d5 + b360cfc commit dba583e

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

source/conf.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,87 @@
373373

374374
# -- Options for Markdown output --------------------------------------------
375375

376+
def fix_markdown_links(app, exception):
    """
    Post-process generated Markdown files, replacing .html link targets with .md.

    Intended as a handler for the Sphinx ``build-finished`` event. It does
    nothing unless the build finished without an exception and the active
    builder is named ``markdown``. Every ``*.md`` file under ``app.outdir`` is
    scanned recursively and rewritten in place only when at least one link
    changed. Progress and a summary are printed to standard output.

    Conversions:
    - href="file.html" and href='file.html' become file.md
    - href="file.html#anchor" becomes href="file.md#anchor" (both quote styles)
    - [text](file.html) and [text](file.html#anchor) become their .md equivalents
    - Targets starting with "scheme://" or "//" are left untouched

    Args:
        app: Sphinx application object; ``app.builder.name`` and ``app.outdir``
            are read.
        exception: Exception raised during build (None if successful)
    """
    # Check the exception first so a failed build is skipped without touching
    # app.builder at all.
    if exception or app.builder.name != 'markdown':
        return

    # Local imports keep this handler independent of the module-level imports.
    import re
    from pathlib import Path

    # Negative lookahead shared by all patterns: rejects targets that begin
    # with "scheme://" or with a protocol-relative "//".
    not_absolute = r'(?!(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//)'

    # Compiled once, outside the file loop. Each href pattern captures an
    # optional "#fragment" after ".html" so anchored links are converted too.
    substitutions = (
        # href="path/file.html" and href="path/file.html#anchor"
        (
            re.compile(r'href="' + not_absolute + r'([^"]*?)\.html((?:#[^"]*)?)"'),
            r'href="\1.md\2"',
        ),
        # href='path/file.html' and href='path/file.html#anchor'
        (
            re.compile(r"href='" + not_absolute + r"([^']*?)\.html((?:#[^']*)?)'"),
            r"href='\1.md\2'",
        ),
        # [Link](path/file.html) and [Link](file.html#anchor)
        (
            re.compile(
                r'\[([^\]]+)\]\(' + not_absolute + r'([^\)]*?)\.html((?:#[^\)]+)?)\)'
            ),
            r'[\1](\2.md\3)',
        ),
    )

    build_dir = Path(app.outdir)
    modified_count = 0
    total_count = 0

    print("\n" + "="*70)
    print("Post-processing markdown files: Converting .html links to .md")
    print("="*70)

    # Process all markdown files recursively
    for md_file in build_dir.rglob('*.md'):
        total_count += 1
        rel_path = md_file.relative_to(build_dir)

        try:
            original_content = md_file.read_text(encoding='utf-8')

            content = original_content
            for pattern, replacement in substitutions:
                content = pattern.sub(replacement, content)

            # Only write if changes were made
            changed = content != original_content
            if changed:
                md_file.write_text(content, encoding='utf-8')
        except (OSError, UnicodeError) as e:
            # Best effort: an unreadable or unwritable file is reported and
            # skipped so the remaining files are still processed.
            print(f" ✗ Error processing {rel_path}: {e}")
        else:
            if changed:
                modified_count += 1
                print(f" ✓ Fixed links in: {rel_path}")

    print("-"*70)
    print("Processing complete!")
    print(f" Total files: {total_count}")
    print(f" Modified files: {modified_count}")
    print(f" Unchanged files: {total_count - modified_count}")
    print("="*70 + "\n")
456+
376457
# Options for sphinx-markdown-builder
377458
markdown_http_base = '' # Use relative links
378459

@@ -490,6 +571,8 @@ def setup(app):
490571
app.connect('html-page-context', manage_assets)
491572
app.connect('build-finished', finish_and_clean)
492573

574+
app.connect('build-finished', fix_markdown_links) # Connect the markdown link fixer to post-process generated markdown files
575+
493576
app.connect('html-page-context', pagefind_custom_weights)
494577

495578
def pagefind_custom_weights(app, pagename, templatename, context, doctree):

0 commit comments

Comments
 (0)