|
373 | 373 |
|
374 | 374 | # -- Options for Markdown output -------------------------------------------- |
375 | 375 |
|
def _compile_html_link_rules():
    """Build the ``(compiled pattern, replacement)`` pairs that rewrite links.

    Each rule rewrites a relative ``.html`` target to ``.md`` and keeps an
    optional ``#fragment`` that follows the extension. Targets that start
    with ``scheme://`` or ``//`` are rejected by a negative lookahead, so
    absolute URLs are left alone. The path part may not contain ``#``, so a
    ``.html`` that only appears inside a fragment is never rewritten.
    """
    import re

    # Rejects "https://...", "ftp://...", and protocol-relative "//..." targets.
    not_absolute = r'(?!(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//)'
    return (
        # href="path/file.html" and href="path/file.html#anchor"
        (
            re.compile(
                r'href="' + not_absolute + r'([^"#]*?)\.html((?:#[^"]*)?)"'
            ),
            r'href="\1.md\2"',
        ),
        # href='path/file.html' and href='path/file.html#anchor'
        (
            re.compile(
                r"href='" + not_absolute + r"([^'#]*?)\.html((?:#[^']*)?)'"
            ),
            r"href='\1.md\2'",
        ),
        # [text](path/file.html) and [text](path/file.html#anchor)
        (
            re.compile(
                r'\[([^\]]+)\]\(' + not_absolute
                + r'([^)#]*?)\.html((?:#[^)]*)?)\)'
            ),
            r'[\1](\2.md\3)',
        ),
    )


def fix_markdown_links(app, exception):
    """
    Post-process markdown files to replace .html extensions with .md extensions.

    Intended as a Sphinx ``build-finished`` handler. It does nothing unless
    the active builder is named ``markdown`` and the build finished without
    an exception. Otherwise every ``*.md`` file under ``app.outdir`` is
    scanned recursively and rewritten in place when at least one link changed.

    Features:
    - Converts href="file.html" to href="file.md"
    - Converts href="file.html#anchor" to href="file.md#anchor"
    - Handles both single and double quotes
    - Handles markdown-style links [text](file.html) and [text](file.html#anchor)
    - Preserves absolute URLs (scheme://... and protocol-relative //...)
    - Leaves fragment-only links (e.g. "#section.html") untouched

    A file that cannot be read, decoded or written is reported and skipped;
    the remaining files are still processed.

    Args:
        app: Sphinx application object
        exception: Exception raised during build (None if successful)
    """
    if app.builder.name != 'markdown' or exception is not None:
        return

    from pathlib import Path

    build_dir = Path(app.outdir)
    # Compile once; the same rules are applied to every file.
    link_rules = _compile_html_link_rules()
    modified_count = 0
    total_count = 0

    print("\n" + "=" * 70)
    print("Post-processing markdown files: Converting .html links to .md")
    print("=" * 70)

    # Process all markdown files recursively
    for md_file in build_dir.rglob('*.md'):
        total_count += 1

        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                original_content = f.read()

            content = original_content
            for pattern, replacement in link_rules:
                content = pattern.sub(replacement, content)

            # Only write if changes were made
            if content != original_content:
                with open(md_file, 'w', encoding='utf-8') as f:
                    f.write(content)
                modified_count += 1
                print(f"  ✓ Fixed links in: {md_file.relative_to(build_dir)}")

        except (OSError, UnicodeError) as e:
            # Best effort: one unreadable file must not abort the whole pass.
            print(f"  ✗ Error processing {md_file.relative_to(build_dir)}: {e}")

    print("-" * 70)
    print("Processing complete!")
    print(f"  Total files: {total_count}")
    print(f"  Modified files: {modified_count}")
    print(f"  Unchanged files: {total_count - modified_count}")
    print("=" * 70 + "\n")
376 | 457 | # Options for sphinx-markdown-builder |
377 | 458 | markdown_http_base = '' # Use relative links |
378 | 459 |
|
@@ -490,6 +571,8 @@ def setup(app): |
490 | 571 | app.connect('html-page-context', manage_assets) |
491 | 572 | app.connect('build-finished', finish_and_clean) |
492 | 573 |
|
| 574 | + app.connect('build-finished', fix_markdown_links) # Connect the markdown link fixer to post-process generated markdown files |
| 575 | + |
493 | 576 | app.connect('html-page-context', pagefind_custom_weights) |
494 | 577 |
|
495 | 578 | def pagefind_custom_weights(app, pagename, templatename, context, doctree): |
|
0 commit comments