|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Verify all internal links in slides/README.md |
| 4 | +Checks that referenced files actually exist in the repository. |
| 5 | +""" |
| 6 | + |
| 7 | +import re |
| 8 | +import os |
| 9 | +import sys |
| 10 | +from pathlib import Path |
| 11 | + |
def find_repo_root():
    """Locate the repository root by walking up from this file's directory.

    The first ancestor directory containing a ``.git`` entry is taken to be
    the root. The filesystem root itself is never probed.

    Returns:
        Path: the repository root directory.

    Raises:
        RuntimeError: if no ancestor directory contains ``.git``.
    """
    start = Path(__file__).resolve().parent
    for candidate in (start, *start.parents):
        # Stop before the filesystem root (where a dir is its own parent).
        if candidate == candidate.parent:
            break
        if (candidate / '.git').exists():
            return candidate
    raise RuntimeError("Could not find repository root")
def extract_internal_links(readme_path):
    """Extract internal links to repository files from a README.

    Two kinds of links are collected:
      * direct file links of the form
        ``https://github.com/ContextLab/llm-course/blob/main/<path>``
      * rendered GitHub Pages slide links of the form
        ``https://contextlab.github.io/llm-course/slides/<page>.html``,
        which are mapped to their ``.ipynb`` source under ``slides/``
        (rendered HTML is generated from notebooks, so the notebook is
        what must exist in the repo).

    Args:
        readme_path: path (str or Path) to the README file to scan.

    Returns:
        list[tuple[str, str, int]]: ``(link_type, repo_relative_path,
        match_offset)`` tuples, where ``link_type`` is ``'github'`` or
        ``'pages'``.
    """
    with open(readme_path, 'r') as f:
        content = f.read()

    # NOTE(fix): a third "relative link" pattern used to be defined here but
    # was never used, and its regex was wrong besides — `[^http]` is a
    # character class ("one char not in {h,t,p}"), not a "doesn't start with
    # http" guard (that needs a lookahead like `(?!http)`). Dead code removed.

    # Direct links to files in the repository.
    github_pattern = r'https://github\.com/ContextLab/llm-course/blob/main/([^\s\)]+)'
    # Rendered-slide links on GitHub Pages.
    pages_pattern = r'https://contextlab\.github\.io/llm-course/slides/([^\s\)]+)'

    links = []

    for match in re.finditer(github_pattern, content):
        links.append(('github', match.group(1), match.start()))

    for match in re.finditer(pages_pattern, content):
        path = f"slides/{match.group(1)}"
        # Map the rendered page back to its notebook source.
        if path.endswith('.html'):
            links.append(('pages', f"{path[:-5]}.ipynb", match.start()))

    return links
def verify_links(repo_root, links):
    """Check that every linked file has a corresponding file on disk.

    The check depends on the path's extension:
      * ``.pdf``  — compiled artifact; its ``.tex`` source must exist
        (missing source is an error).
      * ``.ipynb`` — the notebook itself must exist (missing is an error).
      * anything else — a missing file is reported only as a warning.

    Args:
        repo_root: Path to the repository root; link paths are joined to it.
        links: iterable of ``(link_type, path, position)`` tuples.

    Returns:
        tuple[list[str], list[str]]: ``(errors, warnings)`` message lists.
    """
    errors = []
    warnings = []

    for _kind, path, _offset in links:
        if path.endswith('.pdf'):
            # PDFs are generated output; verify their LaTeX source instead.
            tex_path = path[:-4] + '.tex'
            if not (repo_root / tex_path).exists():
                errors.append(f"Missing source for {path}: expected {tex_path}")
        elif path.endswith('.ipynb'):
            if not (repo_root / path).exists():
                errors.append(f"Missing file: {path}")
        elif not (repo_root / path).exists():
            warnings.append(f"File not found: {path}")

    return errors, warnings
def main():
    """Verify all internal links in slides/README.md and report the outcome.

    Exits with status 1 when the README is missing or any critical link
    (PDF source / notebook) is broken; non-critical misses are warnings.
    """
    repo_root = find_repo_root()
    readme_path = repo_root / 'slides' / 'README.md'

    # Nothing to check without the README itself.
    if not readme_path.exists():
        print(f"❌ README not found: {readme_path}")
        sys.exit(1)

    print(f"🔍 Checking links in {readme_path}")
    print(f"📁 Repository root: {repo_root}")
    print()

    links = extract_internal_links(readme_path)
    print(f"Found {len(links)} internal links to verify")

    errors, warnings = verify_links(repo_root, links)

    if warnings:
        print(f"\n⚠️  Warnings ({len(warnings)}):")
        print("\n".join(f"  - {msg}" for msg in warnings))

    if not errors:
        print("\n✅ All critical links verified!")
        return

    print(f"\n❌ Errors ({len(errors)}):")
    print("\n".join(f"  - {msg}" for msg in errors))
    sys.exit(1)

if __name__ == '__main__':
    main()