|
| 1 | +import nbformat |
| 2 | +import re |
| 3 | +import os |
| 4 | +import glob |
| 5 | +import logging |
| 6 | +from pathlib import Path |
| 7 | + |
# Configure logging
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Regex pattern to replace: <source src="./xxx"> → <source src="../xxx">
#   group 1: the tag text up to and including `src="`
#   group 2: the path that follows the leading "./"
#   group 3: the closing double quote
# Other attributes may appear between `<source` and `src` (HTML attribute
# order is not significant), so `<source type="video/mp4" src="./xxx">` is
# matched as well. `src` must be preceded by whitespace, which keeps
# attributes such as `data-src` from matching.
VIDEO_SRC_PATTERN = re.compile(r'(<source\s(?:[^>]*?\s)?src=")\./([^"]+)(")')
# Absolute path of the directory that contains this script.
script_dir = os.path.dirname(os.path.abspath(__file__))
| 15 | + |
| 16 | + |
def _fence_for(source: str) -> str:
    """Return a backtick fence that is longer than any backtick run in *source*."""
    longest_run = max((len(run) for run in re.findall(r"`+", source)), default=0)
    return "`" * max(3, longest_run + 1)


def convert_ipynb_to_md(ipynb_path: str, output_dir: str):
    """
    Convert a Jupyter notebook to Markdown, replacing video src paths.

    Markdown cells are copied with ``VIDEO_SRC_PATTERN`` matches rewritten
    from ``./`` to ``../``. Non-empty code cells are wrapped in a fenced
    ``python`` code block. Cells of any other type, and cell outputs, are
    not written. Cells are separated by one blank line.

    The output file is named ``<notebook stem>.md`` directly inside
    *output_dir*, so two notebooks with the same file name overwrite each
    other regardless of which directory they came from.

    Args:
        ipynb_path (str): Path to the input .ipynb file.
        output_dir (str): Directory to save the converted .md file. It is
            created if it does not exist.
    """
    ipynb_path = Path(ipynb_path)
    output_dir = Path(output_dir)
    md_path = output_dir / f"{ipynb_path.stem}.md"
    md_path.parent.mkdir(parents=True, exist_ok=True)

    with open(ipynb_path, "r", encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    md_chunks = []
    for cell in nb.cells:
        if cell.cell_type == "markdown":
            # Replace video source path from ./ to ../
            md_chunks.append(VIDEO_SRC_PATTERN.sub(r"\1../\2\3", cell.source))
        elif cell.cell_type == "code" and cell.get("source"):
            # A fixed ``` fence would be closed early by a cell that itself
            # contains triple backticks, so size the fence to the content.
            fence = _fence_for(cell.source)
            md_chunks.append(f"{fence}python\n{cell.source}\n{fence}")

    with open(md_path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(md_chunks))

    # Try to get relative path for logging; fall back to the bare file name
    # when md_path is not located under the current working directory.
    try:
        relative_md_path = md_path.relative_to(Path.cwd())
    except ValueError:
        relative_md_path = md_path.name

    logger.info("✅ Converted: %s → %s", ipynb_path.name, relative_md_path)
| 56 | + |
| 57 | + |
if __name__ == "__main__":
    # Batch convert all notebooks matching the pattern.
    # NOTE: the pattern is resolved against the current working directory,
    # whereas the output directory is anchored to this script's location.
    # `recursive=True` was dropped because it only affects patterns that
    # contain "**"; sorting makes the processing order deterministic.
    notebook_pattern = "Tutorial/rag/notebook/chapter*/*.ipynb"
    input_files = sorted(glob.glob(notebook_pattern))
    output_dir = os.path.join(script_dir, "zh/Tutorial")

    if not input_files:
        logger.warning(
            "No notebooks matched %r under %s; check the current working directory.",
            notebook_pattern,
            os.getcwd(),
        )

    for ipynb_file in input_files:
        convert_ipynb_to_md(ipynb_file, output_dir)
0 commit comments