Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion _quarto.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
project:
type: website
pre-render: docs/scripts/generate_config_docs.py
pre-render:
- docs/scripts/generate_config_docs.py
- docs/scripts/generate_examples_docs.py

quartodoc:
dir: docs/api
Expand Down Expand Up @@ -286,6 +288,11 @@ website:
- docs/gradient_checkpointing.qmd
- docs/nd_parallelism.qmd

- title: Usage Examples
desc: Example YAML files for training different models
contents:
- docs/examples/*.qmd

Comment on lines +291 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use section: instead of title: for sidebar groups

In the sidebar config every other group uses section:. Quarto’s schema doesn’t recognise title: at this level, so the whole “Usage Examples” block may be ignored.

-        - title: Usage Examples
+        - section: "Usage Examples"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
- title: Usage Examples
desc: Example YAML files for training different models
contents:
- docs/examples/*.qmd
- section: "Usage Examples"
desc: Example YAML files for training different models
contents:
- docs/examples/*.qmd
🤖 Prompt for AI Agents
In _quarto.yml around lines 291 to 295, replace the key `title:` with `section:`
for the sidebar group labeled "Usage Examples" because Quarto's schema requires
`section:` at this level for sidebar groups to be recognized. Update the YAML to
use `section:` instead of `title:` to ensure the block is properly included in
the sidebar.

- section: "Troubleshooting"
contents:
- docs/faq.qmd
Expand Down
5 changes: 5 additions & 0 deletions docs/scripts/examples-allowlist.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
examples:
- distributed-parallel
- slurm
- llama-3
- gpt-oss
124 changes: 124 additions & 0 deletions docs/scripts/generate_examples_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
Auto-generate example documentation pages (.qmd) from the examples allowlist.
"""

import re
import shutil
import sys
from pathlib import Path

import yaml

# Paths
# Every location below is derived from this script's own resolved path, so the
# values do not depend on the current working directory.
THIS = Path(__file__).resolve()
ROOT = THIS.parents[2] # repo root (docs/scripts -> docs -> ROOT)
# Folder holding one sub-directory per example; each is looked up by name.
EXAMPLES_DIR = ROOT / "examples"
# Destination folder for the generated .qmd pages and copied assets.
OUTPUT_DIR = ROOT / "docs" / "examples"
# Allowlist file that lives next to this script.
ALLOWLIST_YML = THIS.parent / "examples-allowlist.yml"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is an allowlist necessary? I think anything matching README.md within the examples dir seems to be a good check.

We just need to make sure the resulting qmd has execute: false



# utilities
def slugify(name: str) -> str:
    """Turn an example name (possibly a nested path) into a URL-safe slug.

    Runs of characters other than ASCII letters, digits, ``-``, ``_`` and
    ``/`` become a single dash, path separators become dashes, repeated
    dashes are collapsed, edge dashes are dropped and the result is
    lower-cased. ``"example"`` is returned when nothing is left.
    """
    # Replace disallowed runs first, then flatten path separators.
    dashed = re.sub(r"[^a-zA-Z0-9\-_/]+", "-", name.strip()).replace("/", "-")
    collapsed = re.sub(r"-+", "-", dashed)
    slug = collapsed.strip("-").lower()
    if not slug:
        return "example"
    return slug


def read_allowlist() -> list:
    """Load the list of example folder names from the allowlist YAML file.

    Returns:
        The value of the top-level ``examples`` key. An empty file, a missing
        key, or a key with no value (``examples:``) all yield an empty list.

    Raises:
        ValueError: if the document is not a mapping, or if ``examples`` is
            present but is not a list.
    """
    with open(ALLOWLIST_YML, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    # A top-level list or scalar has no `.get`; report it as a format error
    # instead of failing with an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(
            "examples-allowlist.yml must be a mapping with an `examples` list"
        )
    items = data.get("examples")
    if items is None:
        # Key absent, or present with every entry removed/commented out.
        return []
    if not isinstance(items, list):
        raise ValueError("`examples` must be a list in examples-allowlist.yml")
    return items


def find_readme(folder: Path) -> Path | None:
for name in ("README.md", "Readme.md", "readme.md"):
p = folder / name
if p.exists():
return p
return None


def first_h1(md: str) -> str | None:
for line in md.splitlines():
if line.startswith("# "):
return line[2:].strip()
return None


# Markdown image syntax ``![alt](target)``; group 1 is everything between the
# parentheses (the target and, if present, a quoted title).
IMG_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")


def rewrite_and_copy_assets(
    md: str, src_dir: Path, dest_assets_root: Path, slug: str
) -> str:
    """
    Copy local image assets referenced in markdown to
    docs/examples/assets/<slug>/... and rewrite the links.

    Args:
        md: Markdown text to scan for ``![alt](target)`` images.
        src_dir: Directory the markdown came from; relative targets are
            resolved against it.
        dest_assets_root: Root folder for copied assets; files end up under
            ``dest_assets_root / slug``.
        slug: Page slug, used both as the destination sub-folder and in the
            rewritten ``assets/<slug>/...`` link.

    Returns:
        The markdown with every resolvable local image target rewritten.
        Remote URLs, other URI schemes (for example ``data:``) and targets
        that do not point at an existing file are left untouched.
    """
    dest_assets = dest_assets_root / slug
    # Resolve once so it is comparable with the resolved asset paths below;
    # otherwise a symlinked or relative src_dir makes relative_to() fail even
    # for assets that do live inside it.
    src_root = src_dir.resolve()

    def locate(target):
        """Return the resolved path of *target* if it is a file, else None."""
        try:
            path = (src_root / target).resolve()
        except (OSError, ValueError):
            return None  # e.g. over-long or otherwise unusable path text
        return path if path.is_file() else None

    def repl(m):
        raw = m.group(1)
        url = raw.strip()
        # Leave remote URLs and any other scheme alone. Two or more scheme
        # characters are required so a drive letter is not taken for a scheme.
        if not url or re.match(r"^(?:[a-zA-Z][a-zA-Z0-9+.\-]+:|//)", url):
            return m.group(0)

        # Try the whole text first (keeps paths containing spaces working),
        # then retry without a trailing quoted title: ![a](img.png "Title").
        target, title = url, ""
        src_path = locate(target)
        if src_path is None:
            with_title = re.match(r"""^(\S+)(\s+(?:"[^"]*"|'[^']*'))$""", url)
            if with_title:
                target, title = with_title.groups()
                src_path = locate(target)
        if src_path is None:
            return m.group(0)  # leave as-is if not found

        try:
            rel = src_path.relative_to(src_root)
        except ValueError:
            # Review feedback: a README may reference an asset outside the
            # example directory (such as ../shared/logo.png); relative_to()
            # then raises ValueError and would abort the whole run. Keep going
            # by storing the file under its base name in a dedicated
            # sub-folder, so it cannot overwrite an in-tree asset. `rel` must
            # stay a Path because as_posix() is called on it below.
            rel = Path("_external") / src_path.name

        dest_path = dest_assets / rel
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_path, dest_path)
        new_rel = f"assets/{slug}/{rel.as_posix()}"

        # Splice by position rather than str.replace(), which would also
        # rewrite the alt text whenever it contains the same path.
        whole = m.group(0)
        start = m.start(1) - m.start(0)
        end = m.end(1) - m.start(0)
        lead = raw[: len(raw) - len(raw.lstrip())]
        trail = raw[len(raw.rstrip()):]
        return whole[:start] + lead + new_rel + title + trail + whole[end:]

    return IMG_RE.sub(repl, md)


def write_qmd(out_path: Path, title: str, body_md: str):
    """Write a ``.qmd`` file made of YAML front matter followed by *body_md*.

    Args:
        out_path: File to create; missing parent directories are created.
        title: Page title placed in the front matter.
        body_md: Markdown body appended after the front matter.

    The front matter sets ``title`` and enables the table of contents for the
    HTML format (``format.html.toc``).
    """
    # Function-scope import so this block does not rely on a file-level one.
    import json

    out_path.parent.mkdir(parents=True, exist_ok=True)
    # A JSON string is a valid YAML double-quoted scalar. Python's repr() is
    # not: it backslash-escapes quotes inside single-quoted output and
    # doubles backslashes, both of which YAML reads differently.
    yaml_title = json.dumps(title, ensure_ascii=False)
    # `toc` must be indented below `html` to be an option of the HTML format.
    fm = (
        "---\n"
        f"title: {yaml_title}\n"
        "format:\n"
        "  html:\n"
        "    toc: true\n"
        "---\n\n"
    )
    out_path.write_text(fm + body_md, encoding="utf-8")


def main():
    """Generate one ``.qmd`` page per allowlisted example, plus an index page.

    For every entry of the allowlist the matching folder under
    ``EXAMPLES_DIR`` is looked up, its README is read, local images are
    copied next to the output, and the page is written to ``OUTPUT_DIR``.
    Entries that cannot be processed are reported on stderr and skipped; the
    run itself continues. When at least one page was produced, an
    ``index.qmd`` listing the pages is written as well.
    """
    allow = read_allowlist()
    if not EXAMPLES_DIR.exists():
        print(f"[WARN] {EXAMPLES_DIR} not found", file=sys.stderr)
        return

    (OUTPUT_DIR / "assets").mkdir(parents=True, exist_ok=True)

    # Slugs already taken. "index" is reserved for the listing page written
    # at the end, which would otherwise overwrite an example's page.
    seen = {"index"}
    generated = []
    for item in allow:
        # A bare number in YAML is not a string, and an empty name would make
        # the path below point at the examples root itself.
        if not isinstance(item, str) or not item.strip():
            print(
                f"[WARN] Skipping {item!r} (expected a folder name string)",
                file=sys.stderr,
            )
            continue

        src_dir = EXAMPLES_DIR / item
        if not src_dir.is_dir():
            print(f"[WARN] Skipping {item} (not a directory)", file=sys.stderr)
            continue

        readme = find_readme(src_dir)
        if not readme:
            print(f"[WARN] Skipping {item} (no README.md)", file=sys.stderr)
            continue

        slug = slugify(item)
        if slug in seen:
            # Different names can map to one slug (e.g. "a/b" and "a-b");
            # keep the first page instead of silently overwriting it.
            print(
                f"[WARN] Skipping {item} (slug '{slug}' already in use)",
                file=sys.stderr,
            )
            continue
        seen.add(slug)

        md = readme.read_text(encoding="utf-8")
        title = first_h1(md) or f"Example: {item}"
        md = rewrite_and_copy_assets(md, src_dir, OUTPUT_DIR / "assets", slug)
        write_qmd(OUTPUT_DIR / f"{slug}.qmd", title, md)
        generated.append(slug)

    # Optional: index page
    if generated:
        generated.sort()
        listing = "\n".join(f"- [{s}](./{s}.qmd)" for s in generated)
        index_md = "# Examples\n\nBelow are the curated examples:\n\n" + listing + "\n"
        write_qmd(OUTPUT_DIR / "index.qmd", "Examples", index_md)


if __name__ == "__main__":
    main()
Loading