Skip to content

Commit 0e3a604

Browse files
authored
[v1.x] docs: publish llms.txt and markdown renditions of the docs (#3029)
1 parent 9678a3b commit 0e3a604

3 files changed

Lines changed: 193 additions & 3 deletions

File tree

docs/hooks/llms_txt.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""Generate llms.txt, llms-full.txt, and per-page markdown (https://llmstxt.org/).
2+
3+
The hook publishes three artifacts into the built site:
4+
5+
- `llms.txt`: a markdown index of the documentation, one link per page,
6+
grouped by nav section.
7+
- a `.md` rendition of every prose page next to its HTML (e.g.
8+
`server/index.md`), which is what the llms.txt links point at.
9+
- `llms-full.txt`: every prose page concatenated for single-fetch consumption.
10+
11+
Page markdown is the source markdown with `--8<--` snippet includes resolved
12+
and relative links rewritten to absolute URLs. The API reference page
13+
(`api.md`) is a mkdocstrings stub with no markdown source, so it is linked as
14+
rendered HTML from an Optional section instead of being embedded.
15+
16+
Incremental builds (`mkdocs build --dirty`) are rejected: they skip unmodified
17+
pages, which would silently truncate the generated artifacts.
18+
"""
19+
20+
from __future__ import annotations
21+
22+
import posixpath
23+
import re
24+
from dataclasses import dataclass, field
25+
from pathlib import Path
26+
27+
from mkdocs.config.defaults import MkDocsConfig
28+
from mkdocs.exceptions import PluginError
29+
from mkdocs.structure.files import File, Files
30+
from mkdocs.structure.nav import Navigation, Section
31+
from mkdocs.structure.pages import Page
32+
33+
# Pages that are not given a `.md` rendition. Each entry is
# (src_uri, link title, link description); llms.txt links them as rendered
# HTML under "## Optional".
_OPTIONAL_PAGES = [
    (
        "api.md",
        "API reference",
        "Auto-generated API reference for the mcp package (rendered HTML)",
    ),
]

# A single-line snippet include such as `    --8<-- "path/to/file.py"`.
# `indent` captures the leading whitespace, `path` the quoted file path.
_SNIPPET_LINE = re.compile(
    r'^(?P<indent>[ \t]*)--8<-- "(?P<path>[^"\n]+)"$',
    flags=re.MULTILINE,
)

# A markdown link whose target ends in `.md`. Groups, in order: the `](`
# opener, the target, an optional `#anchor`, an optional quoted title, and
# the closing `)`.
_MD_LINK = re.compile(r'(\]\()([^)\s]+\.md)(#[^)\s]*)?( +"[^"]*")?(\))')
40+
41+
42+
@dataclass
class _State:
    """Scratch space shared by the hook callbacks during one build.

    `on_nav` and `on_page_markdown` fill it in, `on_post_build` reads it,
    and `on_config` resets it.
    """

    # Processed markdown of each rendered page, keyed by the page's src_uri.
    page_markdown: dict[str, str] = field(default_factory=dict)
    # src_uris of the pages that get a `.md` rendition.
    rendition_uris: set[str] = field(default_factory=set)
    # Navigation and file collection captured in `on_nav`; None until then.
    nav: Navigation | None = None
    files: Files | None = None


# Single module-level instance used by every callback in this module.
_state = _State()
51+
52+
53+
def _site_url(config: MkDocsConfig) -> str:
    """Return the configured `site_url` normalized to end in exactly one slash.

    Raises:
        PluginError: if `site_url` is not set in the MkDocs configuration.
    """
    if config.site_url is None:
        # Explicit error rather than `assert`: assertions are stripped under
        # `python -O`, and a missing setting is a user-facing config problem.
        raise PluginError("llms_txt: `site_url` must be set in mkdocs.yml to generate absolute links")
    return config.site_url.rstrip("/") + "/"
56+
57+
58+
def _md_uri(file: File) -> str:
    """Return the site-relative URI of the markdown rendition of `file`.

    This is `file.dest_uri` with a trailing `.html` replaced by `.md`; a
    destination that does not end in `.html` comes back unchanged.
    """
    html_uri = file.dest_uri
    return re.sub(r"\.html$", ".md", html_uri)
60+
61+
62+
def on_config(config: MkDocsConfig) -> None:
    """Reset the module-level state so each build starts clean.

    `mkdocs serve` rebuilds reuse the imported module, so anything recorded
    by the previous build has to be dropped here.
    """
    _state.nav = None
    _state.files = None
    _state.rendition_uris.clear()
    _state.page_markdown.clear()
67+
68+
69+
def on_nav(nav: Navigation, config: MkDocsConfig, files: Files) -> None:
    """Record the nav and file collection and select the pages to render as markdown.

    Every page in the nav gets a `.md` rendition except the ones listed in
    `_OPTIONAL_PAGES`, which are linked as rendered HTML instead.
    """
    _state.nav = nav
    _state.files = files
    # Derive the exclusions from `_OPTIONAL_PAGES` so the two cannot drift
    # apart when another optional page is added.
    optional_uris = {src_uri for src_uri, _title, _description in _OPTIONAL_PAGES}
    _state.rendition_uris.update(
        page.file.src_uri for page in nav.pages if page.file.src_uri not in optional_uris
    )
73+
74+
75+
def _resolve_snippets(markdown: str, repo_root: Path, src_uri: str) -> str:
    """Inline every single-line `--8<-- "path"` include found in `markdown`.

    Args:
        markdown: page source as handed to the hook.
        repo_root: directory that snippet paths are resolved against.
        src_uri: src_uri of the page, used only in error messages.

    Returns:
        The markdown with each include line replaced by the file's content.

    Raises:
        PluginError: if a snippet path escapes `repo_root`, a snippet cannot
            be read, or a line mentioning `--8<--` was not substituted.
    """
    # Resolve once; every include is checked against the same root.
    root = repo_root.resolve()

    def include(match: re.Match[str]) -> str:
        indent, path = match["indent"], match["path"]
        # Mirror the snippets extension's restrict_base_path: reject paths
        # that resolve outside the repo root.
        resolved_path = (repo_root / path).resolve()
        if not resolved_path.is_relative_to(root):
            raise PluginError(f"llms_txt: snippet path {path!r} in {src_uri} escapes the repo root")
        try:
            content = resolved_path.read_text(encoding="utf-8").rstrip("\n")
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is not an OSError; without it a non-UTF-8
            # snippet would surface as a raw traceback.
            raise PluginError(f"llms_txt: cannot read snippet {path!r} in {src_uri}") from exc
        # Keep a pointer to the embedded file so readers can find it on disk.
        if path.endswith(".py"):
            content = f"# {path}\n{content}"
        if indent:
            # Re-indent the snippet to the include line's level, leaving
            # blank lines empty.
            content = "\n".join(indent + line if line else line for line in content.split("\n"))
        return content

    resolved, substitutions = _SNIPPET_LINE.subn(include, markdown)
    # Every line that mentions the marker must have been substituted;
    # anything else is an include form this hook does not handle.
    if substitutions != sum("--8<--" in line for line in markdown.splitlines()):
        raise PluginError(f"llms_txt: unresolved snippet include in {src_uri}")
    return resolved


def _absolutize_links(markdown: str, files: Files, src_uri: str, site_url: str) -> str:
    """Rewrite the `.md` links in `markdown` to absolute URLs under `site_url`.

    Links whose target contains `://` are left untouched. Other targets are
    resolved relative to the directory of `src_uri`.

    Raises:
        PluginError: if a link target does not correspond to a known file.
    """
    src_dir = posixpath.dirname(src_uri)

    def rewrite(match: re.Match[str]) -> str:
        opening, target, anchor, title, closing = match.groups()
        if "://" in target:
            return match.group(0)
        linked = files.get_file_from_path(posixpath.normpath(posixpath.join(src_dir, target)))
        if linked is None:
            raise PluginError(f"llms_txt: cannot resolve link target {target!r} in {src_uri}")
        # Pages without a markdown rendition (the api.md stub) link to their HTML instead.
        url = _md_uri(linked) if linked.src_uri in _state.rendition_uris else linked.url
        return f"{opening}{site_url}{url}{anchor or ''}{title or ''}{closing}"

    return _MD_LINK.sub(rewrite, markdown)


def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, files: Files) -> str | None:
    """Record the publishable markdown for `page` without altering the build.

    Pages selected in `on_nav` have their snippet includes inlined and their
    relative `.md` links made absolute; the result is stored in
    `_state.page_markdown` for `on_post_build`. Always returns None, so
    MkDocs keeps rendering the original markdown.

    Raises:
        PluginError: if a snippet or a link target cannot be resolved.
    """
    src_uri = page.file.src_uri
    if src_uri not in _state.rendition_uris:
        return None

    # Same anchor as the pymdownx.snippets `base_path` in mkdocs.yml.
    repo_root = Path(config.config_file_path).parent
    resolved = _resolve_snippets(markdown, repo_root, src_uri)
    _state.page_markdown[src_uri] = _absolutize_links(resolved, files, src_uri, _site_url(config))
    return None
120+
121+
122+
def _section_pages(section: Section) -> list[Page]:
    """Collect, depth-first and in nav order, the pages under `section` that get a rendition.

    Nested sections are flattened into the result; children that are neither
    a selected page nor a section are skipped.
    """
    collected: list[Page] = []
    for node in section.children:
        if isinstance(node, Section):
            collected += _section_pages(node)
        elif isinstance(node, Page) and node.file.src_uri in _state.rendition_uris:
            collected.append(node)
    return collected
130+
131+
132+
def on_post_build(config: MkDocsConfig) -> None:
    """Write the `.md` renditions, `llms.txt`, and `llms-full.txt` into the built site.

    Raises:
        PluginError: if a page selected for a rendition was not rendered in
            this build, if a page does not start with an H1, or if a page
            listed in `_OPTIONAL_PAGES` is not part of the site.
    """
    # Internal invariant (both are set in `on_nav`), not user input; the
    # assert also narrows the Optional types for the code below.
    assert _state.nav is not None and _state.files is not None
    missing = _state.rendition_uris - _state.page_markdown.keys()
    if missing:
        raise PluginError(f"llms_txt: pages skipped this build (is this a --dirty build?): {sorted(missing)}")

    site_dir = Path(config.site_dir)
    site_url = _site_url(config)

    # Pages at the nav root are grouped under a synthetic "Docs" heading;
    # each top-level nav section keeps its title, with nested sections
    # flattened into it.
    top_level = [
        item for item in _state.nav.items if isinstance(item, Page) and item.file.src_uri in _state.rendition_uris
    ]
    sections: list[tuple[str, list[Page]]] = [("Docs", top_level)] if top_level else []
    for item in _state.nav.items:
        if isinstance(item, Section):
            pages = _section_pages(item)
            if pages:
                sections.append((item.title, pages))

    index = [f"# {config.site_name}", ""]
    # `site_description` is optional in MkDocs; skip the summary blockquote
    # rather than publishing a literal "> None".
    if config.site_description:
        index += [f"> {config.site_description}", ""]
    full: list[str] = []
    for title, pages in sections:
        index += [f"## {title}", ""]
        for page in pages:
            markdown = _state.page_markdown[page.file.src_uri]
            (site_dir / _md_uri(page.file)).write_text(markdown, encoding="utf-8")

            description = page.meta.get("description")
            tail = f": {description}" if description else ""
            index.append(f"- [{page.title}]({site_url}{_md_uri(page.file)}){tail}")

            # Drop the page's own H1: the concatenated file supplies a
            # heading built from the page title instead.
            body, h1_found = re.subn(r"\A\s*# .+\n", "", markdown)
            if not h1_found:
                raise PluginError(f"llms_txt: page {page.file.src_uri} does not start with an H1")
            full += [f"# {page.title}", "", f"Source: {page.canonical_url}", "", body.strip(), ""]
        index.append("")

    index += ["## Optional", ""]
    for src_uri, title, description in _OPTIONAL_PAGES:
        linked = _state.files.get_file_from_path(src_uri)
        if linked is None:
            raise PluginError(f"llms_txt: optional page {src_uri} not found")
        index.append(f"- [{title}]({site_url}{linked.url}): {description}")
    index.append("")

    (site_dir / "llms.txt").write_text("\n".join(index), encoding="utf-8")
    (site_dir / "llms-full.txt").write_text("\n".join(full), encoding="utf-8")

docs/index.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,9 @@ npx -y @modelcontextprotocol/inspector
7070
## API Reference
7171

7272
Full API documentation is available in the [API Reference](api.md).
73+
74+
## llms.txt
75+
76+
Reading with an LLM? This documentation is also published in the [llms.txt](https://llmstxt.org/) format:
77+
[llms.txt](https://py.sdk.modelcontextprotocol.io/llms.txt) is an index of the pages, and
78+
[llms-full.txt](https://py.sdk.modelcontextprotocol.io/llms-full.txt) contains every page in a single file.

mkdocs.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
site_name: MCP Server
2-
site_description: MCP Server
1+
site_name: MCP Python SDK
2+
site_description: The official Python SDK for the Model Context Protocol
33
strict: true
44

55
repo_name: modelcontextprotocol/python-sdk
@@ -85,7 +85,10 @@ markdown_extensions:
8585
- pymdownx.critic
8686
- pymdownx.mark
8787
- pymdownx.superfences
88-
- pymdownx.snippets
88+
# Resolve snippet includes against the repo root regardless of the build's
89+
# working directory (the extension's default base_path is the CWD).
90+
- pymdownx.snippets:
91+
base_path: !relative $config_dir
8992
- pymdownx.tilde
9093
- pymdownx.inlinehilite
9194
- pymdownx.highlight:
@@ -111,6 +114,9 @@ markdown_extensions:
111114
watch:
112115
- src/mcp
113116

117+
hooks:
118+
- docs/hooks/llms_txt.py
119+
114120
plugins:
115121
- search
116122
- social

0 commit comments

Comments
 (0)