Skip to content

Commit d7272cd

Browse files
authored
Merge pull request #237 from pytorch/add-llm-txt-generation
Fixes to llms.txt generation

## Changes

- **Enabled by default** - generates llms.txt automatically unless disabled or the user provides their own
- **Spec compliant** - includes H1 title, quote block description, H2 sections, and title format
- **Title deduplication** - optional `llm_deduplicate_titles` to disambiguate duplicate titles (e.g., "GRU" → "GRU (torch.nn.GRU)")
- **Generic fallback** - uses the project name for the description if `llm_description` is not set

## Configuration

```python
html_theme_options = {
    # Optional: custom description (defaults to "{project} documentation.")
    "llm_description": "Your project description here.",
    # Optional: disambiguate duplicate titles
    "llm_deduplicate_titles": "true",
    # Optional: disable generation
    "llm_disabled": "true",
}
```
2 parents 4a8895e + 2798949 commit d7272cd

File tree

3 files changed

+60
-11
lines changed

3 files changed

+60
-11
lines changed

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@
270270
# "llm_important_pages": "index, notes/cuda",
271271
# "enable_navbar_dropdowns": False,
272272
"pytorch_project": "docs",
273+
# llms.txt configuration
274+
"llm_deduplicate_titles": "true", # Enable title deduplication
273275
# "show_lf_header": False,
274276
# "show_lf_footer": False,
275277
# RunLLM Widget Configuration (uncomment and set assistant_id to enable)

pytorch_sphinx_theme2/__init__.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,26 +203,27 @@ def _generate_llms_txt(app, exception):
203203
204204
The file is resolved in this order:
205205
206-
1. **Explicit option** — ``llm_custom_file`` theme option pointing to a file
206+
1. **Explicit disable** — ``llm_disabled = "true"`` skips generation entirely.
207+
2. **Custom file** — ``llm_custom_file`` theme option pointing to a file
207208
relative to the Sphinx source directory.
208-
2. **Convention** — A file named ``llms.txt`` in the Sphinx source root.
209-
3. **Auto-generation** — A simple page listing following the Hugging Face
210-
style, with URLs resolved as:
209+
3. **Convention** — A file named ``llms.txt`` in the Sphinx source root.
210+
4. **Auto-generation** — A simple page listing following the llms.txt spec,
211+
with URLs resolved as:
211212
a. ``llm_domain`` + ``llm_base_path`` theme options → fully constructed URLs
212213
b. Sphinx ``html_baseurl`` config → baseurl + relative path
213214
c. Relative URLs as a last resort
214215
215-
Opt-in: set ``llm_disabled = false`` in html_theme_options to enable.
216+
Enabled by default. Set ``llm_disabled = "true"`` to disable.
216217
"""
217218
if exception is not None:
218219
return # Don't generate if build failed
219220

220221
if app.builder.name != "html":
221222
return
222223

223-
# Disabled by default; opt-in with llm_disabled = false
224+
# Enabled by default; opt-out with llm_disabled = "true"
224225
theme_options = app.config.html_theme_options or {}
225-
if str(theme_options.get("llm_disabled", "true")).lower() == "true":
226+
if str(theme_options.get("llm_disabled", "false")).lower() == "true":
226227
return
227228

228229
dest_path = Path(app.outdir) / "llms.txt"
@@ -286,17 +287,60 @@ def make_url(relative_path):
286287

287288
# Build the URL
288289
url = make_url(docname + ".html")
289-
docs.append({"title": str(title), "url": url})
290+
docs.append({"title": str(title), "url": url, "docname": docname})
290291

291292
except Exception as e:
292293
print(f"Warning: Could not discover pages for llms.txt: {e}")
293294

295+
# Deduplicate titles if enabled
296+
# This adds a disambiguating suffix to duplicate titles based on their URL path
297+
deduplicate = (
298+
str(theme_options.get("llm_deduplicate_titles", "false")).lower() == "true"
299+
)
300+
if deduplicate:
301+
# Count title occurrences
302+
title_counts = {}
303+
for doc in docs:
304+
title_counts[doc["title"]] = title_counts.get(doc["title"], 0) + 1
305+
306+
# Find duplicates and add disambiguation
307+
for doc in docs:
308+
if title_counts[doc["title"]] > 1:
309+
# Extract module/path info from docname for disambiguation
310+
# e.g., "generated/torch.nn.GRU" -> "torch.nn.GRU"
311+
docname = doc["docname"]
312+
313+
# Try to get a meaningful suffix from the docname
314+
if "/" in docname:
315+
suffix = docname.split("/")[-1]
316+
else:
317+
suffix = docname
318+
319+
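# Remove "generated/" prefix if present (Sphinx autosummary convention). NOTE(review): suffix is already the last "/"-separated segment of docname at this point, so this check can never match — confirm whether it can be dropped.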
320+
if suffix.startswith("generated/"):
321+
suffix = suffix[10:]
322+
323+
# Only add suffix if it's different from the title
324+
if suffix.lower() != doc["title"].lower():
325+
doc["title"] = f"{doc['title']} ({suffix})"
326+
294327
# Build the llms.txt content following the llms.txt spec
295328
lines = []
296329

297330
# Header
298331
lines.append(f"# {project}")
299332
lines.append("")
333+
334+
# Quote block with project description (for spec compliance)
335+
# If llm_description is set, use it. Otherwise, generate a generic one from project name.
336+
llm_description = theme_options.get("llm_description", "").strip()
337+
if not llm_description:
338+
# Generic fallback using Sphinx project name
339+
llm_description = f"{project} documentation."
340+
341+
lines.append(f"> {llm_description}")
342+
lines.append("")
343+
300344
lines.append("## Docs")
301345
lines.append("")
302346

pytorch_sphinx_theme2/theme.conf

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,15 @@ llm_domain =
5959
# Base path after domain (e.g., "docs/", "vision/", "audio/")
6060
# Combined with domain and version to form full URLs: https://{domain}/{base_path}{version}/
6161
llm_base_path =
62-
# Description of the site for LLMs (appears in llm:description meta tag)
62+
# Description of the site for LLMs (appears in llm:description meta tag and llms.txt quote block)
6363
llm_description =
64-
# Set to false to enable llms.txt generation
64+
# Set to true to disable llms.txt generation (enabled by default)
6565
# When enabled, URLs are resolved: llm_domain > html_baseurl > relative
66-
llm_disabled = true
66+
llm_disabled = false
6767
# Path to a custom llms.txt file (relative to Sphinx source directory).
6868
# When set, this file is copied to the output instead of auto-generating one.
6969
# If not set, a file named llms.txt in the source root is used automatically.
7070
llm_custom_file =
71+
# Set to true to add disambiguating suffixes to duplicate titles
72+
# e.g., when several pages are titled "GRU", the one at generated/torch.nn.GRU becomes "GRU (torch.nn.GRU)"
73+
llm_deduplicate_titles = false

0 commit comments

Comments
 (0)