Skip to content

Commit 02b7c5a

Browse files
authored
fix(docs): improve agentdocsspec compliance (#463)
- Move agent signaling blockquote from Banner component to post-build injection right after <body>, placing it at ~12% of the document instead of >50% (past nav/sidebar). Addresses the "buried deep" warning from the agentdocsspec checker. - Split the monolithic CLI reference (52K+ chars) into 63 per-command markdown endpoints under reference/cli/. Top-level commands are listed in llms.txt; subcommand files include linked navigation to siblings. Includes build-time validation for slug collisions, broken links, and format changes. - Copy sitemap-index.xml to sitemap.xml so the agentdocsspec freshness check can discover it at the conventional path. Move agentDocs() integration after starlight() to ensure sitemap exists when the hook runs.
1 parent cf5214d commit 02b7c5a

File tree

3 files changed

+178
-8
lines changed

3 files changed

+178
-8
lines changed

docs-site/astro.config.mjs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ export default defineConfig({
1919
],
2020
},
2121
integrations: [
22-
// Generate .md endpoints and llms.txt for agent-friendly docs
23-
agentDocs(),
2422
starlight({
2523
title: 'ICP CLI',
2624
description: 'Command-line tool for developing and deploying applications on the Internet Computer Protocol (ICP)',
@@ -124,5 +122,8 @@ export default defineConfig({
124122
},
125123
],
126124
}),
125+
// Generate .md endpoints, llms.txt, and agent signaling for agent-friendly docs.
126+
// Listed after starlight() so the astro:build:done hook runs after sitemap generation.
127+
agentDocs(),
127128
],
128129
});

docs-site/plugins/astro-agent-docs.mjs

Lines changed: 175 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
*
55
* 1. Markdown endpoints — serves a clean .md file alongside every HTML page
66
* 2. llms.txt — discovery index listing all pages with links to .md endpoints
7+
* 3. Agent signaling — injects a hidden llms.txt directive right after <body>
8+
* in every HTML page so agents discover it early (before nav/sidebar)
79
*
810
* Runs in the astro:build:done hook so it operates on the final build output.
911
*/
@@ -62,8 +64,119 @@ function findSection(filePath) {
6264
return best;
6365
}
6466

67+
// Path to the CLI reference page — split into per-command endpoints for agents.
68+
const CLI_REFERENCE = "reference/cli.md";
69+
70+
/**
 * Split the CLI reference into per-command markdown files.
 *
 * Reads `<outDir>/<CLI_REFERENCE>`, strips the trailing clap-markdown
 * generation footer, and writes one file per `## \`icp ...\`` heading into
 * `<outDir>/reference/cli/`. Subcommand list items inside each section are
 * rewritten into links that point at the sibling per-command files.
 *
 * @param {string} outDir - Build output directory that contains CLI_REFERENCE.
 * @returns {{file: string, title: string, description: string, isTopLevel: boolean}[]}
 *   Metadata for each generated sub-page (consumed when building llms.txt).
 *   Empty when the CLI reference file does not exist.
 * @throws {Error} When two commands map to the same file name, when the
 *   reference exists but no `## \`icp ...\`` heading is found, or when a
 *   generated file links to a relative .md file that does not exist.
 */
function splitCliReference(outDir) {
  const cliMd = path.join(outDir, CLI_REFERENCE);
  if (!fs.existsSync(cliMd)) return [];

  const content = fs
    .readFileSync(cliMd, "utf-8")
    // Strip the clap-markdown generation footer that appears at the end.
    .replace(/\n*<hr\/>\s*\n*<small>[\s\S]*?<\/small>\s*$/, "\n");
  // Split on ## `icp ...` headings, keeping the heading with the section.
  const sections = content.split(/^(?=## `icp\b)/m).filter((s) => s.trim());

  const subDir = path.join(outDir, "reference", "cli");
  fs.mkdirSync(subDir, { recursive: true });

  const subPages = [];
  const seenSlugs = new Map(); // slug → command name, for collision detection
  for (const section of sections) {
    // Anything before the first command heading (page title, intro) has no
    // matching heading and is skipped.
    const match = section.match(/^## `(icp[\w\s-]*?)`/);
    if (!match) continue;

    const command = match[1].trim();
    // icp build → build, icp canister call → canister-call
    const slug =
      command === "icp"
        ? "index"
        : command.replace(/^icp /, "").replace(/ /g, "-");
    const fileName = `${slug}.md`;

    // Detect slug collisions (e.g., "icp foo-bar" vs "icp foo bar").
    if (seenSlugs.has(slug)) {
      throw new Error(
        `CLI reference split: slug collision for "${fileName}" ` +
          `between commands "${seenSlugs.get(slug)}" and "${command}"`
      );
    }
    seenSlugs.set(slug, command);

    // Extract the description: first plain-text line after the heading,
    // skipping **Usage:**, ###### headings, list items, and empty lines.
    const lines = section.split("\n");
    const descLine = lines.find(
      (l, i) =>
        i > 0 &&
        l.trim() &&
        !l.startsWith("**Usage") &&
        !l.startsWith("#") &&
        !l.startsWith("*")
    );
    const description = descLine ? descLine.trim() : "";

    // Rewrite subcommand list items to link to their per-command endpoints.
    // e.g., `* \`call\` — ...` → `* [\`call\`](canister-call.md) — ...`
    // The parent prefix (e.g., "canister") is used to build the slug; the
    // bare "icp" root has an empty prefix, so its children link to `<sub>.md`.
    const parentSlug = command.replace(/^icp ?/, "").replace(/ /g, "-");
    const body = section.replace(/^## [^\n]+\n+/, "").replace(
      /^\* `(\w[\w-]*)` /gm,
      (_, sub) => {
        const subSlug = parentSlug ? `${parentSlug}-${sub}` : sub;
        // The match stops at the space before the item's separator, so only
        // the space is re-emitted. Appending a dash here would duplicate the
        // one that is already in the text following the match.
        return `* [\`${sub}\`](${subSlug}.md) `;
      }
    );

    // BOM is defined outside this block — presumably the same prefix used for
    // the other generated .md endpoints; confirm against its definition.
    fs.writeFileSync(
      path.join(subDir, fileName),
      BOM + `# ${command}\n\n` + body + "\n"
    );

    subPages.push({
      file: `reference/cli/${fileName}`,
      title: `\`${command}\``,
      description,
      // Top-level commands have exactly one space (e.g., "icp build").
      // The bare "icp" root and deep subcommands are excluded from llms.txt.
      isTopLevel: (command.match(/ /g) || []).length === 1,
    });
  }

  // Validate: the CLI reference should contain commands. If the format changed
  // and nothing was extracted, fail loudly rather than silently producing no output.
  if (subPages.length === 0) {
    throw new Error(
      "CLI reference split: no commands found. " +
        "Expected ## `icp ...` headings in " +
        CLI_REFERENCE
    );
  }

  // Validate: all subcommand links in generated files point to existing files.
  // This runs after every file is written so forward references resolve.
  for (const { file } of subPages) {
    const filePath = path.join(outDir, file);
    const md = fs.readFileSync(filePath, "utf-8");
    const linkPattern = /\]\((\S+\.md)\)/g;
    let linkMatch;
    while ((linkMatch = linkPattern.exec(md)) !== null) {
      const href = linkMatch[1];
      // Links with a URL scheme (https:, mailto:, ...) are external and cannot
      // be checked against the build output; only relative links are validated.
      if (/^[a-z][a-z0-9+.-]*:/i.test(href)) continue;
      const target = path.join(path.dirname(filePath), href);
      if (!fs.existsSync(target)) {
        throw new Error(
          `CLI reference split: broken link in ${file}: ` +
            `${href} does not exist`
        );
      }
    }
  }

  return subPages;
}
177+
65178
/** Generate llms.txt content from collected page metadata. */
66-
function generateLlmsTxt(pages, siteUrl, basePath) {
179+
function generateLlmsTxt(pages, siteUrl, basePath, cliSubPages) {
67180
const base = (siteUrl + basePath).replace(/\/$/, "");
68181

69182
const skillsBase =
@@ -149,6 +262,20 @@ function generateLlmsTxt(pages, siteUrl, basePath) {
149262
? `- [${page.title}](${url}): ${page.description}`
150263
: `- [${page.title}](${url})`;
151264
lines.push(entry);
265+
266+
// Nest top-level command endpoints under the CLI Reference entry.
267+
// Subcommands (e.g., "icp canister call") are omitted from the index
268+
// but still available as .md endpoints for agents to fetch on demand.
269+
if (page.file === CLI_REFERENCE && cliSubPages.length > 0) {
270+
for (const sub of cliSubPages) {
271+
if (!sub.isTopLevel) continue;
272+
const subUrl = `${base}/${sub.file}`;
273+
const subEntry = sub.description
274+
? ` - [${sub.title}](${subUrl}): ${sub.description}`
275+
: ` - [${sub.title}](${subUrl})`;
276+
lines.push(subEntry);
277+
}
278+
}
152279
}
153280
lines.push("");
154281
}
@@ -200,12 +327,58 @@ export default function agentDocs() {
200327

201328
logger.info(`Generated ${pages.length} markdown endpoints`);
202329

330+
// 1b. Split CLI reference into per-command endpoints for agents
331+
const cliSubPages = splitCliReference(outDir);
332+
if (cliSubPages.length > 0) {
333+
logger.info(
334+
`Split CLI reference into ${cliSubPages.length} per-command endpoints`
335+
);
336+
}
337+
203338
// 2. Generate llms.txt
204-
const llmsTxt = generateLlmsTxt(pages, siteUrl, basePath);
339+
const llmsTxt = generateLlmsTxt(pages, siteUrl, basePath, cliSubPages);
205340
fs.writeFileSync(path.join(outDir, "llms.txt"), llmsTxt);
206341
logger.info(
207342
`Generated llms.txt (${llmsTxt.length} chars, ${pages.length} pages)`
208343
);
344+
345+
// 3. Inject agent signaling directive into HTML pages
346+
// Places a visually-hidden blockquote right after <body> so it appears
347+
// early in the document (within the first ~15%), before nav/sidebar.
348+
// Uses CSS clip-rect (not display:none) so it survives HTML-to-markdown
349+
// conversion. See: https://agentdocsspec.com
350+
const llmsTxtUrl = `${basePath}llms.txt`;
351+
const directive =
352+
`<blockquote class="agent-signaling" data-pagefind-ignore>` +
353+
`<p>For AI agents: Documentation index at ` +
354+
`<a href="${llmsTxtUrl}">${llmsTxtUrl}</a></p></blockquote>`;
355+
const htmlFiles = fs.globSync("**/*.html", { cwd: outDir });
356+
let injected = 0;
357+
for (const file of htmlFiles) {
358+
const filePath = path.join(outDir, file);
359+
const html = fs.readFileSync(filePath, "utf-8");
360+
const bodyIdx = html.indexOf("<body");
361+
if (bodyIdx === -1) continue;
362+
const closeIdx = html.indexOf(">", bodyIdx);
363+
if (closeIdx === -1) continue;
364+
const insertAt = closeIdx + 1;
365+
fs.writeFileSync(
366+
filePath,
367+
html.slice(0, insertAt) + directive + html.slice(insertAt)
368+
);
369+
injected++;
370+
}
371+
logger.info(`Injected agent signaling into ${injected} HTML pages`);
372+
373+
// 4. Alias sitemap-index.xml → sitemap.xml
374+
// Astro's sitemap integration outputs sitemap-index.xml, but crawlers
375+
// and the agentdocsspec checker expect /sitemap.xml by convention.
376+
const sitemapIndex = path.join(outDir, "sitemap-index.xml");
377+
const sitemapAlias = path.join(outDir, "sitemap.xml");
378+
if (fs.existsSync(sitemapIndex) && !fs.existsSync(sitemapAlias)) {
379+
fs.copyFileSync(sitemapIndex, sitemapAlias);
380+
logger.info("Copied sitemap-index.xml → sitemap.xml");
381+
}
209382
},
210383
},
211384
};

docs-site/src/components/Banner.astro

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,8 @@
33
// Overrides Starlight's default Banner component so we don't need
44
// banner frontmatter in each file.
55
const content = 'Feedback welcome! Report issues on <a href="https://github.com/dfinity/icp-cli/issues" target="_blank" rel="noopener noreferrer">GitHub</a>, ask questions on the <a href="https://forum.dfinity.org/t/icp-cli-announcements-and-feedback-discussion/60410/1" target="_blank" rel="noopener noreferrer">Forum</a>, or chat with us on <a href="https://discord.internetcomputer.org" target="_blank" rel="noopener noreferrer">Discord</a>.';
6-
const llmsTxtPath = `${import.meta.env.BASE_URL}llms.txt`;
76
---
87

9-
<blockquote class="agent-signaling" data-pagefind-ignore>
10-
<p>For AI agents: Documentation index at <a href={llmsTxtPath}>{llmsTxtPath}</a></p>
11-
</blockquote>
128
<div class="sl-banner" data-pagefind-ignore set:html={content} />
139

1410
<style>

0 commit comments

Comments
 (0)