From 157db81c465e129b62e73bc431f2b09af84e2bd4 Mon Sep 17 00:00:00 2001 From: mvm Date: Fri, 20 Mar 2026 16:41:55 -0500 Subject: [PATCH] Filter directory pages from llms.txt --- src/pages/[...product]/llms.txt.ts | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/pages/[...product]/llms.txt.ts b/src/pages/[...product]/llms.txt.ts index 872fa37055f0248..f9e66e64446c240 100644 --- a/src/pages/[...product]/llms.txt.ts +++ b/src/pages/[...product]/llms.txt.ts @@ -2,6 +2,33 @@ import type { APIRoute, GetStaticPaths, InferGetStaticPropsType } from "astro"; import { getCollection } from "astro:content"; import dedent from "dedent"; +/** + * Maximum number of prose characters allowed alongside a DirectoryListing + * component before a page is considered to have real standalone content. + * Pages at or below this threshold are treated as pure navigation containers + * and excluded from llms.txt — their child pages are already listed individually. + */ +const DIRECTORY_PROSE_THRESHOLD = 250; + +/** + * Returns true if the page body consists of a DirectoryListing component with + * DIRECTORY_PROSE_THRESHOLD characters or fewer of surrounding prose. These + * pages are pure section index/navigation containers with no standalone content + * worth including in llms.txt — the child pages are already listed individually. + */ +function isDirectoryOnlyPage(body: string): boolean { + if (!body.includes("DirectoryListing")) return false; + // Strip import lines + let prose = body.replace(/^import\s+.*?from\s+['"].*?['"];?\s*\n?/gm, ""); + // Strip self-closing component tags e.g. + prose = prose.replace(/<[A-Z][^>]*\/>/g, ""); + // Strip paired component tags and their children e.g. ... + prose = prose.replace(/<[A-Z][^>]*>[\s\S]*?<\/[A-Z][^>]*>/g, ""); + // Strip JSX comments + prose = prose.replace(/\{\/\*[\s\S]*?\*\/\}/g, ""); + return prose.trim().length <= DIRECTORY_PROSE_THRESHOLD; +} + export const getStaticPaths = (async () => { const directory = await getCollection("directory"); @@ -22,7 +49,9 @@ export const getStaticPaths = (async () => { const prefix = urlPath; const pages = docs.filter( - (e) => e.id.startsWith(prefix + "/") || e.id === prefix, + (e) => + (e.id.startsWith(prefix + "/") || e.id === prefix) && + !isDirectoryOnlyPage(e.body ?? ""), ); if (pages.length === 0) return null;