feat: add llms.txt

thedaviddias · thedaviddias · commit 4ae91898ac7d · 2025-02-16T13:09:02.000-05:00
diff --git a/app/[lang]/llms.txt/route.ts b/app/[lang]/llms.txt/route.ts
@@ -0,0 +1,112 @@
+import fg from 'fast-glob';
+import matter from 'gray-matter';
+import * as fs from 'node:fs/promises';
+import path from 'node:path';
+import { remark } from 'remark';
+import remarkGfm from 'remark-gfm';
+import remarkStringify from 'remark-stringify';
+
+// Next.js route segment config: `false` opts this route out of time-based revalidation.
+export const revalidate = false;
+
+// Regular expressions for cleaning up the content
+// Matches `import X from '...'`, `import { ... } from '...'` and bare `import '...'`
+// statements, including an optional trailing semicolon and newline.
+// NOTE(review): `import * as X from '...'` and combined default + named imports
+// do not appear to match this pattern — confirm whether that is intended.
+const IMPORT_REGEX = /import\s+?(?:(?:{[^}]*}|\*|\w+)\s+from\s+)?['"](.*?)['"];?\n?/g;
+// Matches a JSX tag whose name starts with a capital letter (opening or self-closing).
+// NOTE(review): the `\/?>` alternative is tried first, so for `<Foo>text</Foo>` only
+// `<Foo>` is matched; the text and the closing `</Foo>` tag are left behind.
+const COMPONENT_USAGE_REGEX = /<[A-Z][a-zA-Z]*(?:\s+[^>]*)?(?:\/?>|>[^<]*<\/[A-Z][a-zA-Z]*>)/g;
+// Matches one of the listed components together with its content, as long as the
+// content contains no `<` (i.e. no nested tags).
+const NEXTRA_COMPONENT_REGEX = /<(?:Callout|Steps|Tabs|Tab|FileTree)[^>]*>[^<]*<\/(?:Callout|Steps|Tabs|Tab|FileTree)>/g;
+// Matches a `{...}` expression containing at most one level of nested braces.
+const MDX_EXPRESSION_REGEX = /{(?:[^{}]|{[^{}]*})*}/g;
+// Matches an `export` statement from the keyword up to the first `;`, newline, or end
+// of input, so only the first line of a multi-line export is consumed.
+const EXPORT_REGEX = /export\s+(?:default\s+)?(?:const|let|var|function|class|interface|type)?\s+[a-zA-Z_$][0-9a-zA-Z_$]*[\s\S]*?(?:;|\n|$)/g;
+
+/**
+ * Route handler that builds a single plain-text document from every MDX page
+ * under `content/en/patterns`.
+ *
+ * Each page contributes a section made of its file path, an upper-cased
+ * category heading with the page title, the frontmatter description and the
+ * cleaned Markdown body. Pages flagged `draft` or `hidden` in frontmatter, and
+ * pages that fail to process, are skipped.
+ *
+ * @returns 200 with the concatenated sections, 404 when no page produced
+ *   content, or 500 when generation fails as a whole.
+ */
+export async function GET(): Promise<Response> {
+  try {
+    // Sort so the generated document has the same order on every build.
+    const files = [...(await fg(['content/en/patterns/**/*.mdx']))].sort();
+
+    const scan = files.map(async (file) => {
+      try {
+        const fileContent = await fs.readFile(file);
+        const { content, data } = matter(fileContent.toString());
+
+        // Skip if the file is marked as hidden or draft
+        if (data.draft || data.hidden) {
+          return null;
+        }
+
+        // Get the filename without extension to use as fallback title
+        const basename = path.basename(file, '.mdx');
+
+        // Extract category from file path. Glob results use `/` on every
+        // platform, so split on both separators instead of `path.sep`, which is
+        // `\` on Windows and would leave the path in one piece.
+        const pathParts = path.dirname(file).split(/[\\/]/);
+        // content/en/patterns/<category>/... -> index 3 is the category folder
+        const category = pathParts[3] || 'general';
+
+        // Use filename as title if no title in frontmatter, and convert to Title Case
+        const title = data.title || basename.split('-')
+          .map(word => word.charAt(0).toUpperCase() + word.slice(1))
+          .join(' ');
+
+        const processed = await processContent(content);
+        return `File: ${file}
+# ${category.toUpperCase()}: ${title}
+
+${data.description || ''}
+
+${processed}`;
+      } catch (error) {
+        // One broken page must not take down the whole document.
+        console.error(`Error processing file ${file}:`, error);
+        return null;
+      }
+    });
+
+    const scanned = (await Promise.all(scan)).filter(Boolean);
+
+    if (!scanned.length) {
+      return new Response('No content found', { status: 404 });
+    }
+
+    return new Response(scanned.join('\n\n'));
+  } catch (error) {
+    console.error('Error generating LLM content:', error);
+    return new Response('Internal Server Error', { status: 500 });
+  }
+}
+
+/**
+ * Converts the MDX body of a page into plain Markdown.
+ *
+ * MDX-only syntax (imports, exports, JSX components, `{...}` expressions) is
+ * stripped from the prose, fenced code blocks are kept verbatim, and the
+ * result is re-serialized through remark with GFM support.
+ *
+ * @param content - MDX body of a page, without frontmatter.
+ * @returns The cleaned Markdown. If remark fails, the regex-cleaned text is
+ *   returned without Markdown normalization so the page is still included.
+ */
+async function processContent(content: string): Promise<string> {
+  const cleanContent = cleanOutsideCodeFences(content);
+
+  try {
+    // Simple markdown processing without MDX
+    const file = await remark()
+      .use(remarkGfm)
+      .use(remarkStringify)
+      .process(cleanContent);
+
+    return String(file);
+  } catch (error) {
+    console.error('Error processing content:', error);
+    // Fall back to the same cleaned text rather than a weaker cleanup, so
+    // exports and Nextra components never leak into the output.
+    return cleanContent;
+  }
+}
+
+/**
+ * Runs `stripMdxSyntax` on everything except fenced code blocks, so braces and
+ * capitalised tags inside code samples survive the cleanup.
+ */
+function cleanOutsideCodeFences(content: string): string {
+  // Opening fence of 3+ backticks or tildes, optional info string, then the
+  // first later line that starts with the same fence. An unclosed fence does
+  // not match and is treated as prose.
+  const fencedCode = /^[ \t]*(`{3,}|~{3,})[^\n]*\n[\s\S]*?^[ \t]*\1[`~]*[ \t]*$/gm;
+  const parts: string[] = [];
+  let cursor = 0;
+
+  for (const match of content.matchAll(fencedCode)) {
+    const start = match.index ?? 0;
+    parts.push(stripMdxSyntax(content.slice(cursor, start)), match[0]);
+    cursor = start + match[0].length;
+  }
+  parts.push(stripMdxSyntax(content.slice(cursor)));
+
+  return parts.join('').trim();
+}
+
+/** Removes MDX/JSX constructs from a piece of prose (no code fences). */
+function stripMdxSyntax(text: string): string {
+  return text
+    // Remove imports first
+    .replace(IMPORT_REGEX, '')
+    // Remove exports
+    .replace(EXPORT_REGEX, '')
+    // Remove Nextra components with their content
+    .replace(NEXTRA_COMPONENT_REGEX, '')
+    // Remove other React components
+    .replace(COMPONENT_USAGE_REGEX, '')
+    // Remove MDX expressions
+    .replace(MDX_EXPRESSION_REGEX, '')
+    // Clean up multiple newlines
+    .replace(/\n{3,}/g, '\n\n')
+    // Remove empty JSX expressions
+    .replace(/{[\s]*}/g, '')
+    // Clean up any remaining JSX-like syntax
+    .replace(/<>[\s\S]*?<\/>/g, '')
+    .replace(/{\s*\/\*[\s\S]*?\*\/\s*}/g, '');
+}
diff --git a/middleware.ts b/middleware.ts
@@ -3,6 +3,6 @@ export { middleware } from 'nextra/locales'
 export const config = {
   // Matcher ignoring `/_next/` and `/api/`
   matcher: [
-    '/((?!api/mdx|api/email|api/patterns/random|api/og|_next/static|_next/image|favicon.ico|robots.txt|og/opengraph-image.png|covers|twitter-image|sitemap.xml|6ba7b811-9dad-11d1-80b4.txt|43mg4ybv6sxxanu24g7dngawd9up5w93.txt|apple-icon.png|manifest|_pagefind|examples).*)'
+    '/((?!api/mdx|api/email|api/patterns/random|api/og|_next/static|_next/image|llms.txt|favicon.ico|robots.txt|og/opengraph-image.png|covers|twitter-image|sitemap.xml|6ba7b811-9dad-11d1-80b4.txt|43mg4ybv6sxxanu24g7dngawd9up5w93.txt|apple-icon.png|manifest|_pagefind|examples).*)'
   ]
 }
diff --git a/package.json b/package.json
@@ -33,10 +33,13 @@
     "@mdn/browser-compat-data": "^5.6.37",
     "@radix-ui/react-slot": "^1.1.2",
     "@sentry/nextjs": "^8.54.0",
+    "@types/chalk": "^2.2.4",
     "@types/dagre": "^0.7.52",
+    "chalk": "^5.4.1",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "dagre": "^0.8.5",
+    "fast-glob": "^3.3.3",
     "gray-matter": "^4.0.3",
     "html-to-image": "^1.11.11",
     "lucide-react": "^0.475.0",
@@ -45,13 +48,17 @@
     "next-plausible": "^3.12.4",
     "nextra": "4.2.5",
     "nextra-theme-docs": "4.2.5",
+    "openai": "^4.83.0",
     "react": "19.0.0",
     "react-dom": "19.0.0",
     "react-intersection-observer": "^9.15.1",
     "react-markdown": "^9.0.3",
     "react-resizable-panels": "^2.1.7",
     "reactflow": "^11.11.4",
+    "remark": "^15.0.1",
     "remark-gfm": "^4.0.0",
+    "remark-mdx": "^3.1.0",
+    "remark-stringify": "^11.0.0",
     "remove-markdown": "^0.6.0",
     "require-in-the-middle": "^7.5.1",
     "simple-icons": "^14.6.0",
@@ -69,11 +76,13 @@
     "@types/node": "22.13.1",
     "@types/react": "19.0.8",
     "cross-env": "^7.0.3",
+    "dotenv": "^16.4.7",
     "eslint": "^9.20.0",
     "eslint-config-next": "15.1.6",
     "pagefind": "^1.3.0",
     "plop": "^4.0.1",
     "tailwindcss": "4.0.5",
+    "ts-node": "^10.9.2",
     "tsx": "^4.19.2",
     "typescript": "^5.7.3"
   }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

Original file line number	Diff line number	Diff line change
`@@ -3,6 +3,6 @@ export { middleware } from 'nextra/locales'`
`3`	`3`	`export const config = {`
`4`	`4`	// Matcher ignoring `/_next/` and `/api/`
`5`	`5`	`matcher: [`
`6`		`- '/((?!api/mdx\|api/email\|api/patterns/random\|api/og\|_next/static\|_next/image\|favicon.ico\|robots.txt\|og/opengraph-image.png\|covers\|twitter-image\|sitemap.xml\|6ba7b811-9dad-11d1-80b4.txt\|43mg4ybv6sxxanu24g7dngawd9up5w93.txt\|apple-icon.png\|manifest\|_pagefind\|examples).*)'`
	`6`	`+ '/((?!api/mdx\|api/email\|api/patterns/random\|api/og\|_next/static\|_next/image\|llms.txt\|favicon.ico\|robots.txt\|og/opengraph-image.png\|covers\|twitter-image\|sitemap.xml\|6ba7b811-9dad-11d1-80b4.txt\|43mg4ybv6sxxanu24g7dngawd9up5w93.txt\|apple-icon.png\|manifest\|_pagefind\|examples).*)'`
`7`	`7`	`]`
`8`	`8`	`}`