Skip to content

Commit 08d0c8d

Browse files
committed
llm-full script
1 parent 23acbba commit 08d0c8d

File tree

1 file changed

+342
-0
lines changed

1 file changed

+342
-0
lines changed

docs/scripts/extract-docs.js

Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
const fs = require('fs')
2+
const path = require('path')
3+
const matter = require('gray-matter')
4+
5+
// Configuration
6+
// Root of the tree that gets scanned: the parent of the directory containing this script.
const DOCS_DIR = path.join(__dirname, '..')
7+
// Destination of the generated bundle: <DOCS_DIR>/public/llms-full.txt.
const OUTPUT_FILE = path.join(DOCS_DIR, 'public', 'llms-full.txt')
8+
// Entry names skipped by walkDir. The check is on the bare entry name at every
// depth and is applied before the file/directory distinction is made.
const EXCLUDED_DIRS = ['node_modules', '.vitepress', 'dist', 'cache', 'scripts']
9+
// Further entry names skipped by walkDir (same name-only check as EXCLUDED_DIRS).
const EXCLUDED_FILES = ['.DS_Store', 'package.json', 'package-lock.json']
10+
11+
// Helper to clean markdown content
12+
/**
 * Flatten a markdown document into plain text suitable for an LLM corpus.
 *
 * Fenced code blocks are rewritten as `CODE BLOCK (lang):\n<code>\n\n` and are
 * otherwise left untouched: they are parked behind placeholders while the
 * prose rules run, so HTML/JSX tags, backticks and bracket syntax inside code
 * examples survive. Prose loses inline-code backticks, HTML comments and tags,
 * `:::tabs` containers, images, and link targets (link text is kept).
 *
 * @param {string} content Markdown source, with or without leading frontmatter.
 * @returns {string} Cleaned text, trimmed, with runs of blank lines collapsed.
 */
function cleanMarkdown(content) {
  const codeBlocks = []

  const prose = content
    // Frontmatter can only open a document. Anchoring to the start keeps
    // horizontal rules (and the text between two of them) in the body.
    .replace(/^\uFEFF?---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, '')
    // Park fenced code. The rest of the info string (e.g. `js{1,3}` or
    // `js [label]`) is skipped so that the opening fence is always recognised
    // and fences cannot be paired up off-by-one.
    .replace(/```([\w+#-]*)[^\n]*\n([\s\S]*?)```/g, (match, lang, code) => {
      codeBlocks.push(`CODE BLOCK${lang ? ` (${lang})` : ''}:\n${code.trim()}\n\n`)
      return `\u0000CODEBLOCK${codeBlocks.length - 1}\u0000`
    })
    // Remove inline code markers but preserve the content
    .replace(/`([^`]*)`/g, '$1')
    // Remove HTML comments
    .replace(/<!--[\s\S]*?-->/g, '')
    // Remove tabs containers.
    // NOTE(review): this drops everything up to the next `:::`, including any
    // code examples inside the container — confirm that is intended.
    .replace(/:::tabs[\s\S]*?:::/g, '')
    // Remove images
    .replace(/!\[.*?\]\(.*?\)/g, '')
    // Remove links but keep text
    .replace(/\[([^\]]*)\]\(.*?\)/g, '$1')
    // Remove HTML tags
    .replace(/<[^>]*>/g, '')

  return (
    prose
      // Restore code verbatim; a callback is used so `$` sequences in code are
      // not interpreted as replacement patterns.
      .replace(
        /\u0000CODEBLOCK(\d+)\u0000/g,
        (match, index) => codeBlocks[Number(index)] ?? match,
      )
      // Collapse runs of blank lines
      .replace(/\n{3,}/g, '\n\n')
      .trim()
  )
}
40+
41+
// Helper to walk directory recursively
42+
/**
 * Recursively yield the path of every `.md` file under `dir`.
 *
 * Entries whose name appears in EXCLUDED_DIRS or EXCLUDED_FILES are skipped
 * before they are statted, so an excluded entry that cannot be statted (for
 * example a dangling symlink) no longer aborts the walk. Names are visited in
 * sorted order so the generated output does not depend on the order the
 * filesystem happens to return.
 *
 * @param {string} dir Directory to scan.
 * @yields {string} Path of a markdown file, built by joining onto `dir`.
 */
function* walkDir(dir) {
  const names = fs.readdirSync(dir).sort()

  for (const name of names) {
    if (EXCLUDED_DIRS.includes(name) || EXCLUDED_FILES.includes(name)) {
      continue
    }

    const entryPath = path.join(dir, name)
    const stat = fs.statSync(entryPath)

    if (stat.isDirectory()) {
      yield* walkDir(entryPath)
    } else if (stat.isFile() && name.endsWith('.md')) {
      yield entryPath
    }
  }
}
60+
61+
// Main extraction function
62+
/**
 * Build the bundle and write it to OUTPUT_FILE.
 *
 * The bundle is a short header, then one `## <relative path>` section per
 * markdown file found under DOCS_DIR (skipping the top-level `awesome.md` and
 * files that are empty after cleaning), followed by the generated
 * cross-reference index and API reference. The output directory is created
 * when missing so a fresh checkout without `public/` does not fail on write.
 *
 * @returns {void}
 */
function extractDocs() {
  const parts = [
    '# Inertia Django Documentation\n',
    `Extracted on: ${new Date().toISOString()}\n\n`,
    'Note: Code blocks are preserved and formatted for LLM consumption.\n\n',
  ]

  // Process each markdown file
  for (const filePath of walkDir(DOCS_DIR)) {
    const relativePath = path.relative(DOCS_DIR, filePath)

    // Skip awesome.md as it's just links and not useful for LLMs
    if (relativePath === 'awesome.md') {
      continue
    }

    const raw = fs.readFileSync(filePath, 'utf8')
    const { data, content: markdownContent } = matter(raw)

    // cleanMarkdown returns trimmed text, so an empty string means "no content".
    const cleanedContent = cleanMarkdown(markdownContent)
    if (!cleanedContent) {
      continue
    }

    parts.push(`## ${relativePath}\n\n`)

    // Add frontmatter title if available
    if (data.title) {
      parts.push(`Title: ${data.title}\n\n`)
    }

    parts.push(`${cleanedContent}\n\n`, '---\n\n')
  }

  // Add auto-generated sections
  parts.push(generateCrossReferenceIndex(), generateAPIReference())

  // writeFileSync does not create parent directories.
  fs.mkdirSync(path.dirname(OUTPUT_FILE), { recursive: true })
  fs.writeFileSync(OUTPUT_FILE, parts.join(''), 'utf8')
  console.log(`Documentation extracted to: ${OUTPUT_FILE}`)
}
106+
107+
// Generate cross-reference index based on content analysis
108+
/**
 * Build the "CROSS-REFERENCE INDEX" section of the bundle.
 *
 * Every markdown file under DOCS_DIR (except the top-level `awesome.md` and
 * files with an empty body) becomes a section. Sections are listed under each
 * functional category whose keywords occur in the section title or cleaned
 * content, together with up to three method/component names found in the
 * file's language-tagged code fences.
 *
 * Keyword matching is case-insensitive on both sides, so mixed-case keywords
 * such as `useForm` and `FormData` can match. Code is scanned from the raw
 * fenced blocks rather than from the flattened text, so examples are not cut
 * off at their first blank line or `#` comment.
 *
 * @returns {string} Markdown text starting with `\n# CROSS-REFERENCE INDEX`.
 */
function generateCrossReferenceIndex() {
  const sections = []

  for (const filePath of walkDir(DOCS_DIR)) {
    const relativePath = path.relative(DOCS_DIR, filePath)

    // Skip awesome.md as it's just links and not useful for LLMs
    if (relativePath === 'awesome.md') {
      continue
    }

    const { content: markdownContent } = matter(fs.readFileSync(filePath, 'utf8'))
    if (!markdownContent.trim()) {
      continue
    }

    // Sets keep first-seen order while dropping repeats.
    const methodNames = new Set()
    const componentNames = new Set()

    // Only fences that declare a language are scanned.
    const fences = markdownContent.matchAll(/```[\w+#-]+[^\n]*\n([\s\S]*?)```/g)
    for (const [, code] of fences) {
      // Django method names and patterns
      const djangoMethods =
        code.match(/def \w+|render|inertia|InertiaResponse|redirect|HttpResponse/g) || []
      djangoMethods.forEach((name) => methodNames.add(name))

      // Component names
      const components = code.match(/[A-Z][a-zA-Z]*(?:Component|Page|Layout)/g) || []
      components.forEach((name) => componentNames.add(name))
    }

    // Strip only a trailing extension; `.md` elsewhere in the path is kept.
    const title = relativePath.replace(/\.md$/, '')
    const content = cleanMarkdown(markdownContent)

    sections.push({
      title,
      path: relativePath,
      methods: [...methodNames],
      components: [...componentNames],
      content,
      // Lowercased once here instead of once per keyword comparison.
      searchText: `${title}\n${content}`.toLowerCase(),
    })
  }

  // Group by functionality
  const functionalGroups = {
    'Authentication & Authorization': [
      'authentication',
      'authorization',
      'csrf',
      'user',
      'authenticate',
    ],
    'Forms & Validation': [
      'forms',
      'validation',
      'useForm',
      'errors',
      'redirect',
    ],
    'Routing & Navigation': [
      'routing',
      'links',
      'router.visit',
      'manual-visits',
      'redirects',
      'urls',
    ],
    'Data Management': [
      'responses',
      'props',
      'shared-data',
      'partial-reloads',
      'deferred-props',
    ],
    'File Handling': ['file-uploads', 'FormData', 'multipart'],
    Performance: ['asset-versioning', 'code-splitting', 'prefetching', 'ssr'],
    'Testing & Debugging': ['testing', 'error-handling', 'progress-indicators'],
    'Django Integration': ['middleware', 'settings', 'templates', 'views'],
  }

  let crossRefIndex = '\n# CROSS-REFERENCE INDEX\n\n'

  for (const [category, categoryKeywords] of Object.entries(functionalGroups)) {
    const needles = categoryKeywords.map((keyword) => keyword.toLowerCase())

    crossRefIndex += `## ${category}\n\n`

    for (const section of sections) {
      if (!needles.some((needle) => section.searchText.includes(needle))) {
        continue
      }

      const highlights = [...section.methods, ...section.components]
        .slice(0, 3)
        .join(', ')
      crossRefIndex += `- **${section.title}**: Key methods/components: ${highlights}\n`
    }

    crossRefIndex += '\n'
  }

  return crossRefIndex
}
219+
220+
// Generate API reference from code blocks
221+
/**
 * Build the "API REFERENCE" section of the bundle from the code fences of
 * every markdown file under DOCS_DIR (except the top-level `awesome.md`).
 *
 * - `python` fences contribute `def` snippets ("Django View Methods") and
 *   `INERTIA_* = ...` assignments ("Configuration Options").
 * - `js`/`javascript`/`typescript`/`jsx`/`tsx` fences contribute `router.*(...)`
 *   calls ("Inertia Client Methods") and `useForm(...)` calls ("Form Helpers").
 *
 * Fences are read from the raw markdown so a blank line or `#` comment inside
 * an example does not hide the rest of it. Each example records its language
 * so the emitted fence is tagged. A Python method ends at the next line that
 * starts a `def` or at a blank line, not at any `def` substring such as
 * `defer`. Extraction is regex-based and therefore heuristic.
 *
 * @returns {string} Markdown text starting with `\n# API REFERENCE`; categories
 *   with no examples are omitted.
 */
function generateAPIReference() {
  const jsLanguages = new Set(['js', 'javascript', 'typescript', 'jsx', 'tsx'])

  const codeExamples = {
    'Django View Methods': [],
    'Inertia Client Methods': [],
    'Configuration Options': [],
    'Form Helpers': [],
    // NOTE(review): nothing below ever fills this category.
    'Component Props': [],
  }

  for (const filePath of walkDir(DOCS_DIR)) {
    const relativePath = path.relative(DOCS_DIR, filePath)

    // Skip awesome.md as it's just links and not useful for LLMs
    if (relativePath === 'awesome.md') {
      continue
    }

    const { content: markdownContent } = matter(fs.readFileSync(filePath, 'utf8'))
    // Strip only a trailing extension; `.md` elsewhere in the path is kept.
    const sectionName = relativePath.replace(/\.md$/, '')

    const fences = markdownContent.matchAll(/```([\w+#-]+)[^\n]*\n([\s\S]*?)```/g)
    for (const [, language, body] of fences) {
      const code = body.trim()

      if (language === 'python') {
        // Django view methods
        const viewMethods =
          code.match(/\bdef \w+[\s\S]*?(?=\n[ \t]*def |\n\n|$)/g) || []
        for (const method of viewMethods) {
          const methodName = method.match(/def (\w+)/)?.[1]
          if (methodName) {
            codeExamples['Django View Methods'].push({
              method: methodName,
              source: sectionName,
              language,
              code: method.trim(),
            })
          }
        }

        // Configuration assignments
        const configBlocks =
          code.match(/INERTIA_\w*\s*=[\s\S]*?(?=\n\w|\n\n|$)/g) || []
        for (const config of configBlocks) {
          codeExamples['Configuration Options'].push({
            source: sectionName,
            language,
            code: config.trim(),
          })
        }
      } else if (jsLanguages.has(language)) {
        // Router calls: up to the first `}` plus the closing paren when it
        // directly follows, so the common one-object call stays balanced.
        const routerMethods = code.match(/router\.\w+\([^}]*\}\s*\)?/g) || []
        for (const call of routerMethods) {
          const methodName = call.match(/router\.(\w+)/)?.[1]
          if (methodName) {
            codeExamples['Inertia Client Methods'].push({
              method: methodName,
              source: sectionName,
              language,
              code: call.trim(),
            })
          }
        }

        // useForm usage
        const formUsage = code.match(/useForm\([^}]*\}\s*\)?/g) || []
        for (const usage of formUsage) {
          codeExamples['Form Helpers'].push({
            source: sectionName,
            language,
            code: usage.trim(),
          })
        }
      }
    }
  }

  const renderFence = ({ language, code }) =>
    `\`\`\`${language || ''}\n${code}\n\`\`\`\n\n`

  let apiRef = '\n# API REFERENCE\n\n'

  for (const [category, examples] of Object.entries(codeExamples)) {
    if (examples.length === 0) {
      continue
    }

    apiRef += `## ${category}\n\n`

    if (category.includes('Methods')) {
      // A Map (not a plain object) so names such as `constructor` or
      // `toString` cannot collide with inherited properties.
      const byMethod = new Map()
      for (const example of examples) {
        const group = byMethod.get(example.method)
        if (group) {
          group.push(example)
        } else {
          byMethod.set(example.method, [example])
        }
      }

      for (const [methodName, methodExamples] of byMethod) {
        const sources = [...new Set(methodExamples.map((e) => e.source))]
        apiRef += `### ${methodName}\n\n`
        apiRef += `Used in: ${sources.join(', ')}\n\n`
        // Only the first occurrence is shown as the representative example.
        apiRef += renderFence(methodExamples[0])
      }
    } else {
      // For other categories, just list examples
      examples.forEach((example, index) => {
        apiRef += `### Example ${index + 1} (from ${example.source})\n\n`
        apiRef += renderFence(example)
      })
    }
  }

  return apiRef
}
340+
341+
// Run extraction
342+
// Called unconditionally at module top level, so loading this file (running it
// or requiring it) performs the full extraction and writes OUTPUT_FILE.
extractDocs()

0 commit comments

Comments
 (0)