Skip to content

Commit 8c79bad

Browse files
Add script for generating llms.txt (facebook#4590)
1 parent 1d081a1 commit 8c79bad

File tree

3 files changed

+381
-3
lines changed

3 files changed

+381
-3
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,8 @@ website/build/
3535
!.yarn/plugins
3636
!.yarn/releases
3737
!.yarn/sdks
38-
!.yarn/versions
38+
!.yarn/versions
39+
40+
41+
# Generated file(s) for llms
42+
llms.txt

scripts/generate-llms-txt.js

Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
const fs = require('fs');
2+
const https = require('https');
3+
const url = require('url');
4+
const path = require('path');
5+
const ts = require('typescript');
6+
7+
// Name of the generated file (joined onto the build directory when written).
const OUTPUT_FILENAME = 'llms.txt';
// Rendered as the top-level `#` heading of the generated file.
const TITLE = 'React Native Documentation';
// Rendered as a block quote directly below the title.
const DESCRIPTION =
  'React Native is a framework for building native apps using React. It lets you create mobile apps using only JavaScript and React.';
// Origin prepended to every documentation link that is checked or emitted.
const URL_PREFIX = 'https://reactnative.dev';
12+
13+
/**
 * Loads a TypeScript sidebar config and returns its default export.
 *
 * The source is transpiled to CommonJS, written to a temporary file next to
 * this script, loaded with `require`, and the temporary file is removed again.
 *
 * @param {string} filePath Path to the TypeScript sidebar config.
 * @returns {object|null} The module's default export, or `null` when the file
 *   cannot be read, transpiled or loaded (the error is logged).
 */
function convertSidebarConfigToJson(filePath) {
  const tempFilePath = path.join(__dirname, 'temp-sidebar.js');

  try {
    // Read inside the try block so an unreadable file is reported the same way
    // as every other failure (logged, `null` returned) instead of throwing.
    const inputFileContent = fs.readFileSync(filePath, 'utf8');
    const {outputText} = ts.transpileModule(inputFileContent, {
      compilerOptions: {
        module: ts.ModuleKind.CommonJS,
        target: ts.ScriptTarget.ES2015,
      },
    });

    fs.writeFileSync(tempFilePath, outputText);

    // The same temp path is reused for every sidebar, so drop any cached module
    // before loading; otherwise `require` would return the previous sidebar.
    delete require.cache[require.resolve(tempFilePath)];

    const sidebarModule = require(tempFilePath);
    return sidebarModule.default ?? null;
  } catch (error) {
    console.error('Error converting sidebar config:', error);
    return null;
  } finally {
    if (fs.existsSync(tempFilePath)) {
      fs.unlinkSync(tempFilePath);
    }
  }
}
43+
44+
// Doc ids whose published URL segment differs from the id used in the sidebar.
// Frozen because it is a shared lookup table that is only ever read.
const SLUG_TO_URL = Object.freeze({
  'architecture-overview': 'overview',
  'architecture-glossary': 'glossary',
});
48+
49+
/**
 * Collects the absolute URL of every page referenced by a sidebar config.
 *
 * @param {object} sidebarConfig Sidebar config: sections mapping to categories
 *   whose values are handed to `processItemsForUrls`.
 * @param {string} prefix URL path prefix of the sidebar, e.g. `/docs`.
 * @returns {string[]} URLs in sidebar order, with ids listed in `SLUG_TO_URL`
 *   replaced by their mapped URL segment.
 */
function extractUrlsFromSidebar(sidebarConfig, prefix) {
  const collected = [];

  // Process each section (docs, api, components) and each of its categories.
  for (const categories of Object.values(sidebarConfig)) {
    for (const items of Object.values(categories)) {
      processItemsForUrls(items, collected, prefix);
    }
  }

  // Only rewrite when the whole id matches a mapped slug. A substring match
  // would also rewrite unrelated ids that merely contain a mapped slug.
  const base = `${URL_PREFIX}${prefix}/`;
  return collected.map(docUrl => {
    if (!docUrl.startsWith(base)) {
      return docUrl;
    }
    const docId = docUrl.slice(base.length);
    return Object.prototype.hasOwnProperty.call(SLUG_TO_URL, docId)
      ? `${base}${SLUG_TO_URL[docId]}`
      : docUrl;
  });
}
72+
73+
/**
 * Recursively walks sidebar items and appends the URL of every page to `urls`.
 *
 * Strings and `{type: 'doc', id}` objects are pages; `{type: 'category', items}`
 * objects (and any object carrying an `items` array at the top of the call) are
 * descended into. Anything else is ignored.
 *
 * @param {Array|object|null|undefined} items Sidebar items, or an object with an
 *   `items` array.
 * @param {string[]} urls Accumulator that receives the URLs (mutated in place).
 * @param {string} prefix URL path prefix of the sidebar, e.g. `/docs`.
 * @returns {void}
 */
function processItemsForUrls(items, urls, prefix) {
  // `typeof null` is 'object', so guard explicitly before touching properties.
  if (items === null || items === undefined) {
    return;
  }

  if (!Array.isArray(items)) {
    if (typeof items === 'object' && Array.isArray(items.items)) {
      processItemsForUrls(items.items, urls, prefix);
    }
    return;
  }

  for (const item of items) {
    if (typeof item === 'string') {
      urls.push(`${URL_PREFIX}${prefix}/${item}`);
    } else if (item !== null && typeof item === 'object') {
      if (item.type === 'doc' && item.id) {
        urls.push(`${URL_PREFIX}${prefix}/${item.id}`);
      } else if (item.type === 'category' && Array.isArray(item.items)) {
        processItemsForUrls(item.items, urls, prefix);
      }
    }
  }
}
94+
95+
/**
 * Issues an HTTPS `HEAD` request for a URL and reports the outcome.
 *
 * The returned promise never rejects; failures are encoded in the result.
 *
 * @param {string} urlString Absolute `https:` URL to check.
 * @returns {Promise<{url: string, status: number|string, is404: boolean, error?: string}>}
 *   `status` is the HTTP status code, or `'Error'` / `'Timeout'` when no
 *   response was received. `error` is only present for `'Error'`.
 */
function checkUrl(urlString) {
  return new Promise(resolve => {
    const resolveError = error =>
      resolve({
        url: urlString,
        status: 'Error',
        is404: false,
        error: error.message,
      });

    let req;
    try {
      // Parsing with the WHATWG URL API keeps the port and query string intact.
      // Both an unparsable URL and an unsupported protocol throw synchronously;
      // report them like any other request error instead of rejecting.
      req = https.request(
        new URL(urlString),
        {method: 'HEAD', timeout: 5000},
        res => {
          // Drain the response so the underlying socket is released.
          res.resume();
          resolve({
            url: urlString,
            status: res.statusCode,
            is404: res.statusCode === 404,
          });
        }
      );
    } catch (error) {
      resolveError(error);
      return;
    }

    req.on('error', resolveError);

    req.on('timeout', () => {
      // Destroying the request may emit a late 'error' event; the promise is
      // already settled by then, so that second resolve is a no-op.
      req.destroy();
      resolve({
        url: urlString,
        status: 'Timeout',
        is404: false,
      });
    });

    req.end();
  });
}
136+
137+
/**
 * Checks every URL with `checkUrl` and reports the ones that are unavailable.
 *
 * A URL counts as unavailable when the check reports a 404, an error, or a
 * timeout. The summary is printed as JSON and also returned.
 *
 * @param {string[]} urls URLs to check.
 * @returns {Promise<{totalUrls: number, unavailableUrls: Array<{url: string, status: number|string, error: string|null}>}>}
 */
async function processUrls(urls) {
  const unavailableUrls = [];

  // URLs are checked one at a time, in order.
  for (const urlToCheck of urls) {
    const {is404, status, error} = await checkUrl(urlToCheck);
    if (is404 || status === 'Error' || status === 'Timeout') {
      unavailableUrls.push({
        url: urlToCheck,
        status,
        error: error || null,
      });
    }
  }

  const result = {
    totalUrls: urls.length,
    unavailableUrls,
  };

  // The summary is printed whether or not any URL failed.
  console.log(JSON.stringify(result, null, 2));

  return result;
}
169+
170+
/**
 * Reads the `title` and `slug` fields from a markdown file's frontmatter.
 *
 * @param {string} filePath Path to the markdown file.
 * @returns {{title: string, slug: string|null}} `title` falls back to the file
 *   name without its `.md` extension when the file is unreadable, has no
 *   frontmatter, or has no (or an empty) title. `slug` has one leading `/`
 *   removed and is `null` when the field is absent.
 */
function extractMetadataFromMarkdown(filePath) {
  const fallbackTitle = path.basename(filePath, '.md');

  // Returns the trimmed value of a top-level `key: value` line, or null.
  const readField = (frontmatter, key) => {
    // Anchored to the start of a line so `title` does not match `hide_title`.
    const match = frontmatter.match(new RegExp(`^${key}:[ \\t]*(.*)$`, 'm'));
    if (!match) {
      return null;
    }
    const value = match[1].trim();
    // Drop one pair of matching surrounding quotes.
    const quoted = value.match(/^(["'])(.*)\1$/);
    return quoted ? quoted[2] : value;
  };

  try {
    const content = fs.readFileSync(filePath, 'utf8').replace(/^\uFEFF/, '');
    // Frontmatter must open the file; a `---` rule further down is not one.
    const frontmatterMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
    if (!frontmatterMatch) {
      return {title: fallbackTitle, slug: null};
    }

    const title = readField(frontmatterMatch[1], 'title');
    const slug = readField(frontmatterMatch[1], 'slug');

    return {
      title: title || fallbackTitle,
      slug: slug === null ? null : slug.replace(/^\//, ''),
    };
  } catch (error) {
    console.error(`Error reading file ${filePath}:`, error);
    return {title: fallbackTitle, slug: null};
  }
}
200+
201+
/**
 * Maps a sidebar item to the markdown file name that backs it.
 *
 * Most ids map to `<id>.md`; a few ids use a differently named file.
 *
 * @param {string|{type?: string, id?: string}} item Doc id, or a sidebar item
 *   object carrying an `id`.
 * @param {string} prefix URL path prefix of the sidebar; `/contributing` adds an
 *   extra special case for `overview`.
 * @returns {string} File name including the `.md` extension.
 */
function mapDocPath(item, prefix) {
  // A Map avoids accidental hits on Object.prototype keys such as `constructor`.
  const specialCases = new Map([
    ['environment-setup', 'getting-started.md'],
    ['native-platform', 'native-platforms.md'],
    ['turbo-native-modules-introduction', 'turbo-native-modules.md'],
    ['fabric-native-components-introduction', 'fabric-native-components.md'],
  ]);

  if (prefix === '/contributing') {
    specialCases.set('overview', 'contributing-overview.md');
  }

  const docId = typeof item === 'string' ? item : item?.id;
  if (docId === null || docId === undefined || docId === '') {
    // Nothing usable to look up; fall back to the item's string form.
    return `${item}.md`;
  }

  const key = String(docId);
  return specialCases.get(key) ?? `${key}.md`;
}
221+
222+
/**
 * Renders one sidebar config as markdown: a `##` heading per section, a `###`
 * heading per category, a bullet link per page, and a deeper heading per nested
 * category.
 *
 * Within each level, pages are listed in sidebar order before any nested
 * categories so that a page is never printed under a sibling category's
 * heading. Nested categories are rendered recursively (heading depth is capped
 * at six `#`).
 *
 * @param {object} sidebarConfig Sidebar config (sections mapping to categories).
 * @param {string} docPath Directory prefix of the markdown sources; the file
 *   name from `mapDocPath` is appended to it.
 * @param {string} prefix URL path prefix of the sidebar, e.g. `/docs`.
 * @returns {string} Markdown with each heading surrounded by one blank line.
 */
function generateMarkdown(sidebarConfig, docPath, prefix) {
  const isPage = item =>
    typeof item === 'string' ||
    (item !== null &&
      typeof item === 'object' &&
      item.type === 'doc' &&
      Boolean(item.id));

  const isCategory = item =>
    item !== null &&
    typeof item === 'object' &&
    item.type === 'category' &&
    Array.isArray(item.items);

  // Renders one page as a bullet link; frontmatter `slug` overrides the id.
  const renderPage = item => {
    const id = typeof item === 'string' ? item : item.id;
    const fullDocPath = `${docPath}${mapDocPath(item, prefix)}`;
    const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
    return `- [${title}](${URL_PREFIX}${prefix}/${slug ?? id})\n`;
  };

  // Renders the pages of a level, then each nested category under its heading.
  const renderItems = (items, headingDepth) => {
    let rendered = items.filter(isPage).map(renderPage).join('');
    for (const category of items.filter(isCategory)) {
      const hashes = '#'.repeat(Math.min(headingDepth, 6));
      rendered += `${hashes} ${category.label}\n\n`;
      rendered += renderItems(category.items, headingDepth + 1);
    }
    return rendered;
  };

  let markdown = '';

  // Process each section (docs, api, components).
  for (const [section, categories] of Object.entries(sidebarConfig)) {
    markdown += `## ${section.charAt(0).toUpperCase()}${section.slice(1)}\n\n`;

    // Process each category within the section.
    for (const [categoryName, entry] of Object.entries(categories)) {
      markdown += `### ${categoryName === '0' ? 'General' : categoryName}\n\n`;

      // A category value may be a list, an object wrapping a list in `items`,
      // or a single item; normalise all three to a list.
      const unwrapped =
        entry !== null && typeof entry === 'object' && Array.isArray(entry.items)
          ? entry.items
          : entry;
      const items = Array.isArray(unwrapped) ? unwrapped : [unwrapped];

      markdown += renderItems(items, 4);
    }
  }

  // Put a blank line before every heading, then collapse runs of blank lines.
  // The heading pattern is anchored to line starts so a `# ` inside a link
  // title does not get split onto a new line.
  return markdown
    .replace(/^(#+ .*)\n/gm, '\n$1\n')
    .replace(/\n(\n)+/g, '\n\n');
}
283+
284+
// Sidebar configs to process. For each entry, `name` is the TypeScript sidebar
// file, `docPath` is the directory prefix of its markdown sources, and `prefix`
// is the URL path under which its pages are published.
const inputFilePaths = [
  {
    name: 'sidebars.ts',
    docPath: '../docs/',
    prefix: '/docs',
  },
  {
    name: 'sidebarsArchitecture.ts',
    docPath: './architecture/',
    prefix: '/architecture',
  },
  {
    name: 'sidebarsCommunity.ts',
    docPath: './community/',
    prefix: '/community',
  },
  {
    name: 'sidebarsContributing.ts',
    docPath: './contributing/',
    prefix: '/contributing',
  },
];
306+
307+
// Fixed preamble of the generated file: title heading, block-quoted
// description, and a one-line summary. The per-sidebar markdown is appended
// after it when the file is written.
let output = `# ${TITLE}\n\n`;
output += `> ${DESCRIPTION}\n\n`;
output += `This documentation covers all aspects of using React Native, from installation to advanced usage.\n\n`;
310+
311+
/**
 * Script entry point: for every configured sidebar, loads the config, verifies
 * that all of its URLs are reachable, renders its markdown, and finally writes
 * the preamble plus all sections to `build/<OUTPUT_FILENAME>`.
 *
 * Exits the process with status 1 when a sidebar cannot be loaded, when any URL
 * is unavailable, or when any step throws.
 *
 * @returns {Promise<void>}
 */
const generateOutput = async () => {
  try {
    // Promise.all yields results in input order, so sections are emitted in a
    // deterministic order regardless of which URL check finishes first.
    const sections = await Promise.all(
      inputFilePaths.map(async ({name, docPath, prefix}) => {
        const inputFilePath = `./${name}`;

        const sidebarConfig = convertSidebarConfigToJson(inputFilePath);
        if (!sidebarConfig) {
          console.error('Failed to convert sidebar config to JSON');
          process.exit(1);
        }

        // First check URLs for 404 errors.
        const urls = extractUrlsFromSidebar(sidebarConfig, prefix);
        const {unavailableUrls} = await processUrls(urls);
        if (unavailableUrls.length > 0) {
          console.error('Documentation generation skipped due to broken links');
          process.exit(1);
        }

        // Only generate documentation if all URLs are valid.
        const markdown = generateMarkdown(sidebarConfig, docPath, prefix);
        console.log(`Successfully generated output from ${inputFilePath}`);
        return {markdown, prefix};
      })
    );

    // Keep the docs section first; the stable sort leaves every other section
    // in the order it is configured.
    const ordered = [...sections].sort(
      (a, b) => Number(b.prefix === '/docs') - Number(a.prefix === '/docs')
    );

    // Combine the preamble and all markdown content in the correct order.
    const content = output + ordered.map(({markdown}) => markdown).join('\n');

    const outputPath = path.join('build/', OUTPUT_FILENAME);
    fs.writeFileSync(outputPath, content);
    console.log(`Successfully generated documentation to: ${outputPath}`);
  } catch (err) {
    console.error('Error during processing:', err);
    process.exit(1);
  }
};

generateOutput();

0 commit comments

Comments
 (0)