Skip to content

Commit 1ffaf35

Browse files
authored
[scripts] LLMS: skip non-existing yet URLs instead of failing (facebook#4630)
1 parent cd1fb2e commit 1ffaf35

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

scripts/generate-llms-txt.js

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -153,18 +153,10 @@ async function processUrls(urls) {
153153
}
154154
}
155155

156-
const result = {
156+
return {
157157
totalUrls: urls.length,
158158
unavailableUrls: unavailableUrls,
159159
};
160-
161-
if (unavailableUrls.length > 0) {
162-
console.log(JSON.stringify(result, null, 2));
163-
} else {
164-
console.log(JSON.stringify(result, null, 2));
165-
}
166-
167-
return result;
168160
}
169161

170162
// Function to extract title from markdown frontmatter
@@ -220,7 +212,7 @@ function mapDocPath(item, prefix) {
220212
}
221213

222214
// Function to generate output for each sidebar
223-
function generateMarkdown(sidebarConfig, docPath, prefix) {
215+
function generateMarkdown(sidebarConfig, docPath, prefix, unavailableUrls) {
224216
let markdown = '';
225217

226218
// Process each section (docs, api, components)
@@ -249,26 +241,36 @@ function generateMarkdown(sidebarConfig, docPath, prefix) {
249241
if (typeof item === 'string') {
250242
// This is a direct page reference
251243
const fullDocPath = `${docPath}${mapDocPath(item, prefix)}`;
252-
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
253-
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? item})\n`;
244+
if (!isEntryUnavailable(unavailableUrls, fullDocPath)) {
245+
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
246+
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? item})\n`;
247+
}
254248
} else if (typeof item === 'object') {
255249
if (item.type === 'doc' && item.id) {
256250
// This is a doc reference with an explicit ID
257251
const fullDocPath = `${docPath}${mapDocPath(item, prefix)}`;
258-
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
259-
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? item.id})\n`;
252+
if (!isEntryUnavailable(unavailableUrls, fullDocPath)) {
253+
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
254+
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? item.id})\n`;
255+
}
260256
} else if (item.type === 'category' && Array.isArray(item.items)) {
261257
// This is a category with nested items
262258
markdown += `#### ${item.label}\n\n`;
263259
item.items.forEach(nestedItem => {
264260
if (typeof nestedItem === 'string') {
265261
const fullDocPath = `${docPath}${mapDocPath(nestedItem, prefix)}`;
266-
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
267-
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? nestedItem})\n`;
262+
if (!isEntryUnavailable(unavailableUrls, fullDocPath)) {
263+
const {title, slug} =
264+
extractMetadataFromMarkdown(fullDocPath);
265+
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? nestedItem})\n`;
266+
}
268267
} else if (nestedItem.type === 'doc' && nestedItem.id) {
269268
const fullDocPath = `${docPath}${mapDocPath(nestedItem, prefix)}`;
270-
const {title, slug} = extractMetadataFromMarkdown(fullDocPath);
271-
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? nestedItem.id})\n`;
269+
if (!isEntryUnavailable(unavailableUrls, fullDocPath)) {
270+
const {title, slug} =
271+
extractMetadataFromMarkdown(fullDocPath);
272+
markdown += `- [${title}](${URL_PREFIX}${prefix}/${slug ?? nestedItem.id})\n`;
273+
}
272274
}
273275
});
274276
}
@@ -319,20 +321,23 @@ const generateOutput = () => {
319321
if (sidebarConfig) {
320322
const urls = extractUrlsFromSidebar(sidebarConfig, prefix);
321323

322-
// First check URLs for 404 errors
323324
const promise = processUrls(urls)
324325
.then(result => {
325-
if (result.unavailableUrls.length === 0) {
326-
// Only generate documentation if all URLs are valid
327-
const markdown = generateMarkdown(sidebarConfig, docPath, prefix);
328-
results.push({markdown, prefix});
329-
console.log(`Successfully generated output from ${inputFilePath}`);
330-
} else {
326+
if (result.unavailableUrls.length > 0) {
331327
console.error(
332-
'Documentation generation skipped due to broken links'
328+
'Skipping new pages not existing in production deployment yet:',
329+
result.unavailableUrls.map(entry => entry.url)
333330
);
334-
process.exit(1);
335331
}
332+
const markdown = generateMarkdown(
333+
sidebarConfig,
334+
docPath,
335+
prefix,
336+
result.unavailableUrls
337+
);
338+
results.push({markdown, prefix});
339+
340+
console.log(`Successfully generated output from ${inputFilePath}`);
336341
})
337342
.catch(err => {
338343
console.error('Error processing URLs:', err);
@@ -370,4 +375,10 @@ const generateOutput = () => {
370375
});
371376
};
372377

378+
/**
 * Reports whether a doc page appears in the list of unavailable URLs.
 *
 * Callers use the negated result to decide whether a link should be emitted,
 * so this must return `true` only when a matching unavailable entry exists.
 *
 * @param {Array<{url: string}>} unavailableUrls - Entries whose `url` could not be reached.
 * @param {string} docPath - Path of the doc being rendered; its first character is dropped before matching.
 * @returns {boolean} `true` when some unavailable URL ends with the trimmed path, `false` otherwise (including for an empty list).
 */
function isEntryUnavailable(unavailableUrls, docPath) {
  // NOTE(review): presumably the dropped first character is a leading '.' or
  // '/' so the remainder lines up with the URL's tail — confirm against how
  // callers build `docPath`.
  const pathSuffix = docPath.substring(1);
  // `some` yields a real boolean; the previous `!find(...)` inverted the
  // answer and reported available pages as unavailable.
  return unavailableUrls.some(entry => entry.url.endsWith(pathSuffix));
}
383+
373384
// Script entry point: kick off generation as soon as this module is evaluated.
generateOutput();

0 commit comments

Comments
 (0)