Skip to content

Commit 126845a

Browse files
authored
docs: Join llms.txt from crawlee-python (#3161)
Similar to https://github.com/apify/apify-docs/blob/master/scripts/joinLlmsFiles.mjs, this script joins the Python llms.txt and llms-full.txt into the files hosted at crawlee.dev/llms.txt and crawlee.dev/llms-full.txt.
1 parent 3040cb6 commit 126845a

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

website/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"prettify": "prettier --write --config ./tools/docs-prettier.config.js ../docs/guides/*.md",
1313
"swizzle": "docusaurus swizzle",
1414
"deploy": "rimraf .docusaurus && node --max_old_space_size=16000 node_modules/@docusaurus/core/bin/docusaurus.mjs deploy",
15-
"docusaurus": "docusaurus"
15+
"docusaurus": "docusaurus",
16+
"postbuild": "node ./tools/joinLlmsFiles.mjs"
1617
},
1718
"devDependencies": {
1819
"@apify/eslint-config-ts": "^0.4.0",

website/tools/joinLlmsFiles.mjs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import fs from 'node:fs/promises';
2+
import path from 'node:path';
3+
4+
// Absolute path of the output directory. `path.resolve` anchors the relative
// 'build' segment to the current working directory of the process.
// NOTE(review): presumably the script is always launched from `website/` — confirm.
const BUILD_DIR = path.resolve('build');
5+
6+
// Download source for the file that is written to `llms.txt` in the build directory.
const LLMS_TXT_URL = 'https://crawlee.dev/python/llms.txt';
7+
// Download source for the file that is written to `llms-full.txt` in the build directory.
const LLMS_FULL_TXT_URL = 'https://crawlee.dev/python/llms-full.txt';
8+
9+
/**
 * Downloads a text resource on a best-effort basis.
 *
 * Failures never propagate to the caller: they are logged and reported as an
 * empty string, so the caller can decide to skip the file.
 *
 * @param {string} route - URL of the resource to download.
 * @returns {Promise<string>} The response body, or `''` when the request
 *   fails or the server answers with a non-OK status.
 */
async function fetchFile(route) {
    try {
        const res = await fetch(route);
        if (!res.ok) throw new Error(`Failed to fetch ${route}: ${res.status}`);
        return await res.text();
    } catch (err) {
        // A rejected fetch() usually carries only a generic message; the
        // underlying reason (DNS, TLS, refused connection, ...) lives on
        // `err.cause`, so surface it to make the log actionable.
        const reason = err.cause?.message ?? err.cause?.code;
        const details = reason ? `${err.message} (${reason})` : err.message;
        console.error(`Error fetching ${route}:`, details);
        return '';
    }
}
19+
20+
/**
 * Downloads the remote llms files and stores them in the build directory.
 *
 * The build directory is created when missing. Targets are processed one
 * after another; a target whose download yields an empty string is skipped,
 * so no file is created or replaced for it.
 *
 * @returns {Promise<void>}
 */
async function joinFiles() {
    await fs.mkdir(BUILD_DIR, { recursive: true });

    // [download source, file name inside the build directory]
    const targets = [
        [LLMS_TXT_URL, 'llms.txt'],
        [LLMS_FULL_TXT_URL, 'llms-full.txt'],
    ];

    for (const [sourceUrl, fileName] of targets) {
        const body = await fetchFile(sourceUrl);
        if (!body) continue;

        // NOTE(review): writeFile replaces an existing file instead of
        // appending to it — confirm that overwriting is the intended "join".
        await fs.writeFile(path.join(BUILD_DIR, fileName), body, 'utf8');
        console.log(`Wrote ${fileName} to build/`);
    }
}
35+
36+
/**
 * Removes HTML-like tags from a text file, rewriting it in place.
 *
 * Every `<...>` span (anything between `<` and the next `>`) is deleted.
 * A file that does not exist is skipped with a warning instead of failing,
 * because the downloads that produce these files are best effort and may
 * not have written anything.
 *
 * @param {string} filePath - Path of the file to sanitize.
 * @returns {Promise<void>}
 * @throws Any read/write error other than the file being absent.
 */
async function sanitizeFile(filePath) {
    let content;
    try {
        content = await fs.readFile(filePath, 'utf8');
    } catch (err) {
        // Only a missing file is tolerated; permission or I/O problems
        // must still fail loudly.
        if (err.code !== 'ENOENT') throw err;
        console.warn(`Skipping sanitization, file not found: ${filePath}`);
        return;
    }

    const sanitizedContent = content.replace(/<[^>]*>/g, ''); // Remove HTML tags
    await fs.writeFile(filePath, sanitizedContent, 'utf8');
    console.log(`Sanitized ${filePath}`);
}
42+
43+
// Run the steps strictly in order: the files must be fully written before
// they are sanitized, otherwise sanitization races with the downloads and can
// read stale or missing files. Any failure in either step ends the process
// with a non-zero exit code.
try {
    await joinFiles();
    await sanitizeFile(path.join(BUILD_DIR, 'llms.txt'));
    await sanitizeFile(path.join(BUILD_DIR, 'llms-full.txt'));
} catch (err) {
    console.error('Failed to join LLMs files:', err);
    process.exit(1);
}

0 commit comments

Comments
 (0)