|
| 1 | +import fs from 'node:fs/promises'; |
| 2 | +import path from 'node:path'; |
| 3 | + |
// Absolute path of the output directory: `build` under the current working directory.
const BUILD_DIR = path.resolve(process.cwd(), 'build');

// Remote locations of the two text files this script downloads into BUILD_DIR.
const LLMS_TXT_URL = 'https://crawlee.dev/python/llms.txt';
const LLMS_FULL_TXT_URL = 'https://crawlee.dev/python/llms-full.txt';
| 8 | + |
/**
 * Downloads `route` and returns the response body as text.
 *
 * This is a best-effort helper: it never rejects. Any failure (a response
 * whose `ok` flag is false, or an error thrown while fetching or reading the
 * body) is reported through `console.error` and turned into an empty string.
 *
 * @param {string} route - URL to download.
 * @returns {Promise<string>} The body text, or `''` when the download failed.
 */
async function fetchFile(route) {
  let body;
  try {
    const response = await fetch(route);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${route}: ${response.status}`);
    }
    body = await response.text();
  } catch (failure) {
    // Log and fall back to an empty string so the caller can decide what to do.
    console.error(`Error fetching ${route}:`, failure.message);
    body = '';
  }
  return body;
}
| 19 | + |
/**
 * Downloads llms.txt and llms-full.txt and stores them in BUILD_DIR.
 *
 * The build directory is created first if it does not exist. The two files
 * are processed one after the other; a file whose download produced no
 * content (fetchFile returned an empty string) is skipped without writing.
 *
 * @returns {Promise<void>} Settles once both files have been handled.
 */
async function joinFiles() {
  await fs.mkdir(BUILD_DIR, { recursive: true });

  // [source URL, file name inside BUILD_DIR], in the order they are processed.
  const downloads = [
    [LLMS_TXT_URL, 'llms.txt'],
    [LLMS_FULL_TXT_URL, 'llms-full.txt'],
  ];

  for (const [url, fileName] of downloads) {
    const text = await fetchFile(url);
    if (!text) {
      continue;
    }
    await fs.writeFile(path.join(BUILD_DIR, fileName), text, 'utf8');
    console.log(`Wrote ${fileName} to build/`);
  }
}
| 35 | + |
/**
 * Rewrites the file at `filePath` in place with every `<...>` span removed.
 *
 * Anything from a `<` up to the next `>` is treated as an HTML tag and
 * dropped; a `<` with no later `>` is left untouched.
 *
 * @param {string} filePath - Path of the UTF-8 text file to clean.
 * @returns {Promise<void>} Settles once the cleaned content has been written.
 */
async function sanitizeFile(filePath) {
  const HTML_TAG = /<[^>]*>/g;
  const raw = await fs.readFile(filePath, 'utf8');
  await fs.writeFile(filePath, raw.replace(HTML_TAG, ''), 'utf8');
  console.log(`Sanitized ${filePath}`);
}
| 42 | + |
// Entry point: download the files, then strip HTML tags from them.
//
// The download has to be awaited before sanitizing. If it is only started and
// left running, sanitizeFile reads build/llms.txt while the fetch is still in
// flight. It then either fails because the file does not exist yet, or cleans
// a stale copy that the download overwrites with unsanitized content.
try {
  await joinFiles();
} catch (err) {
  console.error('Failed to join LLMs files:', err);
  process.exit(1);
}

await sanitizeFile(path.join(BUILD_DIR, 'llms.txt'));
await sanitizeFile(path.join(BUILD_DIR, 'llms-full.txt'));
0 commit comments