|
| 1 | +/** |
| 2 | + * Generates robots.txt based on deployment environment. |
| 3 | + * |
| 4 | + * - Production (IS_PRODUCTION=true): Allow crawling, include sitemap |
| 5 | + * - Staging/Preview: Disallow all crawling |
| 6 | + * |
| 7 | + * The noindex meta tag (configured in docusaurus.config.ts) provides the |
| 8 | + * primary protection against indexing. The robots.txt Disallow directive |
| 9 | + * provides an additional signal to well-behaved crawlers. |
| 10 | + * |
| 11 | + * Note: Google recommends allowing crawling so crawlers can see noindex tags. |
| 12 | + * However, the Disallow approach is used here as defense-in-depth since: |
| 13 | + * 1. Well-behaved crawlers (Google, Bing) respect robots.txt and won't crawl |
| 14 | + * 2. For any pages that do get crawled, the noindex meta tag prevents indexing |
| 15 | + * 3. This dual approach is commonly used for staging environments |
| 16 | + * |
| 17 | + * Usage: |
| 18 | + * IS_PRODUCTION=true bun scripts/generate-robots-txt.ts |
| 19 | + * bun scripts/generate-robots-txt.ts # defaults to staging/disallow |
| 20 | + */ |
| 21 | + |
| 22 | +import fs from "node:fs"; |
| 23 | +import path from "node:path"; |
| 24 | +import { fileURLToPath } from "node:url"; |
| 25 | + |
// Get directory of current script (compatible with Node.js 18+).
// ESM modules have no __filename/__dirname globals, so derive them
// from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Output location: the Docusaurus static/ directory one level above this
// script — presumably copied verbatim into the site root at build time
// (NOTE(review): confirm against the Docusaurus config).
const STATIC_DIR = path.join(__dirname, "..", "static");
const ROBOTS_PATH = path.join(STATIC_DIR, "robots.txt");

// Production URL for sitemap reference
const PRODUCTION_URL = "https://docs.comapeo.app";

// Environment flag: only the literal string "true" selects production mode;
// any other value (or an unset variable) falls back to staging behavior.
const isProduction = process.env.IS_PRODUCTION === "true";
| 37 | + |
| 38 | +// Generate appropriate robots.txt content |
| 39 | +const generateRobotsTxt = (): string => { |
| 40 | + if (isProduction) { |
| 41 | + // Production: Allow crawling with sitemap reference |
| 42 | + return `# robots.txt for ${PRODUCTION_URL} |
| 43 | +# Generated automatically during build |
| 44 | +
|
| 45 | +User-agent: * |
| 46 | +Allow: / |
| 47 | +
|
| 48 | +# Sitemap location |
| 49 | +Sitemap: ${PRODUCTION_URL}/sitemap.xml |
| 50 | +`; |
| 51 | + } else { |
| 52 | + // Staging/Preview: Disallow all crawling |
| 53 | + // Combined with noindex meta tags for defense-in-depth |
| 54 | + return `# robots.txt for staging/preview environment |
| 55 | +# Generated automatically during build |
| 56 | +# This file prevents search engines from indexing staging content |
| 57 | +
|
| 58 | +User-agent: * |
| 59 | +Disallow: / |
| 60 | +
|
| 61 | +# Note: This is a staging/preview environment. |
| 62 | +# Production site is at ${PRODUCTION_URL} |
| 63 | +`; |
| 64 | + } |
| 65 | +}; |
| 66 | + |
| 67 | +// Main execution with error handling |
| 68 | +try { |
| 69 | + // Ensure static directory exists |
| 70 | + if (!fs.existsSync(STATIC_DIR)) { |
| 71 | + fs.mkdirSync(STATIC_DIR, { recursive: true }); |
| 72 | + } |
| 73 | + |
| 74 | + // Write robots.txt |
| 75 | + const content = generateRobotsTxt(); |
| 76 | + fs.writeFileSync(ROBOTS_PATH, content, "utf-8"); |
| 77 | + |
| 78 | + const envLabel = isProduction ? "production (allow)" : "staging (disallow)"; |
| 79 | + console.log(`✅ Generated robots.txt for ${envLabel}`); |
| 80 | + console.log(` Path: ${ROBOTS_PATH}`); |
| 81 | +} catch (error) { |
| 82 | + console.error("❌ Failed to generate robots.txt:", error); |
| 83 | + process.exit(1); |
| 84 | +} |
0 commit comments