|
1 | 1 | #!/usr/bin/env node |
2 | 2 |
|
3 | 3 | import Sitemapper from '../lib/assets/sitemapper.js'; |
| 4 | +import { parseArgs } from 'node:util'; |
| 5 | +import fs from 'node:fs'; |
| 6 | + |
| 7 | +// CLI version string. Hardcoded for simplicity rather than read at runtime, so keep it in sync with the "version" field in package.json |
| 8 | +const VERSION = '4.0.3'; |
4 | 9 |
|
/**
 * Escapes one value for CSV output (RFC 4180 style): values containing a
 * comma, double quote, or line break are wrapped in double quotes, with
 * embedded double quotes doubled. `null`/`undefined` become an empty field.
 *
 * @param {unknown} value - Raw cell value.
 * @returns {string} The value, safe to join with commas.
 */
function toCsvField(value) {
  const text = value == null ? '' : String(value);
  return /[",\r\n]/.test(text) ? `"${text.replaceAll('"', '""')}"` : text;
}

/**
 * Parses an integer-valued CLI option.
 *
 * @param {string} name - Long option name without the leading dashes.
 * @param {string} raw - Raw string received from the command line.
 * @returns {number} The parsed base-10 integer.
 * @throws {Error} When `raw` does not start with an integer.
 */
function parseIntegerOption(name, raw) {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(`--${name} must be an integer, got "${raw}"`);
  }
  return parsed;
}

/**
 * Translates parsed CLI values into the options object handed to Sitemapper.
 *
 * @param {string} sitemapUrl - Sitemap URL to crawl.
 * @param {object} values - The `values` object returned by `parseArgs`.
 * @returns {object} Options for the Sitemapper constructor.
 * @throws {Error} On a non-numeric numeric option or an invalid exclusion regex.
 */
function buildSitemapperOptions(sitemapUrl, values) {
  const options = {
    url: sitemapUrl,
    debug: values.debug ?? false,
    // A parseArgs boolean is only ever `true` or `undefined`, so the positive
    // flag alone can never turn verification off; the explicit negation can.
    // If both flags are given, the negation wins.
    rejectUnauthorized: !values['no-reject-unauthorized'],
  };

  for (const name of ['timeout', 'concurrency', 'retries', 'lastmod']) {
    if (values[name]) {
      options[name] = parseIntegerOption(name, values[name]);
    }
  }

  if (values['user-agent']) {
    options.requestHeaders = { 'User-Agent': values['user-agent'] };
  }

  // Blank entries (e.g. from a trailing comma) are dropped: an empty field
  // name is meaningless and an empty regex would match every URL.
  const splitList = (raw) =>
    raw
      .split(',')
      .map((entry) => entry.trim())
      .filter(Boolean);

  if (values.fields) {
    const fieldNames = splitList(values.fields);
    if (fieldNames.length > 0) {
      options.fields = Object.fromEntries(
        fieldNames.map((fieldName) => [fieldName, true]),
      );
    }
  }

  if (values.exclusions) {
    const patterns = splitList(values.exclusions);
    if (patterns.length > 0) {
      options.exclusions = patterns.map((pattern) => new RegExp(pattern));
    }
  }

  return options;
}

/**
 * Renders a fetch result in the requested output format.
 *
 * @param {object} result - Object resolved by `sitemapper.fetch()`; `url` and
 *   `sites` are read, and `errors` when present.
 * @param {string} format - `json`, `csv`, or anything else for plaintext
 *   (case-insensitive).
 * @param {object} [fields] - Requested fields map; its keys become CSV columns.
 * @returns {string} The formatted output.
 */
function formatResult(result, format, fields) {
  switch (format.toLowerCase()) {
    case 'json':
      return JSON.stringify(result, null, 2);

    case 'csv': {
      if (!fields) {
        // Simple CSV with just URLs, one per row.
        return result.sites.map(toCsvField).join('\n');
      }
      const columns = Object.keys(fields);
      const header = columns.map(toCsvField).join(',');
      const rows = result.sites.map((site) =>
        typeof site === 'string'
          ? toCsvField(site)
          : columns.map((column) => toCsvField(site[column])).join(','),
      );
      return [header, ...rows].join('\n');
    }

    case 'plaintext':
    default: {
      const siteLines = result.sites.map((site, index) => {
        const label = typeof site === 'string' ? site : JSON.stringify(site);
        return `${index + 1}. ${label}\n`;
      });
      let text = `Sitemap URL: ${result.url}\n\nFound URLs (${result.sites.length}):\n${siteLines.join('')}`;

      // Tolerate a result without an `errors` array.
      const errors = result.errors ?? [];
      if (errors.length > 0) {
        text += `\nErrors (${errors.length}):\n`;
        errors.forEach((error, index) => {
          text += `${index + 1}. ${error.message} (${error.url})\n`;
        });
      }
      return text;
    }
  }
}

/**
 * CLI entry point: parses command-line arguments, fetches the sitemap, and
 * prints the result (or writes it to the file given by `--output`).
 *
 * Besides the options listed by `--help`, `--no-reject-unauthorized` is
 * accepted to disable TLS certificate verification.
 *
 * Exits with status 1 on invalid arguments, a missing URL, or any failure
 * while fetching, formatting, or writing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let parsed;
  try {
    parsed = parseArgs({
      args: process.argv.slice(2),
      options: {
        // Core options
        help: { type: 'boolean', short: 'h' },
        version: { type: 'boolean', short: 'v' },
        url: { type: 'string', short: 'u' },
        // Output formatting
        format: { type: 'string', short: 'f', default: 'plaintext' },
        output: { type: 'string', short: 'o' },
        // Sitemapper options
        timeout: { type: 'string', short: 't' },
        debug: { type: 'boolean', short: 'd' },
        concurrency: { type: 'string', short: 'c' },
        retries: { type: 'string', short: 'r' },
        lastmod: { type: 'string', short: 'l' },
        'reject-unauthorized': { type: 'boolean' },
        'no-reject-unauthorized': { type: 'boolean' },
        fields: { type: 'string' },
        'user-agent': { type: 'string' },
        exclusions: { type: 'string' },
      },
      allowPositionals: true,
    });
  } catch (error) {
    // parseArgs throws on unknown options or missing option values; report
    // that as a normal CLI error instead of an unhandled rejection.
    console.error('Error:', error.message);
    console.error('Run with --help for usage information');
    process.exit(1);
    return;
  }

  const { values, positionals } = parsed;

  if (values.help) {
    displayHelp();
    return;
  }

  if (values.version) {
    console.log(`sitemapper v${VERSION}`);
    return;
  }

  // The positional argument takes precedence over --url.
  const sitemapUrl = positionals[0] || values.url;

  if (!sitemapUrl) {
    console.error('Error: Please provide a sitemap URL');
    console.error('Run with --help for usage information');
    process.exit(1);
    return;
  }

  try {
    const options = buildSitemapperOptions(sitemapUrl, values);
    const sitemapper = new Sitemapper(options);
    const result = await sitemapper.fetch(sitemapUrl);
    const output = formatResult(result, values.format, options.fields);

    if (values.output) {
      fs.writeFileSync(values.output, output);
      console.log(`Results written to ${values.output}`);
    } else {
      console.log(output);
    }
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
28 | 154 |
|
/**
 * Prints the CLI usage text (version banner, usage line, option table,
 * examples, and project link) to stdout in a single `console.log` call.
 */
function displayHelp() {
  // [flags, description] pairs; an empty flags cell marks a continuation line.
  const optionRows = [
    ['-h, --help', 'Show this help message and exit'],
    ['-v, --version', 'Show version information and exit'],
    ['-u, --url <url>', 'Sitemap URL to crawl (alternative to positional arg)'],
    ['-o, --output <file>', 'Write results to a file instead of stdout'],
    ['-f, --format <format>', 'Output format (plaintext, csv, json) [default: plaintext]'],
    ['-t, --timeout <ms>', 'Maximum timeout in ms for a single URL [default: 15000]'],
    ['-d, --debug', 'Enable debug logging'],
    ['-c, --concurrency <number>', 'Maximum number of concurrent sitemap threads [default: 10]'],
    ['-r, --retries <number>', 'Maximum number of retries for failed requests [default: 0]'],
    ['-l, --lastmod <timestamp>', 'Minimum lastmod timestamp value for URLs to include'],
    ['--reject-unauthorized', 'Reject invalid SSL certificates [default: true]'],
    ['--user-agent <string>', 'Set a custom User-Agent header'],
    ['--fields <fields>', 'Comma-separated list of fields to include in output'],
    ['', '(loc,lastmod,changefreq,priority,sitemap,'],
    ['', 'image:loc,image:title,image:caption,'],
    ['', 'video:title,video:description,video:thumbnail_loc)'],
    ['--exclusions <patterns>', 'Comma-separated list of regex patterns to exclude URLs'],
  ];

  const exampleCommands = [
    'npx sitemapper https://example.com/sitemap.xml',
    'npx sitemapper https://example.com/sitemap.xml --format json',
    'npx sitemapper https://example.com/sitemap.xml --fields loc,lastmod,priority --format csv',
  ];

  const helpLines = [
    '',
    `sitemapper v${VERSION}`,
    '',
    'Usage:',
    '  npx sitemapper <sitemap-url> [options]',
    '',
    'Options:',
    ...optionRows.map(
      ([flags, description]) => `  ${flags.padEnd(28)}${description}`,
    ),
    '',
    'Examples:',
    ...exampleCommands.map((command) => `  ${command}`),
    '',
    'For more information, visit: https://github.com/seantomburke/sitemapper',
    '',
  ];

  console.log(helpLines.join('\n'));
}
| 189 | + |
29 | 190 | main(); |
0 commit comments