Skip to content

Commit a6d44f9

Browse files
committed
Enhance CLI with additional options and formatting capabilities
1 parent acb0e14 commit a6d44f9

File tree

1 file changed

+172
-11
lines changed

1 file changed

+172
-11
lines changed

bin/sitemapper.js

Lines changed: 172 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,190 @@
11
#!/usr/bin/env node
22

33
import Sitemapper from '../lib/assets/sitemapper.js';
4+
import { parseArgs } from 'node:util';
5+
import fs from 'node:fs';
6+
7+
// Version string printed by the --version flag and in the --help banner.
// NOTE(review): this value is hardcoded, not read from package.json at runtime;
// presumably it must be kept in sync with package.json by hand - confirm on release.
8+
const VERSION = '4.0.3';
49

510
/**
 * Parse an optional integer-valued CLI option.
 *
 * @param {string} name - Option name without the leading dashes (used in the error message).
 * @param {string|undefined} raw - Raw string value from parseArgs, if the option was given.
 * @returns {number|undefined} The parsed integer, or undefined when the option is absent/empty.
 * @throws {Error} When a value is present but does not start with an integer.
 */
function parseIntegerOption(name, raw) {
  if (raw === undefined || raw === '') {
    return undefined;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    throw new Error(
      `Invalid value for --${name}: expected an integer, got "${raw}"`,
    );
  }
  return parsed;
}

/**
 * Split a comma-separated option value into trimmed, non-empty entries.
 *
 * @param {string} raw - Comma-separated list as typed on the command line.
 * @returns {string[]} Entries with surrounding whitespace removed and blanks dropped.
 */
function splitList(raw) {
  return raw
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry !== '');
}

/**
 * Translate parsed CLI values into the options object handed to Sitemapper.
 *
 * @param {string} sitemapUrl - URL of the sitemap to crawl.
 * @param {object} values - The `values` object returned by parseArgs.
 * @returns {object} Options for the Sitemapper constructor.
 * @throws {Error} On a non-integer numeric option; `new RegExp` may throw on a bad exclusion pattern.
 */
function buildSitemapperOptions(sitemapUrl, values) {
  const options = {
    url: sitemapUrl,
    debug: values.debug ?? false,
    // Only an explicit `false` (from --no-reject-unauthorized) disables the check.
    rejectUnauthorized: values['reject-unauthorized'] !== false,
  };

  for (const name of ['timeout', 'concurrency', 'retries', 'lastmod']) {
    const parsed = parseIntegerOption(name, values[name]);
    if (parsed !== undefined) {
      options[name] = parsed;
    }
  }

  if (values['user-agent']) {
    options.requestHeaders = { 'User-Agent': values['user-agent'] };
  }

  if (values.fields) {
    const fieldNames = splitList(values.fields);
    if (fieldNames.length > 0) {
      options.fields = Object.fromEntries(
        fieldNames.map((fieldName) => [fieldName, true]),
      );
    }
  }

  if (values.exclusions) {
    // Blank entries are dropped: an empty pattern would match (and exclude) every URL.
    // Patterns are split on commas, so a pattern cannot itself contain a comma.
    const patterns = splitList(values.exclusions);
    if (patterns.length > 0) {
      options.exclusions = patterns.map((pattern) => new RegExp(pattern));
    }
  }

  return options;
}

/**
 * Render one value as a CSV cell, quoting it when it contains a delimiter,
 * a double quote or a line break (embedded quotes are doubled).
 *
 * @param {*} value - Cell value; null/undefined render as an empty cell.
 * @returns {string} The escaped cell text.
 */
function toCsvCell(value) {
  if (value == null) {
    return '';
  }
  const text =
    typeof value === 'object' ? JSON.stringify(value) : String(value);
  return /[",\r\n]/.test(text) ? `"${text.replaceAll('"', '""')}"` : text;
}

/**
 * Format the crawled sites as CSV.
 *
 * @param {Array<string|object>} sites - Site entries returned by Sitemapper.
 * @param {object} [fields] - Requested fields; its keys become the header row and columns.
 * @returns {string} CSV text; one URL per line when no fields were requested.
 */
function formatCsv(sites, fields) {
  if (!fields) {
    return sites.map((site) => toCsvCell(site)).join('\n');
  }
  const columns = Object.keys(fields);
  const header = columns.map((column) => toCsvCell(column)).join(',');
  const rows = sites.map((site) =>
    typeof site === 'string'
      ? toCsvCell(site)
      : columns.map((column) => toCsvCell(site[column])).join(','),
  );
  return [header, ...rows].join('\n');
}

/**
 * Format the crawl result as a numbered, human-readable listing,
 * followed by a numbered error list when the result carries errors.
 *
 * @param {object} result - Object with `url`, `sites` and optionally `errors`.
 * @returns {string} The plaintext report.
 */
function formatPlaintext(result) {
  let output = `Sitemap URL: ${result.url}\n\nFound URLs (${result.sites.length}):\n`;
  output += result.sites
    .map((site, index) => {
      const text = typeof site === 'string' ? site : JSON.stringify(site);
      return `${index + 1}. ${text}\n`;
    })
    .join('');

  const errors = result.errors ?? [];
  if (errors.length > 0) {
    output += `\nErrors (${errors.length}):\n`;
    output += errors
      .map((error, index) => `${index + 1}. ${error.message} (${error.url})\n`)
      .join('');
  }
  return output;
}

/**
 * Dispatch to the formatter selected by --format.
 *
 * @param {string} format - Requested format name (case-insensitive).
 * @param {object} result - Result object returned by `sitemapper.fetch`.
 * @param {object} [fields] - Requested fields, used for CSV columns.
 * @returns {string} Formatted output; any unrecognised format falls back to plaintext.
 */
function formatOutput(format, result, fields) {
  switch (format.toLowerCase()) {
    case 'json':
      return JSON.stringify(result, null, 2);
    case 'csv':
      return formatCsv(result.sites, fields);
    case 'plaintext':
    default:
      return formatPlaintext(result);
  }
}

/**
 * CLI entry point: parses the command line, fetches the sitemap and prints
 * the result (or writes it to the file given with --output).
 *
 * Exits the process with code 1 on invalid arguments, a missing URL, or any
 * error raised while building options, fetching, formatting or writing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let parsed;
  try {
    parsed = parseArgs({
      args: process.argv.slice(2),
      options: {
        // Core options
        help: { type: 'boolean', short: 'h' },
        version: { type: 'boolean', short: 'v' },
        url: { type: 'string', short: 'u' },
        // Output formatting
        format: { type: 'string', short: 'f', default: 'plaintext' },
        output: { type: 'string', short: 'o' },
        // Sitemapper options
        timeout: { type: 'string', short: 't' },
        debug: { type: 'boolean', short: 'd' },
        concurrency: { type: 'string', short: 'c' },
        retries: { type: 'string', short: 'r' },
        lastmod: { type: 'string', short: 'l' },
        'reject-unauthorized': { type: 'boolean' },
        fields: { type: 'string' },
        'user-agent': { type: 'string' },
        exclusions: { type: 'string' },
      },
      allowPositionals: true,
      // Lets --no-reject-unauthorized yield an explicit `false`; without it a
      // boolean option can only ever be true or undefined.
      allowNegative: true,
    });
  } catch (error) {
    // parseArgs throws on unknown options or missing option values.
    console.error('Error:', error.message);
    console.error('Run with --help for usage information');
    process.exit(1);
  }
  const { values, positionals } = parsed;

  // Handle help command
  if (values.help) {
    displayHelp();
    return;
  }

  // Handle version command
  if (values.version) {
    console.log(`sitemapper v${VERSION}`);
    return;
  }

  // The positional argument takes precedence over --url.
  const sitemapUrl = positionals[0] || values.url;
  if (!sitemapUrl) {
    console.error('Error: Please provide a sitemap URL');
    console.error('Run with --help for usage information');
    process.exit(1);
  }

  try {
    const options = buildSitemapperOptions(sitemapUrl, values);
    const sitemapper = new Sitemapper(options);
    const result = await sitemapper.fetch(sitemapUrl);
    const output = formatOutput(values.format, result, options.fields);

    if (values.output) {
      fs.writeFileSync(values.output, output);
      console.log(`Results written to ${values.output}`);
    } else {
      console.log(output);
    }
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
28154

155+
/**
 * Print the CLI usage text (banner, usage line, option table, examples) to stdout.
 *
 * The option table is built from flag/description pairs and padded to a common
 * width so the descriptions line up regardless of flag length. Rows with an
 * empty flag are continuation lines for the description above them.
 *
 * @returns {void}
 */
function displayHelp() {
  const optionRows = [
    ['-h, --help', 'Show this help message and exit'],
    ['-v, --version', 'Show version information and exit'],
    ['-u, --url <url>', 'Sitemap URL to crawl (alternative to positional arg)'],
    ['-o, --output <file>', 'Write results to a file instead of stdout'],
    ['-f, --format <format>', 'Output format (plaintext, csv, json) [default: plaintext]'],
    ['-t, --timeout <ms>', 'Maximum timeout in ms for a single URL [default: 15000]'],
    ['-d, --debug', 'Enable debug logging'],
    ['-c, --concurrency <number>', 'Maximum number of concurrent sitemap threads [default: 10]'],
    ['-r, --retries <number>', 'Maximum number of retries for failed requests [default: 0]'],
    ['-l, --lastmod <timestamp>', 'Minimum lastmod timestamp value for URLs to include'],
    ['    --reject-unauthorized', 'Reject invalid SSL certificates [default: true]'],
    ['    --user-agent <string>', 'Set a custom User-Agent header'],
    ['    --fields <fields>', 'Comma-separated list of fields to include in output'],
    ['', '(loc,lastmod,changefreq,priority,sitemap,'],
    ['', 'image:loc,image:title,image:caption,'],
    ['', 'video:title,video:description,video:thumbnail_loc)'],
    ['    --exclusions <patterns>', 'Comma-separated list of regex patterns to exclude URLs'],
  ];

  // Widest flag plus a two-space gutter before the description column.
  const flagWidth = Math.max(...optionRows.map(([flag]) => flag.length)) + 2;
  const optionLines = optionRows.map(
    ([flag, description]) => `  ${flag.padEnd(flagWidth)}${description}`,
  );

  // Leading and trailing empty entries keep the blank line before and after the text.
  const helpText = [
    '',
    `sitemapper v${VERSION}`,
    '',
    'Usage:',
    '  npx sitemapper <sitemap-url> [options]',
    '',
    'Options:',
    ...optionLines,
    '',
    'Examples:',
    '  npx sitemapper https://example.com/sitemap.xml',
    '  npx sitemapper https://example.com/sitemap.xml --format json',
    '  npx sitemapper https://example.com/sitemap.xml --fields loc,lastmod,priority --format csv',
    '',
    'For more information, visit: https://github.com/seantomburke/sitemapper',
    '',
  ].join('\n');

  console.log(helpText);
}
189+
29190
// Run the CLI. The returned promise is given a rejection handler so that any
// failure escaping main() prints a short message and exits with code 1
// instead of surfacing as an unhandled rejection.
main().catch((error) => {
  console.error('Error:', error instanceof Error ? error.message : error);
  process.exit(1);
});

0 commit comments

Comments
 (0)