Skip to content

Commit 5a8988d

Browse files
committed
✨ Fix sitemap index handling and add interactive progress
Sitemap: fix sitemap index files returning child sitemap URLs as pages; now follows the sitemap index to parse the actual child sitemaps; extracts real page URLs instead of sitemap-0.xml, etc. Progress display: interactive terminals get a single updating progress line with ETA; non-interactive (CI) runs keep traditional per-task logging; shows a completion summary with total time and error count.
1 parent 94fbefd commit 5a8988d

File tree

4 files changed

+96
-15
lines changed

4 files changed

+96
-15
lines changed

clients/static-site/package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/static-site/src/tasks.js

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,26 @@ function formatDuration(ms) {
128128
return `${seconds}s`;
129129
}
130130

131+
/**
 * Check if stdout is an interactive TTY
 *
 * `process.stdout.isTTY` is `undefined` (not `false`) when stdout is not a
 * terminal, so it is coerced here to keep the return value a real boolean.
 * @returns {boolean} `true` only when stdout is a TTY and the `CI`
 *   environment variable is unset or empty
 */
function isInteractiveTTY() {
  return Boolean(process.stdout.isTTY) && !process.env.CI;
}
138+
139+
/**
 * Redraw the single in-place progress line on an interactive terminal.
 * Does nothing when stdout is not an interactive TTY.
 * @param {string} text - Text to write
 */
function writeProgress(text) {
  if (!isInteractiveTTY()) {
    return;
  }

  let out = process.stdout;
  // Erase the whole current line, return to column 0, then draw the new text.
  out.clearLine(0);
  out.cursorTo(0);
  out.write(text);
}
150+
131151
/**
132152
* Process all tasks through the tab pool
133153
* @param {Array<Object>} tasks - Array of task objects
@@ -143,6 +163,7 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) {
143163
let total = tasks.length;
144164
let startTime = Date.now();
145165
let taskTimes = [];
166+
let interactive = isInteractiveTTY();
146167

147168
// Merge deps for processTask
148169
let taskDeps = { ...defaultDeps, ...deps };
@@ -176,20 +197,35 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) {
176197
let remaining = total - completed;
177198
// Divide by concurrency since tasks run in parallel
178199
let etaMs = (remaining * avgTime) / config.concurrency;
179-
let eta = remaining > 0 ? ` ~${formatDuration(etaMs)} remaining` : '';
180-
181-
logger.info(
182-
` ✓ [${completed}/${total}] ${task.page.path}@${task.viewport.name}${eta}`
183-
);
200+
let eta = remaining > 0 ? `~${formatDuration(etaMs)} remaining` : '';
201+
let percent = Math.round((completed / total) * 100);
202+
203+
if (interactive) {
204+
// Update single progress line
205+
writeProgress(
206+
` 📸 [${completed}/${total}] ${percent}% ${eta} - ${task.page.path}@${task.viewport.name}`
207+
);
208+
} else {
209+
// Non-interactive: log each completion
210+
logger.info(
211+
` ✓ [${completed}/${total}] ${task.page.path}@${task.viewport.name} ${eta}`
212+
);
213+
}
184214
} catch (error) {
185215
completed++;
186216
errors.push({
187217
page: task.page.path,
188218
viewport: task.viewport.name,
189219
error: error.message,
190220
});
221+
222+
if (interactive) {
223+
// Clear progress line and log error
224+
writeProgress('');
225+
process.stdout.write('\n');
226+
}
191227
logger.error(
192-
` ✗ [${completed}/${total}] ${task.page.path}@${task.viewport.name}: ${error.message}`
228+
` ✗ ${task.page.path}@${task.viewport.name}: ${error.message}`
193229
);
194230
} finally {
195231
pool.release(tab);
@@ -198,9 +234,19 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) {
198234
config.concurrency
199235
);
200236

237+
// Clear progress line and show completion
238+
if (interactive) {
239+
writeProgress('');
240+
process.stdout.write('\n');
241+
}
242+
201243
// Log total time
202244
let totalTime = Date.now() - startTime;
203-
logger.info(` Completed in ${formatDuration(totalTime)}`);
245+
logger.info(` ✅ Completed ${total} screenshots in ${formatDuration(totalTime)}`);
246+
247+
if (errors.length > 0) {
248+
logger.warn(` ⚠️ ${errors.length} failed`);
249+
}
204250

205251
return errors;
206252
}

clients/static-site/src/utils/sitemap.js

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,40 @@ import { XMLParser } from 'fast-xml-parser';
99

1010
/**
1111
* Parse sitemap XML file and extract URLs
12+
* Follows sitemap index files to get all page URLs
1213
* @param {string} sitemapPath - Absolute path to sitemap.xml file
13-
* @returns {Promise<Array<string>>} Array of URLs from sitemap
14+
* @returns {Promise<Array<string>>} Array of page URLs from sitemap
1415
*/
1516
export async function parseSitemapFile(sitemapPath) {
17+
let { dirname } = await import('node:path');
18+
let { existsSync } = await import('node:fs');
19+
1620
try {
1721
let content = await readFile(sitemapPath, 'utf-8');
18-
return parseSitemapXML(content);
22+
let { urls, childSitemaps } = parseSitemapXML(content);
23+
24+
// If this is a sitemap index, follow child sitemaps
25+
if (childSitemaps.length > 0) {
26+
let baseDir = dirname(sitemapPath);
27+
28+
for (let childUrl of childSitemaps) {
29+
// Extract filename from URL (e.g., "sitemap-0.xml" from "https://example.com/sitemap-0.xml")
30+
let filename = childUrl.split('/').pop();
31+
let childPath = join(baseDir, filename);
32+
33+
if (existsSync(childPath)) {
34+
try {
35+
let childContent = await readFile(childPath, 'utf-8');
36+
let childResult = parseSitemapXML(childContent);
37+
urls.push(...childResult.urls);
38+
} catch {
39+
// Skip unreadable child sitemaps
40+
}
41+
}
42+
}
43+
}
44+
45+
return urls;
1946
} catch (error) {
2047
throw new Error(
2148
`Failed to read sitemap at ${sitemapPath}: ${error.message}`
@@ -26,7 +53,7 @@ export async function parseSitemapFile(sitemapPath) {
2653
/**
2754
* Parse sitemap XML content and extract URLs
2855
* @param {string} xmlContent - Sitemap XML content
29-
* @returns {Array<string>} Array of URLs from sitemap
56+
* @returns {{ urls: Array<string>, childSitemaps: Array<string> }} URLs and child sitemap URLs
3057
*/
3158
export function parseSitemapXML(xmlContent) {
3259
let parser = new XMLParser({
@@ -41,18 +68,24 @@ export function parseSitemapXML(xmlContent) {
4168
let urls = Array.isArray(result.urlset.url)
4269
? result.urlset.url
4370
: [result.urlset.url];
44-
return urls.map(entry => entry.loc).filter(Boolean);
71+
return {
72+
urls: urls.map(entry => entry.loc).filter(Boolean),
73+
childSitemaps: [],
74+
};
4575
}
4676

4777
// Handle sitemap index format (sitemap of sitemaps)
4878
if (result.sitemapindex?.sitemap) {
4979
let sitemaps = Array.isArray(result.sitemapindex.sitemap)
5080
? result.sitemapindex.sitemap
5181
: [result.sitemapindex.sitemap];
52-
return sitemaps.map(entry => entry.loc).filter(Boolean);
82+
return {
83+
urls: [],
84+
childSitemaps: sitemaps.map(entry => entry.loc).filter(Boolean),
85+
};
5386
}
5487

55-
return [];
88+
return { urls: [], childSitemaps: [] };
5689
}
5790

5891
/**

clients/static-site/tests/tasks.test.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ describe('processAllTasks', () => {
215215
let logger = {
216216
info: mock.fn(),
217217
error: mock.fn(),
218+
warn: mock.fn(),
218219
};
219220

220221
let deps = {
@@ -258,6 +259,7 @@ describe('processAllTasks', () => {
258259
let logger = {
259260
info: mock.fn(),
260261
error: mock.fn(),
262+
warn: mock.fn(),
261263
};
262264

263265
let deps = {

0 commit comments

Comments
 (0)