diff --git a/clients/static-site/README.md b/clients/static-site/README.md index 81f30b0f..51bad105 100644 --- a/clients/static-site/README.md +++ b/clients/static-site/README.md @@ -77,7 +77,8 @@ export default { omitBackground: false, }, - concurrency: 3, + // Concurrency auto-detected from CPU cores (min 2, max 8) + // concurrency: 4, // Page filtering include: 'blog/**', @@ -134,12 +135,13 @@ Configuration is merged in this order (later overrides earlier): ## CLI Options - `--viewports ` - Comma-separated viewport definitions (format: `name:WxH`) -- `--concurrency ` - Number of parallel pages to process (default: 3) +- `--concurrency ` - Number of parallel browser tabs (default: auto-detected based on CPU cores, min 2, max 8) - `--include ` - Include page pattern (glob) - `--exclude ` - Exclude page pattern (glob) - `--browser-args ` - Additional Puppeteer browser arguments - `--headless` - Run browser in headless mode (default: true) - `--full-page` - Capture full page screenshots (default: false) +- `--dry-run` - Print discovered pages and task count without capturing screenshots - `--use-sitemap` - Use sitemap.xml for page discovery (default: true) - `--sitemap-path ` - Path to sitemap.xml relative to build directory @@ -382,6 +384,13 @@ jobs: ### Pages not found +Use `--dry-run` to see which pages are discovered without capturing screenshots: +```bash +vizzly static-site ./dist --dry-run +``` + +This shows pages grouped by source (sitemap vs HTML scan), the total screenshot count, and your current configuration. 
+ Ensure your build has completed and check for sitemap.xml or HTML files: ```bash ls dist/sitemap.xml diff --git a/clients/static-site/package-lock.json b/clients/static-site/package-lock.json index e380a80d..1edadf9b 100644 --- a/clients/static-site/package-lock.json +++ b/clients/static-site/package-lock.json @@ -1,12 +1,12 @@ { "name": "@vizzly-testing/static-site", - "version": "0.0.7", + "version": "0.0.8", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@vizzly-testing/static-site", - "version": "0.0.7", + "version": "0.0.8", "license": "MIT", "dependencies": { "cosmiconfig": "^9.0.0", diff --git a/clients/static-site/src/config-schema.js b/clients/static-site/src/config-schema.js index 054e1d4e..9e6d2362 100644 --- a/clients/static-site/src/config-schema.js +++ b/clients/static-site/src/config-schema.js @@ -3,8 +3,24 @@ * Uses Zod for runtime validation */ +import { cpus } from 'node:os'; import { z } from 'zod'; +/** + * Cache CPU count at module load time + * Avoids repeated system calls + */ +let cachedCpuCount = cpus().length; + +/** + * Calculate smart default concurrency based on CPU cores + * Uses half the cores (min 2, max 8) to balance speed vs resource usage + * @returns {number} Default concurrency value + */ +export function getDefaultConcurrency() { + return Math.max(2, Math.min(8, Math.floor(cachedCpuCount / 2))); +} + /** * Viewport schema */ @@ -80,7 +96,7 @@ export let staticSiteConfigSchema = z fullPage: false, omitBackground: false, }), - concurrency: z.number().int().positive().default(3), + concurrency: z.number().int().positive().default(getDefaultConcurrency()), include: z.string().nullable().optional(), exclude: z.string().nullable().optional(), pageDiscovery: pageDiscoverySchema.default({ @@ -95,7 +111,7 @@ export let staticSiteConfigSchema = z viewports: [{ name: 'default', width: 1920, height: 1080 }], browser: { headless: true, args: [] }, screenshot: { fullPage: false, omitBackground: false }, - concurrency: 
3, + concurrency: getDefaultConcurrency(), pageDiscovery: { useSitemap: true, sitemapPath: 'sitemap.xml', diff --git a/clients/static-site/src/index.js b/clients/static-site/src/index.js index bfa3530c..6715ad87 100644 --- a/clients/static-site/src/index.js +++ b/clients/static-site/src/index.js @@ -93,6 +93,51 @@ export async function run(buildPath, options = {}, context = {}) { // Load and merge configuration let config = await loadConfig(buildPath, options, vizzlyConfig); + // Handle dry-run mode early - just discover and print pages + if (options.dryRun) { + let pages = await discoverPages(config.buildPath, config); + logger.info( + `🔍 Dry run: Found ${pages.length} pages in ${config.buildPath}\n` + ); + + if (pages.length === 0) { + logger.warn(' No pages found matching your configuration.'); + return; + } + + // Group by source for clarity + let sitemapPages = pages.filter(p => p.source === 'sitemap'); + let htmlPages = pages.filter(p => p.source === 'html'); + + if (sitemapPages.length > 0) { + logger.info(` From sitemap (${sitemapPages.length}):`); + for (let page of sitemapPages) { + logger.info(` ${page.path}`); + } + } + + if (htmlPages.length > 0) { + logger.info(` From HTML scan (${htmlPages.length}):`); + for (let page of htmlPages) { + logger.info(` ${page.path}`); + } + } + + // Show task count that would be generated + let taskCount = pages.length * config.viewports.length; + logger.info(''); + logger.info(`📸 Would capture ${taskCount} screenshots:`); + logger.info( + ` ${pages.length} pages × ${config.viewports.length} viewports` + ); + logger.info( + ` Viewports: ${config.viewports.map(v => `${v.name} (${v.width}×${v.height})`).join(', ')}` + ); + logger.info(` Concurrency: ${config.concurrency} tabs`); + + return; + } + // Determine mode: TDD or Run let debug = logger.debug?.bind(logger) || (() => {}); let isTdd = await isTddModeAvailable(debug); @@ -196,8 +241,20 @@ export async function run(buildPath, options = {}, context = {}) { } if (!isTdd 
&& !hasToken) { - logger.warn('⚠️ No TDD server or API token found'); - logger.info(' Run `vizzly tdd start` or set VIZZLY_TOKEN'); + logger.error('❌ No TDD server or API token found'); + logger.info(''); + logger.info(' To capture screenshots, you need either:'); + logger.info(''); + logger.info(' 1. Start TDD server first (recommended for local dev):'); + logger.info(' vizzly tdd start'); + logger.info(' npx vizzly static-site ./dist'); + logger.info(''); + logger.info(' 2. Or set VIZZLY_TOKEN for cloud uploads:'); + logger.info( + ' VIZZLY_TOKEN=your-token npx vizzly static-site ./dist' + ); + logger.info(''); + return; } // Start HTTP server to serve static site files diff --git a/clients/static-site/src/plugin.js b/clients/static-site/src/plugin.js index 352f9f1f..39ec264d 100644 --- a/clients/static-site/src/plugin.js +++ b/clients/static-site/src/plugin.js @@ -3,6 +3,8 @@ * Registers the `vizzly static-site` command */ +import { getDefaultConcurrency } from './config-schema.js'; + export default { name: 'static-site', version: '0.1.0', @@ -25,7 +27,7 @@ export default { fullPage: false, omitBackground: false, }, - concurrency: 3, + concurrency: getDefaultConcurrency(), include: null, exclude: null, pageDiscovery: { @@ -64,6 +66,10 @@ export default { .option('--browser-args ', 'Additional Puppeteer browser arguments') .option('--headless', 'Run browser in headless mode') .option('--full-page', 'Capture full page screenshots') + .option( + '--dry-run', + 'Print discovered pages without capturing screenshots' + ) .option('--use-sitemap', 'Use sitemap.xml for page discovery') .option( '--sitemap-path ', diff --git a/clients/static-site/src/tasks.js b/clients/static-site/src/tasks.js index d8f5e896..86615f29 100644 --- a/clients/static-site/src/tasks.js +++ b/clients/static-site/src/tasks.js @@ -112,6 +112,93 @@ export async function mapWithConcurrency(items, fn, concurrency) { await Promise.all(results); } +/** + * Format milliseconds as human-readable duration 
+ * @param {number} ms - Milliseconds + * @returns {string} Formatted duration (e.g., "2m 30s", "45s") + */ +function formatDuration(ms) { + let seconds = Math.floor(ms / 1000); + let minutes = Math.floor(seconds / 60); + seconds = seconds % 60; + + if (minutes > 0) { + return `${minutes}m ${seconds}s`; + } + return `${seconds}s`; +} + +/** + * Check if stdout is an interactive TTY + * @returns {boolean} + */ +function isInteractiveTTY() { + return process.stdout.isTTY && !process.env.CI; +} + +/** + * Create a simple output coordinator for TTY progress + * Prevents race conditions when multiple concurrent tasks update progress + * @returns {Object} Coordinator with writeProgress and logError methods + */ +function createOutputCoordinator() { + let pendingErrors = []; + let isWriting = false; + + return { + /** + * Update progress line (only in TTY mode) + * @param {string} text - Progress text + */ + writeProgress(text) { + if (!isInteractiveTTY()) return; + + // Flush any pending errors first + if (pendingErrors.length > 0 && !isWriting) { + isWriting = true; + process.stdout.clearLine(0); + process.stdout.cursorTo(0); + for (let err of pendingErrors) { + process.stdout.write(`${err}\n`); + } + pendingErrors = []; + isWriting = false; + } + + process.stdout.clearLine(0); + process.stdout.cursorTo(0); + process.stdout.write(text); + }, + + /** + * Queue an error message to be printed + * @param {string} message - Error message + * @param {Object} logger - Logger instance + */ + logError(message, logger) { + if (isInteractiveTTY()) { + // Queue error to be printed before next progress update + pendingErrors.push(message); + } + logger.error(message); + }, + + /** + * Clear progress and flush any remaining errors + */ + flush() { + if (!isInteractiveTTY()) return; + + process.stdout.clearLine(0); + process.stdout.cursorTo(0); + for (let err of pendingErrors) { + process.stdout.write(`${err}\n`); + } + pendingErrors = []; + }, + }; +} + /** * Process all tasks 
through the tab pool * @param {Array} tasks - Array of task objects @@ -125,6 +212,13 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) { let errors = []; let completed = 0; let total = tasks.length; + let startTime = Date.now(); + let taskTimes = []; + let interactive = isInteractiveTTY(); + let output = createOutputCoordinator(); + + // Minimum samples before showing ETA (avoids wild estimates from cold start) + let minSamplesForEta = Math.min(5, Math.ceil(total * 0.1)); // Merge deps for processTask let taskDeps = { ...defaultDeps, ...deps }; @@ -132,6 +226,7 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) { await mapWithConcurrency( tasks, async task => { + let taskStart = Date.now(); let tab = await pool.acquire(); // Handle case where pool was drained while waiting @@ -147,9 +242,36 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) { try { await processTask(tab, task, taskDeps); completed++; - logger.info( - ` ✓ [${completed}/${total}] ${task.page.path}@${task.viewport.name}` - ); + + // Track task duration for ETA calculation + let taskDuration = Date.now() - taskStart; + taskTimes.push(taskDuration); + + // Calculate ETA - only show after enough samples for accuracy + let eta = ''; + if (taskTimes.length >= minSamplesForEta) { + // Use recent samples for better accuracy (exponential-ish weighting) + let recentTimes = taskTimes.slice(-20); + let avgTime = + recentTimes.reduce((a, b) => a + b, 0) / recentTimes.length; + let remaining = total - completed; + // Divide by concurrency since tasks run in parallel + let etaMs = (remaining * avgTime) / config.concurrency; + eta = remaining > 0 ? 
`~${formatDuration(etaMs)} remaining` : ''; + } + let percent = Math.round((completed / total) * 100); + + if (interactive) { + // Update single progress line + output.writeProgress( + ` 📸 [${completed}/${total}] ${percent}% ${eta} - ${task.page.path}@${task.viewport.name}` + ); + } else { + // Non-interactive: log each completion + logger.info( + ` ✓ [${completed}/${total}] ${task.page.path}@${task.viewport.name} ${eta}` + ); + } } catch (error) { completed++; errors.push({ @@ -157,8 +279,10 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) { viewport: task.viewport.name, error: error.message, }); - logger.error( - ` ✗ [${completed}/${total}] ${task.page.path}@${task.viewport.name}: ${error.message}` + + output.logError( + ` ✗ ${task.page.path}@${task.viewport.name}: ${error.message}`, + logger ); } finally { pool.release(tab); @@ -167,5 +291,18 @@ export async function processAllTasks(tasks, pool, config, logger, deps = {}) { config.concurrency ); + // Flush any remaining output + output.flush(); + + // Log total time + let totalTime = Date.now() - startTime; + logger.info( + ` ✅ Completed ${total} screenshots in ${formatDuration(totalTime)}` + ); + + if (errors.length > 0) { + logger.warn(` ⚠️ ${errors.length} failed`); + } + return errors; } diff --git a/clients/static-site/src/utils/sitemap.js b/clients/static-site/src/utils/sitemap.js index 5d641134..396f5002 100644 --- a/clients/static-site/src/utils/sitemap.js +++ b/clients/static-site/src/utils/sitemap.js @@ -3,19 +3,102 @@ * Functions for extracting URLs from sitemap.xml files */ +import { existsSync } from 'node:fs'; import { readFile } from 'node:fs/promises'; -import { join } from 'node:path'; +import { basename, dirname, join, resolve, sep } from 'node:path'; import { XMLParser } from 'fast-xml-parser'; +/** + * Check if a path is within the base directory + * Prevents path traversal attacks + * @param {string} targetPath - Path to validate + * @param {string} baseDir - 
Base directory that should contain the target + * @returns {boolean} True if targetPath is within baseDir + */ +function isWithinDirectory(targetPath, baseDir) { + let resolvedBase = resolve(baseDir); + let resolvedTarget = resolve(targetPath); + + return ( + resolvedTarget.startsWith(resolvedBase + sep) || + resolvedTarget === resolvedBase + ); +} + +/** + * Safely extract filename from URL + * Validates the filename doesn't contain path traversal sequences + * @param {string} url - URL to extract filename from + * @returns {string|null} Safe filename or null if invalid + */ +function safeFilenameFromUrl(url) { + // Extract the last path segment + let filename = url.split('/').pop() || ''; + + // Reject if it contains path traversal or suspicious characters + if ( + filename.includes('..') || + filename.includes('/') || + filename.includes('\\') || + filename.includes('\0') + ) { + return null; + } + + // Use basename as extra safety + filename = basename(filename); + + // Must be a valid sitemap filename + if (!filename.endsWith('.xml')) { + return null; + } + + return filename; +} + /** * Parse sitemap XML file and extract URLs + * Follows sitemap index files to get all page URLs * @param {string} sitemapPath - Absolute path to sitemap.xml file - * @returns {Promise>} Array of URLs from sitemap + * @returns {Promise>} Array of page URLs from sitemap */ export async function parseSitemapFile(sitemapPath) { try { let content = await readFile(sitemapPath, 'utf-8'); - return parseSitemapXML(content); + let { urls, childSitemaps } = parseSitemapXML(content); + + // If this is a sitemap index, follow child sitemaps + if (childSitemaps.length > 0) { + let baseDir = dirname(sitemapPath); + + for (let childUrl of childSitemaps) { + // Safely extract filename from URL + let filename = safeFilenameFromUrl(childUrl); + if (!filename) { + // Skip invalid filenames (potential path traversal) + continue; + } + + let childPath = join(baseDir, filename); + + // Verify the 
resolved path is still within baseDir + if (!isWithinDirectory(childPath, baseDir)) { + continue; + } + + if (existsSync(childPath)) { + try { + let childContent = await readFile(childPath, 'utf-8'); + let childResult = parseSitemapXML(childContent); + urls.push(...childResult.urls); + } catch { + // Skip unreadable child sitemaps + } + } + } + } + + return urls; } catch (error) { throw new Error( `Failed to read sitemap at ${sitemapPath}: ${error.message}` @@ -26,7 +109,7 @@ export async function parseSitemapFile(sitemapPath) { /** * Parse sitemap XML content and extract URLs * @param {string} xmlContent - Sitemap XML content - * @returns {Array} Array of URLs from sitemap + * @returns {{ urls: Array, childSitemaps: Array }} URLs and child sitemap URLs */ export function parseSitemapXML(xmlContent) { let parser = new XMLParser({ @@ -41,7 +124,10 @@ export function parseSitemapXML(xmlContent) { let urls = Array.isArray(result.urlset.url) ? result.urlset.url : [result.urlset.url]; - return urls.map(entry => entry.loc).filter(Boolean); + return { + urls: urls.map(entry => entry.loc).filter(Boolean), + childSitemaps: [], + }; } // Handle sitemap index format (sitemap of sitemaps) @@ -49,10 +135,13 @@ export function parseSitemapXML(xmlContent) { let sitemaps = Array.isArray(result.sitemapindex.sitemap) ? 
result.sitemapindex.sitemap : [result.sitemapindex.sitemap]; - return sitemaps.map(entry => entry.loc).filter(Boolean); + return { + urls: [], + childSitemaps: sitemaps.map(entry => entry.loc).filter(Boolean), + }; } - return []; + return { urls: [], childSitemaps: [] }; } /** @@ -91,11 +180,9 @@ export function urlsToRelativePaths(urls, baseUrl) { * Discover sitemap in build directory * Looks for common sitemap filenames * @param {string} buildDir - Build directory path - * @returns {Promise} Path to sitemap if found, null otherwise + * @returns {string|null} Path to sitemap if found, null otherwise */ -export async function discoverSitemap(buildDir) { - let { existsSync } = await import('node:fs'); - +export function discoverSitemap(buildDir) { let commonNames = ['sitemap.xml', 'sitemap_index.xml', 'sitemap-index.xml']; for (let filename of commonNames) { diff --git a/clients/static-site/tests/config-schema.test.js b/clients/static-site/tests/config-schema.test.js new file mode 100644 index 00000000..bf94e6b8 --- /dev/null +++ b/clients/static-site/tests/config-schema.test.js @@ -0,0 +1,190 @@ +/** + * Tests for configuration schema validation + */ + +import assert from 'node:assert'; +import { cpus } from 'node:os'; +import { describe, it } from 'node:test'; +import { + getDefaultConcurrency, + validateStaticSiteConfig, + validateStaticSiteConfigWithDefaults, +} from '../src/config-schema.js'; + +describe('config-schema', () => { + describe('getDefaultConcurrency', () => { + it('returns a positive integer', () => { + let concurrency = getDefaultConcurrency(); + + assert.ok(Number.isInteger(concurrency)); + assert.ok(concurrency > 0); + }); + + it('returns at least 2', () => { + let concurrency = getDefaultConcurrency(); + + assert.ok(concurrency >= 2); + }); + + it('returns at most 8', () => { + let concurrency = getDefaultConcurrency(); + + assert.ok(concurrency <= 8); + }); + + it('calculates based on CPU cores', () => { + let cores = cpus().length; + let 
expected = Math.max(2, Math.min(8, Math.floor(cores / 2))); + let concurrency = getDefaultConcurrency(); + + assert.strictEqual(concurrency, expected); + }); + }); + + describe('validateStaticSiteConfig', () => { + it('validates minimal config', () => { + let config = {}; + + let validated = validateStaticSiteConfig(config); + + assert.ok(validated.viewports); + assert.ok(validated.browser); + assert.ok(validated.screenshot); + assert.ok(validated.pageDiscovery); + }); + + it('applies default concurrency from CPU cores', () => { + let config = {}; + + let validated = validateStaticSiteConfig(config); + let expected = getDefaultConcurrency(); + + assert.strictEqual(validated.concurrency, expected); + }); + + it('allows overriding concurrency', () => { + let config = { concurrency: 10 }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.concurrency, 10); + }); + + it('validates viewports', () => { + let config = { + viewports: [ + { name: 'mobile', width: 375, height: 667 }, + { name: 'desktop', width: 1920, height: 1080 }, + ], + }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.viewports.length, 2); + assert.strictEqual(validated.viewports[0].name, 'mobile'); + }); + + it('rejects invalid viewport', () => { + let config = { + viewports: [{ name: 'invalid', width: -100, height: 667 }], + }; + + assert.throws(() => validateStaticSiteConfig(config)); + }); + + it('validates browser config', () => { + let config = { + browser: { + headless: false, + args: ['--no-sandbox'], + }, + }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.browser.headless, false); + assert.deepStrictEqual(validated.browser.args, ['--no-sandbox']); + }); + + it('validates screenshot config', () => { + let config = { + screenshot: { + fullPage: true, + omitBackground: true, + }, + }; + + let validated = validateStaticSiteConfig(config); + + 
assert.strictEqual(validated.screenshot.fullPage, true); + assert.strictEqual(validated.screenshot.omitBackground, true); + }); + + it('validates page discovery config', () => { + let config = { + pageDiscovery: { + useSitemap: false, + sitemapPath: 'custom-sitemap.xml', + scanHtml: true, + }, + }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.pageDiscovery.useSitemap, false); + assert.strictEqual( + validated.pageDiscovery.sitemapPath, + 'custom-sitemap.xml' + ); + }); + + it('validates include/exclude patterns', () => { + let config = { + include: '/blog/*', + exclude: '/drafts/*', + }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.include, '/blog/*'); + assert.strictEqual(validated.exclude, '/drafts/*'); + }); + + it('allows null include/exclude', () => { + let config = { + include: null, + exclude: null, + }; + + let validated = validateStaticSiteConfig(config); + + assert.strictEqual(validated.include, null); + assert.strictEqual(validated.exclude, null); + }); + }); + + describe('validateStaticSiteConfigWithDefaults', () => { + it('returns defaults when config is undefined', () => { + let validated = validateStaticSiteConfigWithDefaults(undefined); + + assert.ok(validated.viewports); + assert.ok(validated.browser); + assert.ok(validated.concurrency > 0); + }); + + it('returns defaults when config is null', () => { + let validated = validateStaticSiteConfigWithDefaults(null); + + assert.ok(validated.viewports); + assert.ok(validated.concurrency > 0); + }); + + it('validates provided config', () => { + let config = { concurrency: 5 }; + + let validated = validateStaticSiteConfigWithDefaults(config); + + assert.strictEqual(validated.concurrency, 5); + }); + }); +}); diff --git a/clients/static-site/tests/config.test.js b/clients/static-site/tests/config.test.js index b9a4735d..396b23e1 100644 --- a/clients/static-site/tests/config.test.js +++ 
b/clients/static-site/tests/config.test.js @@ -66,7 +66,10 @@ describe('config', () => { let config = parseCliOptions(options); assert.strictEqual(config.pageDiscovery.useSitemap, false); - assert.strictEqual(config.pageDiscovery.sitemapPath, 'custom-sitemap.xml'); + assert.strictEqual( + config.pageDiscovery.sitemapPath, + 'custom-sitemap.xml' + ); }); }); diff --git a/clients/static-site/tests/crawler.test.js b/clients/static-site/tests/crawler.test.js index 4f27c3b9..938cbf18 100644 --- a/clients/static-site/tests/crawler.test.js +++ b/clients/static-site/tests/crawler.test.js @@ -31,7 +31,10 @@ describe('crawler', () => { }); it('normalizes separators', () => { - assert.strictEqual(filePathToUrlPath('blog\\post-1.html'), '/blog/post-1'); + assert.strictEqual( + filePathToUrlPath('blog\\post-1.html'), + '/blog/post-1' + ); }); it('ensures leading slash', () => { diff --git a/clients/static-site/tests/dry-run.test.js b/clients/static-site/tests/dry-run.test.js new file mode 100644 index 00000000..d8ddeddb --- /dev/null +++ b/clients/static-site/tests/dry-run.test.js @@ -0,0 +1,183 @@ +/** + * Tests for dry-run mode + * Verifies that --dry-run prints discovered pages without capturing screenshots + */ + +import assert from 'node:assert'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { describe, it, before, after, mock } from 'node:test'; +import { run } from '../src/index.js'; + +describe('dry-run mode', () => { + let testDir; + let logMessages; + let mockLogger; + + before(async () => { + // Create temp directory with test files + testDir = join(tmpdir(), `vizzly-dry-run-test-${Date.now()}`); + await mkdir(testDir, { recursive: true }); + + // Create test HTML files + await writeFile( + join(testDir, 'index.html'), + 'Home' + ); + await writeFile( + join(testDir, 'about.html'), + 'About' + ); + + // Create nested page + await mkdir(join(testDir, 'blog'), { recursive: true 
}); + await writeFile( + join(testDir, 'blog', 'post-1.html'), + 'Blog Post' + ); + }); + + after(async () => { + await rm(testDir, { recursive: true, force: true }); + }); + + it('prints discovered pages without launching browser', async () => { + logMessages = []; + mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + await run(testDir, { dryRun: true }, { logger: mockLogger }); + + // Should have logged found pages + let foundMessage = logMessages.find(m => m.msg.includes('Dry run:')); + assert.ok(foundMessage, 'Should log dry run header'); + assert.ok(foundMessage.msg.includes('3 pages'), 'Should find 3 pages'); + + // Should show HTML scan source + let htmlScanMessage = logMessages.find(m => + m.msg.includes('From HTML scan') + ); + assert.ok(htmlScanMessage, 'Should show HTML scan source'); + + // Should show individual paths + let aboutPath = logMessages.find(m => m.msg.includes('/about')); + assert.ok(aboutPath, 'Should list /about page'); + + let blogPath = logMessages.find(m => m.msg.includes('/blog/post-1')); + assert.ok(blogPath, 'Should list /blog/post-1 page'); + + // Should show task count summary + let taskMessage = logMessages.find(m => m.msg.includes('Would capture')); + assert.ok(taskMessage, 'Should show screenshot count'); + }); + + it('shows viewport information', async () => { + logMessages = []; + mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + await run(testDir, { dryRun: true }, { logger: mockLogger }); + + // Should show viewport details + let viewportMessage = logMessages.find(m => m.msg.includes('Viewports:')); + assert.ok(viewportMessage, 'Should show viewport information'); + }); + + it('shows concurrency 
setting', async () => { + logMessages = []; + mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + await run(testDir, { dryRun: true }, { logger: mockLogger }); + + // Should show concurrency + let concurrencyMessage = logMessages.find(m => + m.msg.includes('Concurrency:') + ); + assert.ok(concurrencyMessage, 'Should show concurrency setting'); + assert.ok(concurrencyMessage.msg.includes('tabs'), 'Should mention tabs'); + }); + + it('respects include pattern', async () => { + logMessages = []; + mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + await run( + testDir, + { dryRun: true, include: '/blog/*' }, + { logger: mockLogger } + ); + + // Should only find 1 page (blog post) + let foundMessage = logMessages.find(m => m.msg.includes('Dry run:')); + assert.ok( + foundMessage.msg.includes('1 pages'), + 'Should find 1 page matching pattern' + ); + }); + + it('respects exclude pattern', async () => { + logMessages = []; + mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + await run( + testDir, + { dryRun: true, exclude: '/blog/*' }, + { logger: mockLogger } + ); + + // Should find 2 pages (home and about, excluding blog) + let foundMessage = logMessages.find(m => m.msg.includes('Dry run:')); + assert.ok( + foundMessage.msg.includes('2 pages'), + 'Should find 2 pages after exclusion' + ); + }); + + it('warns when no pages found', async () => { + let emptyDir = join(tmpdir(), `vizzly-empty-test-${Date.now()}`); + await mkdir(emptyDir, { recursive: true }); + + logMessages = []; + 
mockLogger = { + info: msg => logMessages.push({ level: 'info', msg }), + warn: msg => logMessages.push({ level: 'warn', msg }), + error: msg => logMessages.push({ level: 'error', msg }), + debug: () => {}, + }; + + try { + await run(emptyDir, { dryRun: true }, { logger: mockLogger }); + + let warnMessage = logMessages.find( + m => m.level === 'warn' && m.msg.includes('No pages found') + ); + assert.ok(warnMessage, 'Should warn when no pages found'); + } finally { + await rm(emptyDir, { recursive: true, force: true }); + } + }); +}); diff --git a/clients/static-site/tests/hooks.test.js b/clients/static-site/tests/hooks.test.js index 55a003ce..48b21e77 100644 --- a/clients/static-site/tests/hooks.test.js +++ b/clients/static-site/tests/hooks.test.js @@ -106,7 +106,10 @@ describe('hooks', () => { await applyHook(mockPage, mockHook, context); - assert.deepStrictEqual(mockHook.mock.calls[0].arguments, [mockPage, context]); + assert.deepStrictEqual(mockHook.mock.calls[0].arguments, [ + mockPage, + context, + ]); }); it('handles null hook gracefully', async () => { diff --git a/clients/static-site/tests/screenshot.test.js b/clients/static-site/tests/screenshot.test.js index 627b1615..1d441745 100644 --- a/clients/static-site/tests/screenshot.test.js +++ b/clients/static-site/tests/screenshot.test.js @@ -129,7 +129,13 @@ describe('generateScreenshotProperties', () => { let viewport1 = { name: 'mobile', width: 375, height: 667 }; let viewport2 = { name: 'desktop', width: 1920, height: 1080 }; - assert.strictEqual(generateScreenshotProperties(viewport1).viewport, 'mobile'); - assert.strictEqual(generateScreenshotProperties(viewport2).viewport, 'desktop'); + assert.strictEqual( + generateScreenshotProperties(viewport1).viewport, + 'mobile' + ); + assert.strictEqual( + generateScreenshotProperties(viewport2).viewport, + 'desktop' + ); }); }); diff --git a/clients/static-site/tests/tasks.test.js b/clients/static-site/tests/tasks.test.js index 2fd5949f..949f115a 100644 --- 
a/clients/static-site/tests/tasks.test.js +++ b/clients/static-site/tests/tasks.test.js @@ -201,7 +201,8 @@ describe('processAllTasks', () => { assert.strictEqual(errors.length, 0); assert.strictEqual(acquireCount, 2); assert.strictEqual(releaseCalls, 2); - assert.strictEqual(logger.info.mock.callCount(), 2); + // 2 task logs + 1 completion time log + assert.strictEqual(logger.info.mock.callCount(), 3); }); it('collects errors when tasks fail', async () => { @@ -214,6 +215,7 @@ describe('processAllTasks', () => { let logger = { info: mock.fn(), error: mock.fn(), + warn: mock.fn(), }; let deps = { @@ -257,6 +259,7 @@ describe('processAllTasks', () => { let logger = { info: mock.fn(), error: mock.fn(), + warn: mock.fn(), }; let deps = { diff --git a/clients/static-site/tests/utils/sitemap.test.js b/clients/static-site/tests/utils/sitemap.test.js new file mode 100644 index 00000000..4dfed671 --- /dev/null +++ b/clients/static-site/tests/utils/sitemap.test.js @@ -0,0 +1,266 @@ +/** + * Tests for sitemap parsing utilities + */ + +import assert from 'node:assert'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { after, before, describe, it } from 'node:test'; +import { + parseSitemapFile, + parseSitemapXML, + urlsToRelativePaths, +} from '../../src/utils/sitemap.js'; + +describe('sitemap', () => { + describe('parseSitemapXML', () => { + it('parses standard sitemap format', () => { + let xml = ` + + https://example.com/ + https://example.com/about + https://example.com/blog/post-1 + `; + + let result = parseSitemapXML(xml); + + assert.strictEqual(result.urls.length, 3); + assert.strictEqual(result.childSitemaps.length, 0); + assert.strictEqual(result.urls[0], 'https://example.com/'); + assert.strictEqual(result.urls[1], 'https://example.com/about'); + assert.strictEqual(result.urls[2], 'https://example.com/blog/post-1'); + }); + + it('parses sitemap with single URL', () => { + let xml 
= `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://example.com/</loc></url>
+</urlset>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 1);
+      assert.strictEqual(result.urls[0], 'https://example.com/');
+    });
+
+    it('parses sitemap index format', () => {
+      let xml = `<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap>
+  <sitemap><loc>https://example.com/sitemap-1.xml</loc></sitemap>
+</sitemapindex>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 0);
+      assert.strictEqual(result.childSitemaps.length, 2);
+      assert.strictEqual(
+        result.childSitemaps[0],
+        'https://example.com/sitemap-0.xml'
+      );
+      assert.strictEqual(
+        result.childSitemaps[1],
+        'https://example.com/sitemap-1.xml'
+      );
+    });
+
+    it('parses sitemap index with single child', () => {
+      let xml = `<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap>
+</sitemapindex>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 0);
+      assert.strictEqual(result.childSitemaps.length, 1);
+    });
+
+    it('returns empty arrays for invalid XML', () => {
+      let xml = `<not-valid-xml>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 0);
+      assert.strictEqual(result.childSitemaps.length, 0);
+    });
+
+    it('handles URLs with extra metadata', () => {
+      let xml = `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url>
+    <loc>https://example.com/page</loc>
+    <lastmod>2024-01-15</lastmod>
+    <changefreq>weekly</changefreq>
+    <priority>0.8</priority>
+  </url>
+</urlset>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 1);
+      assert.strictEqual(result.urls[0], 'https://example.com/page');
+    });
+
+    it('filters out entries without loc', () => {
+      let xml = `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://example.com/valid</loc></url>
+  <url><lastmod>2024-01-15</lastmod></url>
+</urlset>`;
+
+      let result = parseSitemapXML(xml);
+
+      assert.strictEqual(result.urls.length, 1);
+      assert.strictEqual(result.urls[0], 'https://example.com/valid');
+    });
+  });
+
+  describe('urlsToRelativePaths', () => {
+    it('converts full URLs to relative paths', () => {
+      let urls = [
+        'https://example.com/',
+        'https://example.com/about',
+        'https://example.com/blog/post-1',
+      ];
+
+      let paths = urlsToRelativePaths(urls, 'https://example.com');
+
assert.deepStrictEqual(paths, ['/', '/about', '/blog/post-1']);
+    });
+
+    it('handles URLs with trailing slashes', () => {
+      let urls = ['https://example.com/about/', 'https://example.com/blog/'];
+
+      let paths = urlsToRelativePaths(urls, 'https://example.com');
+
+      assert.deepStrictEqual(paths, ['/about', '/blog']);
+    });
+
+    it('handles root URL with trailing slash', () => {
+      let urls = ['https://example.com/'];
+
+      let paths = urlsToRelativePaths(urls, 'https://example.com');
+
+      assert.deepStrictEqual(paths, ['/']);
+    });
+
+    it('handles empty base URL', () => {
+      let urls = ['https://example.com/page', 'https://other.com/page'];
+
+      let paths = urlsToRelativePaths(urls, '');
+
+      assert.deepStrictEqual(paths, ['/page', '/page']);
+    });
+
+    it('handles URLs without protocol', () => {
+      let urls = ['/about', '/blog/post'];
+
+      let paths = urlsToRelativePaths(urls, '');
+
+      assert.deepStrictEqual(paths, ['/about', '/blog/post']);
+    });
+
+    it('adds leading slash when missing', () => {
+      let urls = ['about', 'blog/post'];
+
+      let paths = urlsToRelativePaths(urls, '');
+
+      assert.deepStrictEqual(paths, ['/about', '/blog/post']);
+    });
+  });
+
+  describe('parseSitemapFile security', () => {
+    let testDir;
+    let secretDir;
+
+    before(async () => {
+      // Create temp directories
+      testDir = join(tmpdir(), `vizzly-sitemap-security-${Date.now()}`);
+      secretDir = join(tmpdir(), `vizzly-secret-${Date.now()}`);
+      await mkdir(testDir, { recursive: true });
+      await mkdir(secretDir, { recursive: true });
+
+      // Create a "secret" file outside the test directory
+      await writeFile(
+        join(secretDir, 'secret.xml'),
+        `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://example.com/secret-page</loc></url>
+</urlset>`
+      );
+
+      // Create a valid child sitemap in the test directory
+      await writeFile(
+        join(testDir, 'sitemap-0.xml'),
+        `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url><loc>https://example.com/valid-page</loc></url>
+</urlset>`
+      );
+    });
+
+    after(async () => {
+      await rm(testDir, { recursive: true, force: true });
+      await rm(secretDir, { recursive: true, force: true });
+    });
+
+    it('rejects
path traversal in sitemap index URLs', async () => {
+      // Malicious sitemap index trying to read files outside the directory
+      let maliciousSitemap = `<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://evil.com/../../../${secretDir}/secret.xml</loc></sitemap>
+  <sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap>
+</sitemapindex>`;
+
+      await writeFile(join(testDir, 'sitemap.xml'), maliciousSitemap);
+
+      let urls = await parseSitemapFile(join(testDir, 'sitemap.xml'));
+
+      // Should only get the valid page, not the secret one
+      assert.ok(
+        urls.some(u => u.includes('valid-page')),
+        'Should include valid page'
+      );
+      assert.ok(
+        !urls.some(u => u.includes('secret-page')),
+        'Should NOT include secret page from path traversal'
+      );
+    });
+
+    it('rejects filenames with path traversal sequences', async () => {
+      let maliciousSitemap = `<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://evil.com/../secret.xml</loc></sitemap>
+  <sitemap><loc>https://evil.com/..%2Fsecret.xml</loc></sitemap>
+  <sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap>
+</sitemapindex>`;
+
+      await writeFile(join(testDir, 'sitemap.xml'), maliciousSitemap);
+
+      let urls = await parseSitemapFile(join(testDir, 'sitemap.xml'));
+
+      // Should only get the valid page
+      assert.strictEqual(urls.length, 1);
+      assert.ok(urls[0].includes('valid-page'));
+    });
+
+    it('rejects non-xml filenames', async () => {
+      let maliciousSitemap = `<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <sitemap><loc>https://evil.com/etc/passwd</loc></sitemap>
+  <sitemap><loc>https://evil.com/.env</loc></sitemap>
+  <sitemap><loc>https://example.com/sitemap-0.xml</loc></sitemap>
+</sitemapindex>`;
+
+      await writeFile(join(testDir, 'sitemap.xml'), maliciousSitemap);
+
+      let urls = await parseSitemapFile(join(testDir, 'sitemap.xml'));
+
+      // Should only get the valid page
+      assert.strictEqual(urls.length, 1);
+      assert.ok(urls[0].includes('valid-page'));
+    });
+  });
+});