diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c3b8368..290c96ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,3 +102,128 @@ jobs: - name: Build run: npm run build + + perf-budgets: + runs-on: ubuntu-latest + needs: + - backend-tests + - frontend-tests + timeout-minutes: 40 + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + + - name: Set up Node.js + uses: actions/setup-node@0a44ba78451273a1ed8ac2fee4e347c72dfd377f + with: + node-version: '20' + cache: 'npm' + cache-dependency-path: ./frontend/package-lock.json + + - name: Install dependencies + working-directory: ./frontend + run: npm ci + + - name: Install Playwright browsers + working-directory: ./frontend + run: npx playwright install --with-deps chromium + + - name: Start stack + run: docker compose --env-file .env.development -f docker-compose.dev.yml up -d --build + + - name: Wait for API + run: | + for i in {1..60}; do curl -sf http://localhost:8000/healthcheck && break || sleep 2; done + + - name: Wait for Frontend + run: | + for i in {1..60}; do curl -sf http://localhost:3000/models/manifest.json && break || sleep 2; done + + - name: Seed backend for perf + env: + BASE_URL: http://localhost:8000 + run: | + python - <<'PY' +import json +import os +import urllib.error +import urllib.request + +BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000") + +def post(path: str, payload: dict) -> dict: + req = urllib.request.Request( + f"{BASE_URL}{path}", + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8", "ignore") + raise SystemExit(f"Seed request failed ({exc.code}): {detail}") + +material = post( + "/api/materials/", + {"name": "Walnut", "texture_url": None, "cost_per_sq_ft": 12.5}, +) 
+material_id = material.get("id") +if not material_id: + raise SystemExit("Material creation failed; missing id") + +post( + "/api/modules/", + { + "name": "Base600", + "width": 600.0, + "height": 720.0, + "depth": 580.0, + "base_price": 100.0, + "material_id": material_id, + }, +) +PY + + - name: Run performance budgets + run: npm run --prefix frontend perf:budget + + - name: Upload perf budget results + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: perf-budget-results + path: | + perf-results/perf-budget-summary.json + perf-results/perf-budget-junit.xml + + - name: Publish performance budget summary + if: always() + uses: actions/upload-test-results@0c62d1d6f6cfaf4c5859e1b358a5d2df4f96701a + with: + files: perf-results/perf-budget-junit.xml + + - name: Shutdown stack + if: always() + run: docker compose --env-file .env.development -f docker-compose.dev.yml down + + canary-metrics: + runs-on: ubuntu-latest + needs: + - perf-budgets + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + + - name: Evaluate canary metrics + env: + CANARY_METRICS_FIXTURE: tests/perf/canary-metrics.fixture.json + P95_THRESHOLD_MS: '3000' + ERROR_RATE_THRESHOLD: '0.02' + REGRESSION_TOLERANCE_PCT: '0.1' + run: python scripts/ci/check_canary_metrics.py + + - name: Upload canary metrics summary + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: canary-metrics-summary + path: perf-results/canary-metrics-summary.json diff --git a/.gitignore b/.gitignore index 85f7eeea..c39e5bb9 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ out/ .env .env.* !.env.example +perf-results/ # Docker **/.dockerignore diff --git a/.perf-budget.yml b/.perf-budget.yml new file mode 100644 index 00000000..92dfb29d --- /dev/null +++ b/.perf-budget.yml @@ -0,0 +1,66 @@ +version: 2 +defaults: + run_count: 3 + throttling: + profile: slow-4g + cpu_slowdown_multiplier: 4 + 
download_throughput_kbps: 1500 + upload_throughput_kbps: 750 + request_latency_ms: 40 +scenarios: + - id: configurator-load + description: Directly load the configurator with seeded data + url: http://localhost:3000/configurator + waits: + - type: selector + selector: "canvas" + timeout_ms: 60000 + - type: networkidle + idle_ms: 5000 + timeout_ms: 60000 + selectors: + viewer_canvas: "canvas" + generate_button: "button:has-text(\"Generate Quote\")" + price_total: "text=Total:" + metrics: + - id: first-contentful-paint + aggregation: p75 + threshold: 2000 + unit: ms + - id: largest-contentful-paint + aggregation: p75 + threshold: 3500 + unit: ms + - id: total-blocking-time + aggregation: p75 + threshold: 200 + unit: ms + - id: cumulative-layout-shift + aggregation: p75 + threshold: 0.1 + unit: score + - id: homepage-to-configurator + description: Navigate from the marketing homepage into the configurator experience + run_count: 5 + steps: + - type: goto + url: http://localhost:3000/ + wait_until: networkidle + - type: wait_for_selector + selector: "main" + timeout_ms: 30000 + - type: goto + url: http://localhost:3000/configurator + wait_until: networkidle + - type: wait_for_selector + selector: "canvas" + timeout_ms: 60000 + metrics: + - id: navigation-duration + aggregation: p90 + threshold: 3000 + unit: ms + - id: largest-contentful-paint + aggregation: p95 + threshold: 4000 + unit: ms diff --git a/docs/release-checklist.md b/docs/release-checklist.md new file mode 100644 index 00000000..6b92f306 --- /dev/null +++ b/docs/release-checklist.md @@ -0,0 +1,54 @@ +# Release Checklist + +This checklist connects the CI performance signals introduced in this repository with the +runtime observability tooling that guards production releases. It is intended for release +managers and on-call engineers who need a repeatable flow that ties Grafana alerts, Tempo +traces, and CI preview environments together. + +## 1. Validate CI performance budgets + +1. 
Confirm the **Performance Budgets** job succeeded in the latest CI run. + * Inspect the JUnit summary uploaded to the run (artifact `perf-budget-results`). + * Review `perf-results/perf-budget-summary.json` for the concrete values captured by + Playwright (P75 configurator load, P90 navigation duration, P95 LCP). +2. If any threshold failed, investigate before promoting the release candidate: + * Re-run the job against the preview environment to determine whether the regression + is deterministic or environment-specific. + * Capture a Grafana dashboard snapshot showing the relevant Web Vitals panel and link + it in the incident tracker. + +## 2. Check canary latency and error regressions + +1. Verify the **Canary Metrics** job completed without failures. +2. Review `perf-results/canary-metrics-summary.json` to compare the current build's P95 + latency and error rate with the previous build and the configured budgets. +3. If the job failed: + * Acknowledge or silence the corresponding Grafana alert for the service-level + objective (SLO). + * Escalate to the on-call engineer through the paging integration configured for the + Grafana alert rule (OpsGenie, PagerDuty, etc.). + * Use the Tempo trace search link emitted by the job (or Grafana Explore) to confirm + whether elevated latency correlates with specific spans. + +## 3. Correlate CI results with Grafana dashboards + +1. Publish the artifacts from the CI run (`perf-budget-summary.json` and + `canary-metrics-summary.json`) to the shared release channel or ticket. +2. Update the Grafana dashboard annotations with the build ID and a link to the CI run so + that on-call engineers can quickly correlate spikes with the release candidate. +3. Ensure the Grafana dashboard panels for "CI Build Budgets" and "Runtime Latency" use the + Pushgateway/JUnit exports for side-by-side comparisons. + +## 4. Preview gating and release sign-off + +1. 
Block the promotion of the preview environment to staging/production until both + performance-related jobs have succeeded for the release branch commit. +2. Confirm no active Grafana alerts remain open for the release window. If alerts exist, + document mitigations and obtain sign-off from the incident commander before proceeding. +3. Log the final decision in the release record, linking to: + * The CI run with passing performance jobs. + * Grafana alert history or dashboards showing the green state. + * Tempo traces or logs that justify the decision when applicable. + +Following this checklist guarantees that CI regressions, Grafana alerts, and on-call +notifications remain aligned, providing a defensible audit trail for each production push. diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 99d5439c..7da723cb 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -43,7 +43,8 @@ "ts-jest": "^29.2.5", "ts-node": "^10.9.2", "typescript": "^5", - "vitest": "^1.6.0" + "vitest": "^1.6.0", + "yaml": "^2.8.1" } }, "node_modules/@adobe/css-tools": { @@ -12910,16 +12911,16 @@ "license": "ISC" }, "node_modules/yaml": { - "version": "2.7.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.7.1.tgz", - "integrity": "sha512-10ULxpnOCQXxJvBgxsn9ptjq6uviG/htZKk9veJGhlqn3w/DxQ631zFF+nlQXLwmImeS5amR2dl2U8sg6U9jsQ==", + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", + "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", "dev": true, "license": "ISC", "bin": { "yaml": "bin.mjs" }, "engines": { - "node": ">= 14" + "node": ">= 14.6" } }, "node_modules/yargs": { diff --git a/frontend/package.json b/frontend/package.json index c37705e1..79c3f740 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -8,6 +8,7 @@ "start": "next start", "lint": "next lint", "test:e2e": "playwright test", + "perf:budget": "node --loader 
ts-node/esm tests/perf/run-perf-budget.ts", "assets:gen": "python ../scripts/generate_reference_glbs.py", "assets:pack": "bash ../scripts/pack_models.sh", "assets:validate": "python ../scripts/glb_validate.py public/models/*.glb --fail-on-warning", @@ -20,15 +21,15 @@ "@chakra-ui/react": "^2.8.2", "@emotion/react": "^11.13.3", "@emotion/styled": "^11.13.0", + "@react-three/drei": "^9.120.5", + "@react-three/fiber": "^8.17.10", "framer-motion": "^11.5.4", "next": "^14.2.28", "react": "^18", "react-dom": "^18", "react-icons": "^5.3.0", - "zustand": "^4.5.0", "three": "^0.171.0", - "@react-three/fiber": "^8.17.10", - "@react-three/drei": "^9.120.5" + "zustand": "^4.5.0" }, "devDependencies": { "@playwright/test": "^1.56.0", @@ -49,12 +50,10 @@ "postcss": "^8", "tailwindcss": "^3.3.0", "ts-jest": "^29.2.5", + "ts-node": "^10.9.2", "typescript": "^5", - "gltfpack": "0.25.0", - "meshoptimizer": "0.25.0", "vitest": "^1.6.0", - "@playwright/test": "^1.56.0", - "ts-node": "^10.9.2" + "yaml": "^2.8.1" }, "jest": { "setupFilesAfterEnv": [ diff --git a/frontend/tests/perf/run-perf-budget.ts b/frontend/tests/perf/run-perf-budget.ts new file mode 100644 index 00000000..c0b52a7a --- /dev/null +++ b/frontend/tests/perf/run-perf-budget.ts @@ -0,0 +1,577 @@ +import fs from 'fs'; +import path from 'path'; +import yaml from 'yaml'; +import { chromium, Browser, BrowserContext, Page } from 'playwright'; + +type Aggregation = 'p50' | 'p75' | 'p90' | 'p95' | 'median' | 'mean'; + +type WaitStep = + | { type: 'selector'; selector: string; timeout_ms?: number } + | { type: 'networkidle'; idle_ms?: number; timeout_ms?: number }; + +type ScenarioStep = + | { type: 'goto'; url: string; wait_until?: 'load' | 'domcontentloaded' | 'networkidle' } + | { type: 'wait_for_selector'; selector: string; timeout_ms?: number } + | { type: 'wait_for_timeout'; timeout_ms: number }; + +type MetricConfig = { + id: string; + aggregation: Aggregation | 'p75' | 'p90' | 'p95'; + threshold: number; + unit: string; 
+};
+
+type ThrottlingConfig = {
+  profile?: string;
+  cpu_slowdown_multiplier?: number;
+  download_throughput_kbps?: number;
+  upload_throughput_kbps?: number;
+  request_latency_ms?: number;
+};
+
+type ScenarioConfig = {
+  id: string;
+  description?: string;
+  url?: string;
+  waits?: WaitStep[];
+  steps?: ScenarioStep[];
+  metrics: MetricConfig[];
+  selectors?: Record<string, string>;
+  run_count?: number;
+};
+
+type PerfBudgetConfig = {
+  version: number;
+  defaults?: {
+    run_count?: number;
+    throttling?: ThrottlingConfig;
+  };
+  scenarios: ScenarioConfig[];
+};
+
+type ScenarioMetrics = Record<string, number[]>;
+
+type RunMetrics = Record<string, number | null>;
+
+type AggregatedMetric = {
+  id: string;
+  aggregation: string;
+  threshold: number;
+  unit: string;
+  value: number;
+  passed: boolean;
+  samples: number[];
+};
+
+type ScenarioResult = {
+  id: string;
+  description?: string;
+  metrics: AggregatedMetric[];
+};
+
+type JUnitTestCase = {
+  classname: string;
+  name: string;
+  time: string;
+  failure?: { message: string; details: string };
+};
+
+type JUnitSuite = {
+  name: string;
+  tests: number;
+  failures: number;
+  time: string;
+  testcases: JUnitTestCase[];
+};
+
+type JUnitReport = {
+  name: string;
+  tests: number;
+  failures: number;
+  time: string;
+  suites: JUnitSuite[];
+};
+
+type Summary = {
+  scenarios: ScenarioResult[];
+  passed: boolean;
+};
+
+const rootDir = path.resolve(__dirname, '../../..');
+const configPath = path.join(rootDir, '.perf-budget.yml');
+const resultsDir = path.join(rootDir, 'perf-results');
+
+/**
+ * Load and validate the performance budget configuration from disk.
+ * + * @returns The parsed PerfBudgetConfig read from the configured config path + * @throws Error if the configuration is missing a top-level `scenarios` array + */ +function readConfig(): PerfBudgetConfig { + const file = fs.readFileSync(configPath, 'utf-8'); + const parsed = yaml.parse(file) as PerfBudgetConfig; + if (!parsed || !Array.isArray(parsed.scenarios)) { + throw new Error('Invalid perf budget configuration: missing scenarios'); + } + return parsed; +} + +/** + * Applies CPU and network emulation to the given browser context/page according to the provided throttling settings. + * + * When `throttling` is omitted, no emulation is applied. If `cpu_slowdown_multiplier` is a positive number, + * a CDP session is used to set the CPU throttling rate. If any of `download_throughput_kbps`, `upload_throughput_kbps`, + * or `request_latency_ms` are provided, network emulation is enabled and those values are applied (throughput values + * are converted from kbps to bytes/sec as required by the CDP). + * + * @param context - The Playwright BrowserContext to create a CDP session on. + * @param page - The Playwright Page associated with the context (used to bind the CDP session). + * @param throttling - Optional throttling parameters: + * - `cpu_slowdown_multiplier`: CPU slowdown multiplier (greater than 0 enables CPU throttling). + * - `download_throughput_kbps`: Download throughput in kilobits per second. + * - `upload_throughput_kbps`: Upload throughput in kilobits per second. + * - `request_latency_ms`: Additional request latency in milliseconds. 
+ */ +async function applyThrottling(context: BrowserContext, page: Page, throttling?: ThrottlingConfig) { + if (!throttling) { + return; + } + const session = await context.newCDPSession(page); + if (typeof throttling.cpu_slowdown_multiplier === 'number' && throttling.cpu_slowdown_multiplier > 0) { + await session.send('Emulation.setCPUThrottlingRate', { rate: throttling.cpu_slowdown_multiplier }); + } + const download = throttling.download_throughput_kbps; + const upload = throttling.upload_throughput_kbps; + const latency = throttling.request_latency_ms; + if (download || upload || latency) { + await session.send('Network.enable'); + await session.send('Network.emulateNetworkConditions', { + offline: false, + latency: latency ?? 0, + downloadThroughput: download ? (download * 1024) / 8 : -1, + uploadThroughput: upload ? (upload * 1024) / 8 : -1, + }); + } +} + +/** + * Installs in-page performance observers that record LCP entries, cumulative layout shift, and total blocking time into a global store. 
+ * + * Injects a global `__perfBudget` object on the target page and registers PerformanceObserver instances that populate: + * - `lcpEntries`: array of LCP PerformanceEntry objects + * - `cls`: cumulative layout shift value + * - `tbt`: accumulated total blocking time (ms) + * + * @param page - The Playwright `Page` to attach the observers to + */ +async function setupPerformanceObservers(page: Page) { + await page.addInitScript(() => { + const globalAny = globalThis as any; + globalAny.__perfBudget = { + lcpEntries: [] as PerformanceEntry[], + cls: 0, + tbt: 0, + }; + + if (globalAny.PerformanceObserver) { + try { + const lcpObserver = new PerformanceObserver((entryList) => { + const entries = entryList.getEntries(); + const store = globalAny.__perfBudget; + store.lcpEntries.push(...entries); + }); + lcpObserver.observe({ type: 'largest-contentful-paint', buffered: true }); + } catch (error) { + console.warn('LCP observer failed', error); + } + + try { + const clsObserver = new PerformanceObserver((entryList) => { + const store = globalAny.__perfBudget; + for (const entry of entryList.getEntries() as any[]) { + if (!entry.hadRecentInput) { + store.cls += entry.value; + } + } + }); + clsObserver.observe({ type: 'layout-shift', buffered: true }); + } catch (error) { + console.warn('CLS observer failed', error); + } + + try { + const longTaskObserver = new PerformanceObserver((entryList) => { + const store = globalAny.__perfBudget; + for (const entry of entryList.getEntries()) { + const blockingTime = entry.duration - 50; + if (blockingTime > 0) { + store.tbt += blockingTime; + } + } + }); + longTaskObserver.observe({ type: 'longtask' }); + } catch (error) { + console.warn('Long task observer failed', error); + } + } + }); +} + +/** + * Performs a configured wait step on the given page, supporting selector and network-idle waits. + * + * @param wait - Wait step configuration. 
If `type` is `"selector"`, waits for `selector` with a default timeout of 30000 ms unless `timeout_ms` is provided. If `type` is `"networkidle"`, waits for the page network to become idle with a default timeout of 60000 ms unless `timeout_ms` is provided; if `idle_ms` is set and greater than zero, waits an additional `idle_ms` milliseconds after network idle. + */ +async function performWait(page: Page, wait: WaitStep) { + if (wait.type === 'selector') { + await page.waitForSelector(wait.selector, { timeout: wait.timeout_ms ?? 30000 }); + } else if (wait.type === 'networkidle') { + await page.waitForLoadState('networkidle', { timeout: wait.timeout_ms ?? 60000 }); + if (wait.idle_ms && wait.idle_ms > 0) { + await page.waitForTimeout(wait.idle_ms); + } + } +} + +/** + * Execute a single scenario step against the provided Playwright page. + * + * Supports three step types: + * - `goto`: navigates the page to `step.url` and waits until the specified `step.wait_until` event (defaults to `load`) or navigation completes. + * - `wait_for_selector`: waits for the given `step.selector` to appear (optional `step.timeout_ms` in milliseconds). + * - `wait_for_timeout`: waits for `step.timeout_ms` milliseconds. + * + * @param page - The Playwright Page to operate on. + * @param step - The step configuration describing the action to perform. + */ +async function performScenarioStep(page: Page, step: ScenarioStep) { + if (step.type === 'goto') { + await page.goto(step.url, { waitUntil: step.wait_until ?? 'load', timeout: 60000 }); + } else if (step.type === 'wait_for_selector') { + await page.waitForSelector(step.selector, { timeout: step.timeout_ms ?? 30000 }); + } else if (step.type === 'wait_for_timeout') { + await page.waitForTimeout(step.timeout_ms); + } +} + +/** + * Runs a single performance scenario in a new browser context and returns collected runtime metrics. + * + * @param scenario - The scenario configuration to execute (URL or ordered steps, waits, and scenario id). 
+ * @param browser - Playwright browser instance used to create an isolated context for the run.
+ * @param defaults - Default perf budget settings (e.g., throttling) applied to the run when present.
+ * @returns A `RunMetrics` map of metric identifier to numeric value or `null` when a measurement is not available.
+ * @throws Error if the provided scenario contains neither `url` nor `steps`.
+ */
+async function executeScenarioRun(
+  scenario: ScenarioConfig,
+  browser: Browser,
+  defaults: PerfBudgetConfig['defaults'],
+): Promise<RunMetrics> {
+  const context = await browser.newContext();
+  const page = await context.newPage();
+  await setupPerformanceObservers(page);
+  await applyThrottling(context, page, defaults?.throttling);
+
+  if (scenario.steps && scenario.steps.length > 0) {
+    for (const step of scenario.steps) {
+      await performScenarioStep(page, step);
+    }
+  } else if (scenario.url) {
+    await page.goto(scenario.url, { waitUntil: 'load', timeout: 60000 });
+    if (scenario.waits) {
+      for (const wait of scenario.waits) {
+        await performWait(page, wait);
+      }
+    }
+  } else {
+    throw new Error(`Scenario ${scenario.id} missing url or steps`);
+  }
+
+  // Give observers time to flush buffered events
+  await page.waitForTimeout(1000);
+
+  const metrics = await page.evaluate(() => {
+    const navEntries = performance.getEntriesByType('navigation');
+    const paints = performance.getEntriesByName('first-contentful-paint');
+    const globalAny = globalThis as any;
+    const store = globalAny.__perfBudget || { lcpEntries: [], cls: 0, tbt: 0 };
+    const lcpEntries = Array.isArray(store.lcpEntries) ? store.lcpEntries : [];
+    const lastNav = navEntries[navEntries.length - 1] as PerformanceNavigationTiming | undefined;
+    const fcp = paints.length > 0 ? paints[paints.length - 1].startTime : null;
+    const lcpEntry = lcpEntries.length > 0 ? lcpEntries[lcpEntries.length - 1] : null;
+    const lcp = lcpEntry ? (lcpEntry as any).startTime ?? (lcpEntry as any).renderTime ??
(lcpEntry as any).loadTime ?? null : null; + const cls = typeof store.cls === 'number' ? store.cls : null; + const tbt = typeof store.tbt === 'number' ? store.tbt : null; + + return { + 'navigation-duration': lastNav ? lastNav.duration : null, + 'first-contentful-paint': fcp, + 'largest-contentful-paint': lcp, + 'total-blocking-time': tbt, + 'cumulative-layout-shift': cls, + }; + }); + + await context.close(); + return metrics; +} + +/** + * Aggregate an array of per-run metric objects into buckets of numeric samples keyed by metric identifier. + * + * @param metrics - Array of run metric maps produced by individual scenario executions + * @returns A mapping from each metric id to an array of finite numeric samples collected across runs + */ +function collectScenarioMetrics(metrics: RunMetrics[]): ScenarioMetrics { + const result: ScenarioMetrics = {}; + for (const run of metrics) { + for (const [metricId, value] of Object.entries(run)) { + if (typeof value === 'number' && Number.isFinite(value)) { + if (!result[metricId]) { + result[metricId] = []; + } + result[metricId].push(value); + } + } + } + return result; +} + +/** + * Computes the requested percentile from a sorted numeric array. + * + * @param sorted - Array of numbers sorted in ascending order. + * @param percentileValue - Percentile to compute, between 0 and 1 inclusive (for example, `0.5` for the median). + * @returns The interpolated percentile value for `percentileValue`; `NaN` if `sorted` is empty. + */ +function percentile(sorted: number[], percentileValue: number): number { + if (sorted.length === 0) { + return NaN; + } + const index = (sorted.length - 1) * percentileValue; + const lower = Math.floor(index); + const upper = Math.ceil(index); + if (lower === upper) { + return sorted[lower]; + } + return sorted[lower] + (sorted[upper] - sorted[lower]) * (index - lower); +} + +/** + * Compute an aggregate statistic from a list of numeric samples. 
+ * + * Supported aggregations: 'mean', 'median' (alias 'p50'), 'p75', 'p90', 'p95'. + * + * @param values - The numeric samples to aggregate + * @param aggregation - The aggregation method to apply + * @returns The aggregated numeric value; `NaN` if `values` is empty + * @throws Error if `aggregation` is not one of the supported methods + */ +function aggregate(values: number[], aggregation: Aggregation | string): number { + if (values.length === 0) { + return NaN; + } + const sorted = [...values].sort((a, b) => a - b); + switch (aggregation) { + case 'mean': + return values.reduce((sum, value) => sum + value, 0) / values.length; + case 'median': + case 'p50': + return percentile(sorted, 0.5); + case 'p75': + return percentile(sorted, 0.75); + case 'p90': + return percentile(sorted, 0.9); + case 'p95': + return percentile(sorted, 0.95); + default: + throw new Error(`Unsupported aggregation: ${aggregation}`); + } +} + +/** + * Builds a JUnit-compatible report representing scenario metric results and budget violations. 
+ * + * @param results - Array of scenario results to include in the report + * @returns A JUnitReport containing one test suite per scenario; each metric is a test case and metrics that exceeded their thresholds are represented as failures + */ +function createJUnitReport(results: ScenarioResult[]): JUnitReport { + const suites: JUnitSuite[] = []; + let totalTests = 0; + let totalFailures = 0; + + for (const scenario of results) { + const testcases: JUnitTestCase[] = []; + for (const metric of scenario.metrics) { + const testcase: JUnitTestCase = { + classname: `perf.${scenario.id}`, + name: `${scenario.id} ${metric.id}`, + time: '0', + }; + if (!metric.passed) { + testcase.failure = { + message: `Budget exceeded for ${metric.id}`, + details: `Expected ${metric.aggregation} <= ${metric.threshold}${metric.unit}, observed ${metric.value.toFixed(2)}${metric.unit}`, + }; + totalFailures += 1; + } + testcases.push(testcase); + totalTests += 1; + } + suites.push({ + name: scenario.id, + tests: scenario.metrics.length, + failures: scenario.metrics.filter((metric) => !metric.passed).length, + time: '0', + testcases, + }); + } + + return { + name: 'perf-budget', + tests: totalTests, + failures: totalFailures, + time: '0', + suites, + }; +} + +/** + * Serialize a JUnitReport to XML and write it to the perf-results/perf-budget-junit.xml file. + * + * Creates the results directory if it does not exist before writing the file. 
+ *
+ * @param report - The JUnit report object to serialize and persist
+ */
+function writeJUnitReport(report: JUnitReport) {
+  const xmlLines: string[] = [];
+  xmlLines.push('<?xml version="1.0" encoding="UTF-8"?>');
+  xmlLines.push(
+    `<testsuites name="${report.name}" tests="${report.tests}" failures="${report.failures}" time="${report.time}">`,
+  );
+  for (const suite of report.suites) {
+    xmlLines.push(
+      `  <testsuite name="${escapeXml(suite.name)}" tests="${suite.tests}" failures="${suite.failures}" time="${suite.time}">`,
+    );
+    for (const testcase of suite.testcases) {
+      xmlLines.push(
+        `    <testcase classname="${escapeXml(testcase.classname)}" name="${escapeXml(testcase.name)}" time="${testcase.time}">`,
+      );
+      if (testcase.failure) {
+        xmlLines.push(
+          `      <failure message="${escapeXml(testcase.failure.message)}">${escapeXml(testcase.failure.details)}</failure>`,
        );
+      }
+      xmlLines.push('    </testcase>');
+    }
+    xmlLines.push('  </testsuite>');
+  }
+  xmlLines.push('</testsuites>');
+
+  fs.mkdirSync(resultsDir, { recursive: true });
+  fs.writeFileSync(path.join(resultsDir, 'perf-budget-junit.xml'), xmlLines.join('\n'), 'utf-8');
+}
+
+/**
+ * Escape XML special characters in a string for safe inclusion in XML.
+ *
+ * @param value - The string to escape
+ * @returns The input string with `&`, `"`, `<`, and `>` replaced by their XML entities (`&amp;`, `&quot;`, `&lt;`, `&gt;`)
+ */
+function escapeXml(value: string): string {
+  return value
+    .replace(/&/g, '&amp;')
+    .replace(/"/g, '&quot;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;');
+}
+
+/**
+ * Persist the provided performance summary to perf-results/perf-budget-summary.json.
+ *
+ * Ensures the results directory exists and writes `summary` as pretty-printed JSON to the file.
+ *
+ * @param summary - The aggregated performance summary to save
+ */
+function writeSummary(summary: Summary) {
+  fs.mkdirSync(resultsDir, { recursive: true });
+  fs.writeFileSync(path.join(resultsDir, 'perf-budget-summary.json'), JSON.stringify(summary, null, 2), 'utf-8');
+}
+
+/**
+ * Executes all configured performance scenarios, aggregates their metrics, and produces reports.
+ *
+ * Reads the performance budget configuration, runs each scenario the configured number of times, aggregates metric samples according to each metric's aggregation strategy, evaluates them against thresholds, writes a JSON summary and a JUnit XML report to the results directory, and sets a non-zero process exit code when any metric violates its threshold.
+ */ +async function run() { + const config = readConfig(); + const defaults = config.defaults ?? {}; + const browser = await chromium.launch({ headless: true }); + const scenarioResults: ScenarioResult[] = []; + let overallPassed = true; + + try { + for (const scenario of config.scenarios) { + const runCount = scenario.run_count ?? defaults.run_count ?? 1; + const runMetrics: RunMetrics[] = []; + for (let i = 0; i < runCount; i += 1) { + console.log(`Running scenario ${scenario.id} (${i + 1}/${runCount})`); + const metrics = await executeScenarioRun(scenario, browser, defaults); + runMetrics.push(metrics); + } + const scenarioValues = collectScenarioMetrics(runMetrics); + const aggregatedMetrics: AggregatedMetric[] = []; + for (const metricConfig of scenario.metrics) { + const values = scenarioValues[metricConfig.id]; + if (!values || values.length === 0) { + console.warn(`No samples collected for ${scenario.id} metric ${metricConfig.id}`); + continue; + } + const value = aggregate(values, metricConfig.aggregation); + const passed = value <= metricConfig.threshold; + if (!passed) { + overallPassed = false; + } + aggregatedMetrics.push({ + id: metricConfig.id, + aggregation: metricConfig.aggregation, + threshold: metricConfig.threshold, + unit: metricConfig.unit, + value, + passed, + samples: values, + }); + console.log( + `${scenario.id} ${metricConfig.id} ${metricConfig.aggregation}: ${value.toFixed(2)}${metricConfig.unit} (threshold ${metricConfig.threshold}${metricConfig.unit})`, + ); + } + scenarioResults.push({ + id: scenario.id, + description: scenario.description, + metrics: aggregatedMetrics, + }); + } + } finally { + await browser.close(); + } + + const summary: Summary = { + scenarios: scenarioResults, + passed: overallPassed, + }; + writeSummary(summary); + writeJUnitReport(createJUnitReport(scenarioResults)); + + if (!overallPassed) { + console.error('Performance budgets failed. 
See perf-results/perf-budget-summary.json for details.'); + process.exitCode = 1; + } else { + console.log('Performance budgets satisfied.'); + } +} + +run().catch((error) => { + console.error('Failed to execute performance budgets', error); + process.exitCode = 1; +}); \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index f60b23be..71ebe479 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -39,6 +39,7 @@ nav: - AI Engine: AI_ENGINE.md - GLB Asset Standard: GLB_ASSET_STANDARD.md - Template UI: template-preview.md + - Release Checklist: release-checklist.md plugins: - search - include-markdown diff --git a/perf-budget.yml b/perf-budget.yml deleted file mode 100644 index b9805519..00000000 --- a/perf-budget.yml +++ /dev/null @@ -1,43 +0,0 @@ -version: 1 -run_count: 3 -throttling: - profile: slow-4g - cpuSlowdownMultiplier: 4 - downloadThroughputKbps: 1500 - uploadThroughputKbps: 750 - requestLatencyMs: 40 -pages: - - id: configurator - url: http://localhost:3000/configurator - waits: - - type: selector - selector: "canvas" - timeout_ms: 60000 - - type: networkidle - idle_ms: 5000 - timeout_ms: 60000 - selectors: - viewer_canvas: "canvas" - generate_button: "button:has-text(\"Generate Quote\")" - price_total: "text=Total:" - metrics: - - id: first-contentful-paint - aggregation: p75 - threshold: 2000 - unit: ms - - id: largest-contentful-paint - aggregation: p75 - threshold: 3500 - unit: ms - - id: speed-index - aggregation: p75 - threshold: 3200 - unit: ms - - id: total-blocking-time - aggregation: p75 - threshold: 200 - unit: ms - - id: cumulative-layout-shift - aggregation: p75 - threshold: 0.1 - unit: score diff --git a/scripts/ci/check_canary_metrics.py b/scripts/ci/check_canary_metrics.py new file mode 100644 index 00000000..98f659fe --- /dev/null +++ b/scripts/ci/check_canary_metrics.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +"""Validate canary latency and error budgets using Prometheus/Tempo data.""" +from __future__ import annotations + +import 
#!/usr/bin/env python3
"""Validate canary latency and error budgets using Prometheus/Tempo data."""
from __future__ import annotations

import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any, Dict, Optional

# Explicit module surface. The underscore helpers are exported deliberately so
# that the CI harness and unit tests can exercise them via star-import.
__all__ = [
    "CanaryMetrics",
    "CanaryCheckError",
    "main",
    "_float_or_none",
    "_extract_prom_value",
    "_evaluate",
]


@dataclass
class CanaryMetrics:
    """Canary measurements for the current build plus optional comparison data."""

    latency_p95_ms: float
    error_rate: float
    trace_latency_p95_ms: Optional[float] = None
    previous_latency_p95_ms: Optional[float] = None
    previous_error_rate: Optional[float] = None
    build: Optional[str] = None
    previous_build: Optional[str] = None
    generated_at: Optional[str] = None


class CanaryCheckError(RuntimeError):
    """Raised when the canary check fails."""


def _float_or_none(value: Any) -> Optional[float]:
    """
    Convert the given value to a float, returning None if the value cannot be converted.

    Returns:
        float_value (Optional[float]): The value converted to a `float`, or `None` if
        conversion raises `TypeError` or `ValueError`.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _http_get_json(url: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
    """
    Fetch JSON from the given URL, optionally adding URL-encoded query parameters.

    Parameters:
        url (str): The request URL or base endpoint.
        params (Optional[Dict[str, str]]): Query parameters to URL-encode and append.

    Returns:
        Dict[str, Any]: The parsed JSON response as a Python dictionary.
    """
    query = f"{url}?{urllib.parse.urlencode(params)}" if params else url
    with urllib.request.urlopen(query, timeout=10) as response:
        return json.loads(response.read().decode("utf-8"))


def _extract_prom_value(payload: Dict[str, Any]) -> Optional[float]:
    """
    Extract a numeric sample value from a Prometheus-style query response.

    Handles both instant queries (``"value": [ts, "1.23"]``) and range queries
    (``"values": [[ts, "1.23"], ...]``); for a range result the newest sample
    is used.

    Parameters:
        payload (dict): The JSON-decoded response from Prometheus' HTTP API.

    Returns:
        float: The extracted numeric value when present.
        None: If the response status is not "success", contains no results,
        or a numeric sample cannot be determined.
    """
    if payload.get("status") != "success":
        return None
    result = payload.get("data", {}).get("result", [])
    if not result:
        return None
    values = result[0].get("value") or result[0].get("values")
    if isinstance(values, list):
        if not values:
            # Guard against an empty sample list; indexing it would raise.
            return None
        sample = values[-1]
        # Range query: the last entry is itself a [timestamp, value] pair.
        if isinstance(sample, list) and len(sample) >= 2:
            return _float_or_none(sample[1])
        # Instant query: ``values`` is a flat [timestamp, value] pair, so the
        # last element is the scalar sample itself. (Previously this case fell
        # through and returned None, discarding every instant-query result.)
        return _float_or_none(sample)
    if isinstance(values, (int, float, str)):
        return _float_or_none(values)
    return None


def _query_prometheus(base_url: str, query: str) -> Optional[float]:
    """
    Query a Prometheus instant query endpoint and return the numeric result if present.

    Returns:
        float: Numeric value extracted from the Prometheus response, or `None` if the
        HTTP request failed or the response did not contain a usable numeric result.
    """
    endpoint = f"{base_url.rstrip('/')}/api/v1/query"
    try:
        payload = _http_get_json(endpoint, {"query": query})
    except urllib.error.URLError as exc:  # pragma: no cover - network failure is logged
        print(f"Failed to query Prometheus: {exc}", file=sys.stderr)
        return None
    return _extract_prom_value(payload)


def _load_fixture(path: str) -> CanaryMetrics:
    """
    Load canary metrics from a JSON fixture file.

    Parameters:
        path (str): Filesystem path to a JSON fixture containing top-level keys
            "current", "previous", "tempo", and "metadata". Missing numeric
            fields default to 0.0 or None as appropriate.

    Returns:
        CanaryMetrics: Instance populated from the fixture:
            - latency_p95_ms and error_rate taken from `current`.
            - trace_latency_p95_ms taken from `tempo`.
            - previous_latency_p95_ms and previous_error_rate taken from `previous`.
            - build, previous_build, and generated_at taken from `metadata`.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    current = payload.get("current", {})
    previous = payload.get("previous", {})
    tempo = payload.get("tempo", {})
    metadata = payload.get("metadata", {})
    return CanaryMetrics(
        latency_p95_ms=float(current.get("latency_p95_ms", 0.0)),
        error_rate=float(current.get("error_rate", 0.0)),
        trace_latency_p95_ms=_float_or_none(tempo.get("trace_latency_p95_ms")),
        previous_latency_p95_ms=_float_or_none(previous.get("latency_p95_ms")),
        previous_error_rate=_float_or_none(previous.get("error_rate")),
        build=metadata.get("build"),
        previous_build=metadata.get("previous_build"),
        generated_at=metadata.get("generated_at"),
    )


def _collect_metrics_from_services() -> Optional[CanaryMetrics]:
    """
    Collect canary metrics from Prometheus and Tempo based on environment configuration.

    Required environment variables for live collection: PROMETHEUS_URL,
    PROMETHEUS_LATENCY_QUERY, PROMETHEUS_ERROR_QUERY. Optional:
        - PROMETHEUS_PREVIOUS_LATENCY_QUERY / PROMETHEUS_PREVIOUS_ERROR_QUERY:
          queries for previous-period metrics.
        - TEMPO_URL / TEMPO_TRACE_QUERY: Tempo search API and trace query.
        - BUILD_TAG or GITHUB_SHA: current build identifier.
        - PREVIOUS_BUILD_TAG: previous build identifier.

    Returns:
        Optional[CanaryMetrics]: A populated CanaryMetrics object when live
        collection succeeds, or `None` when Prometheus is unconfigured,
        unreachable, or returns no data for the primary latency or error
        queries — signalling that callers should fall back to fixture data.
    """
    prom_url = os.environ.get("PROMETHEUS_URL")
    latency_query = os.environ.get("PROMETHEUS_LATENCY_QUERY")
    error_query = os.environ.get("PROMETHEUS_ERROR_QUERY")
    previous_latency_query = os.environ.get("PROMETHEUS_PREVIOUS_LATENCY_QUERY")
    previous_error_query = os.environ.get("PROMETHEUS_PREVIOUS_ERROR_QUERY")

    if not (prom_url and latency_query and error_query):
        return None

    latency = _query_prometheus(prom_url, latency_query)
    error_rate = _query_prometheus(prom_url, error_query)

    if latency is None or error_rate is None:
        print(
            "Prometheus query returned no data for latency or error metrics; "
            "falling back to fixture",
            file=sys.stderr,
        )
        return None

    previous_latency = (
        _query_prometheus(prom_url, previous_latency_query)
        if previous_latency_query
        else None
    )
    previous_error = (
        _query_prometheus(prom_url, previous_error_query)
        if previous_error_query
        else None
    )

    tempo_url = os.environ.get("TEMPO_URL")
    trace_latency_query = os.environ.get("TEMPO_TRACE_QUERY")
    trace_latency = None
    if tempo_url and trace_latency_query:
        try:
            payload = _http_get_json(
                f"{tempo_url.rstrip('/')}/api/search", {"q": trace_latency_query}
            )
            trace_latency = _extract_prom_value(payload)
        except urllib.error.URLError as exc:
            print(f"Failed to query Tempo: {exc}", file=sys.stderr)

    build_tag = os.environ.get("BUILD_TAG") or os.environ.get("GITHUB_SHA")
    previous_build = os.environ.get("PREVIOUS_BUILD_TAG")

    return CanaryMetrics(
        latency_p95_ms=float(latency),
        error_rate=float(error_rate),
        trace_latency_p95_ms=trace_latency,
        previous_latency_p95_ms=_float_or_none(previous_latency),
        previous_error_rate=_float_or_none(previous_error),
        build=build_tag,
        previous_build=previous_build,
        generated_at=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    )


def _load_metrics() -> CanaryMetrics:
    """
    Load canary metrics from configured services, falling back to a JSON fixture.

    The fixture path comes from CANARY_METRICS_FIXTURE (default
    "tests/perf/canary-metrics.fixture.json"); the chosen path is printed when
    the fallback is used.

    Returns:
        CanaryMetrics: Collected canary metrics and metadata, sourced from
        live services when available or from the fixture otherwise.

    Raises:
        OSError: If the fixture fallback is used and the file cannot be read.
        json.JSONDecodeError: If the fixture contains invalid JSON.
    """
    metrics = _collect_metrics_from_services()
    if metrics is not None:
        return metrics

    fixture_path = os.environ.get(
        "CANARY_METRICS_FIXTURE", "tests/perf/canary-metrics.fixture.json"
    )
    print(f"Using canary metrics fixture: {fixture_path}")
    return _load_fixture(fixture_path)


def _write_summary(metrics: CanaryMetrics, passed: bool) -> None:
    """
    Write a JSON summary of the metrics and pass/fail result to
    perf-results/canary-metrics-summary.json.

    Parameters:
        metrics (CanaryMetrics): Collected canary metrics and metadata.
        passed (bool): Whether the canary checks passed; written as the
            `passed` field in the summary.
    """
    summary_path = os.path.join("perf-results", "canary-metrics-summary.json")
    os.makedirs(os.path.dirname(summary_path), exist_ok=True)
    payload = {
        "latency_p95_ms": metrics.latency_p95_ms,
        "error_rate": metrics.error_rate,
        "trace_latency_p95_ms": metrics.trace_latency_p95_ms,
        "previous_latency_p95_ms": metrics.previous_latency_p95_ms,
        "previous_error_rate": metrics.previous_error_rate,
        "build": metrics.build,
        "previous_build": metrics.previous_build,
        "generated_at": metrics.generated_at,
        "passed": passed,
    }
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def _evaluate(metrics: CanaryMetrics) -> None:
    """
    Validate the provided canary metrics against configured thresholds.

    Checks performed (budgets read from the environment, with defaults):
        - P95 latency exceeds P95_THRESHOLD_MS (default 3000).
        - Error rate exceeds ERROR_RATE_THRESHOLD (default 0.02).
        - Latency regression vs previous_latency_p95_ms beyond
          REGRESSION_TOLERANCE_PCT (default 0.1).
        - Error-rate regression vs previous_error_rate beyond the same tolerance.
    Regression checks run only when the corresponding previous value is present.

    Raises:
        CanaryCheckError: If any check fails; the message is a
        semicolon-separated list of failure descriptions.
    """
    latency_budget = float(os.environ.get("P95_THRESHOLD_MS", 3000))
    error_budget = float(os.environ.get("ERROR_RATE_THRESHOLD", 0.02))
    regression_tolerance_pct = float(os.environ.get("REGRESSION_TOLERANCE_PCT", 0.1))

    failures = []

    if metrics.latency_p95_ms > latency_budget:
        failures.append(
            f"P95 latency {metrics.latency_p95_ms:.2f}ms exceeds budget {latency_budget:.2f}ms"
        )

    if metrics.error_rate > error_budget:
        failures.append(
            f"Error rate {metrics.error_rate:.4f} exceeds budget {error_budget:.4f}"
        )

    if (
        metrics.previous_latency_p95_ms is not None
        and metrics.latency_p95_ms > metrics.previous_latency_p95_ms * (1 + regression_tolerance_pct)
    ):
        failures.append(
            "P95 latency regression compared to previous build: "
            f"{metrics.latency_p95_ms:.2f}ms vs {metrics.previous_latency_p95_ms:.2f}ms"
        )

    if (
        metrics.previous_error_rate is not None
        and metrics.error_rate > metrics.previous_error_rate * (1 + regression_tolerance_pct)
    ):
        failures.append(
            "Error rate regression compared to previous build: "
            f"{metrics.error_rate:.4f} vs {metrics.previous_error_rate:.4f}"
        )

    if failures:
        raise CanaryCheckError("; ".join(failures))


def main() -> int:
    """
    Run the canary metric validation flow and return a process exit code.

    Loads metrics, evaluates them against configured thresholds, prints status
    and comparisons to stdout/stderr, and writes a JSON pass/fail summary.

    Returns:
        int: `0` if all checks pass, `1` if any check fails or metrics cannot
        be loaded.
    """
    try:
        metrics = _load_metrics()
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError subclasses ValueError. A missing or corrupt
        # fixture should fail the job with a clear message, not a traceback.
        print(f"Failed to load canary metrics: {exc}", file=sys.stderr)
        return 1

    try:
        _evaluate(metrics)
    except CanaryCheckError as exc:
        print(f"Canary metrics check failed: {exc}", file=sys.stderr)
        _write_summary(metrics, passed=False)
        return 1

    print(
        "Canary metrics within thresholds. "
        f"latency_p95_ms={metrics.latency_p95_ms:.2f}, "
        f"error_rate={metrics.error_rate:.4f}"
    )
    if metrics.trace_latency_p95_ms is not None:
        print(f"Tempo trace P95 latency: {metrics.trace_latency_p95_ms:.2f}ms")
    comparison_parts = []
    if metrics.previous_latency_p95_ms is not None:
        comparison_parts.append(
            f"latency_p95_ms={metrics.previous_latency_p95_ms:.2f}"
        )
    if metrics.previous_error_rate is not None:
        comparison_parts.append(f"error_rate={metrics.previous_error_rate:.4f}")
    if comparison_parts:
        print("Compared to previous build: " + ", ".join(comparison_parts))
    _write_summary(metrics, passed=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())