From da2ecb7d3823f7193313ddc5ebf354fb00f33683 Mon Sep 17 00:00:00 2001 From: Shayan Date: Thu, 16 Oct 2025 11:05:03 -0700 Subject: [PATCH 1/2] Add CI performance budgets with observability gating --- .github/workflows/ci.yml | 145 ++++++++++ docs/release-checklist.md | 34 +++ frontend/package-lock.json | 9 + frontend/package.json | 4 +- frontend/tests/e2e/perf/perf-budget.spec.ts | 277 ++++++++++++++++++++ mkdocs.yml | 1 + perf-budget.yml | 18 ++ scripts/ci/__init__.py | 1 + scripts/ci/check_canary_budgets.py | 201 ++++++++++++++ scripts/publish-perf-metrics.mjs | 159 +++++++++++ 10 files changed, 848 insertions(+), 1 deletion(-) create mode 100644 docs/release-checklist.md create mode 100644 frontend/tests/e2e/perf/perf-budget.spec.ts create mode 100644 scripts/ci/__init__.py create mode 100644 scripts/ci/check_canary_budgets.py create mode 100644 scripts/publish-perf-metrics.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c3b8368..6516dc46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,3 +102,148 @@ jobs: - name: Build run: npm run build + + perf-budget: + runs-on: ubuntu-latest + needs: + - backend-tests + - frontend-tests + timeout-minutes: 30 + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + + - name: Set up Node.js + uses: actions/setup-node@0a44ba78451273a1ed8ac2fee4e347c72dfd377f + with: + node-version: '20' + cache: 'npm' + cache-dependency-path: ./frontend/package-lock.json + + - name: Install Playwright dependencies + run: | + cd frontend + npm ci + npx playwright install --with-deps + + - name: Create development environment file + run: | + cat <<'ENV' > .env.development + PROJECT_NAME=paform + FRONTEND_PORT=3000 + BACKEND_PORT=8000 + BACKEND_HOST=backend-dev + HYGRAPH_WEBHOOK_SECRET=ci-placeholder + DATABASE_URL=sqlite:///./paform.db + ENV + + - name: Start application stack + run: docker compose --env-file .env.development -f docker-compose.dev.yml up -d --build + + 
- name: Wait for API + run: | + for i in {1..60}; do curl -sf http://localhost:8000/healthcheck && break || sleep 2; done + + - name: Wait for Frontend + run: | + for i in {1..60}; do curl -sf http://localhost:3000/models/manifest.json && break || sleep 2; done + + - name: Seed backend fixtures + env: + BASE_URL: http://localhost:8000 + run: | + python - <<'PY' +import json +import os +import urllib.error +import urllib.request + +BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000") + +def post(path: str, payload: dict) -> dict: + req = urllib.request.Request( + f"{BASE_URL}{path}", + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8", "ignore") + raise SystemExit(f"Seed request failed ({exc.code}): {detail}") + +material = post( + "/api/materials/", + {"name": "Walnut", "texture_url": None, "cost_per_sq_ft": 12.5}, +) +material_id = material.get("id") +if not material_id: + raise SystemExit("Material creation failed; missing id") + +post( + "/api/modules/", + { + "name": "Base600", + "width": 600.0, + "height": 720.0, + "depth": 580.0, + "base_price": 100.0, + "material_id": material_id, + }, +) +PY + + - name: Run Playwright performance budget + env: + PERF_BUDGET_FILE: ../perf-budget.yml + PERF_RESULTS_DIR: ../perf-results + run: | + mkdir -p perf-results + cd frontend + npx playwright test tests/e2e/perf/perf-budget.spec.ts --reporter=junit,line --output=playwright-report + + - name: Publish performance metrics + if: always() + env: + PERF_RESULTS_DIR: perf-results + PUSHGATEWAY_URL: ${{ secrets.PUSHGATEWAY_URL }} + PUSHGATEWAY_JOB: ci-performance-budget + GITHUB_SHA: ${{ github.sha }} + GITHUB_REF: ${{ github.ref }} + run: | + node scripts/publish-perf-metrics.mjs + + - name: Upload performance artifacts + if: always() + 
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: perf-budget-artifacts + path: | + perf-results + frontend/playwright-report + + - name: Shutdown application stack + if: always() + run: docker compose --env-file .env.development -f docker-compose.dev.yml down + + observability-budgets: + runs-on: ubuntu-latest + needs: perf-budget + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + + - name: Evaluate canary latency and error budgets + env: + PROMETHEUS_URL: ${{ secrets.PROMETHEUS_URL }} + PROMETHEUS_LATENCY_QUERY: ${{ vars.PROMETHEUS_LATENCY_QUERY }} + PROMETHEUS_ERROR_RATE_QUERY: ${{ vars.PROMETHEUS_ERROR_RATE_QUERY }} + LATENCY_BUDGET_MS: 3000 + ERROR_RATE_BUDGET: 0.02 + REGRESSION_TOLERANCE: 0.15 + BASELINE_OFFSET_SECONDS: 3600 + LATENCY_UNIT_SCALE: 1000 + TEMPO_URL: ${{ secrets.TEMPO_URL }} + TEMPO_QUERY: ${{ secrets.TEMPO_QUERY }} + TEMPO_DURATION_BUDGET_MS: 4000 + run: | + python3 scripts/ci/check_canary_budgets.py diff --git a/docs/release-checklist.md b/docs/release-checklist.md new file mode 100644 index 00000000..f41ed6c2 --- /dev/null +++ b/docs/release-checklist.md @@ -0,0 +1,34 @@ +# Release Checklist + +This checklist captures the operational gates that must be satisfied before promoting a build to production. It ties runtime telemetry, CI results, and preview deployments together so on-call responders have a consistent view of system health. + +## 1. Verify CI performance budgets + +1. Inspect the **perf-budget** job in the CI workflow. Confirm the Playwright run stores the latest navigation and LCP percentiles in the `perf-results` artifact or that they appear in Grafana when the Pushgateway integration is configured. +2. Ensure the homepage → configurator journey meets the navigation (P90 < 3s) and Largest Contentful Paint (P95 < 4s) budgets. Any regression should block the release until a remediation plan is documented. +3. 
Cross-check the JUnit report (or Pushgateway metrics) against historical trends before approving the release branch merge. + +## 2. Review canary latency and error budgets + +1. Open the **observability-budgets** job logs for the Prometheus/Tempo regression check. Confirm the P95 API latency and error-rate budgets are below their configured thresholds and have not regressed more than the allowed tolerance from the previous build. +2. If the job fails, review the Prometheus dashboards for the affected service and either mitigate or roll back before continuing the release. + +## 3. Validate Grafana alerts and on-call notifications + +1. Check that the Grafana dashboard for the configurator experience displays the latest CI metrics (either from the Pushgateway metrics or the uploaded JUnit results). +2. Confirm Grafana alert rules reference the same Prometheus queries used in CI and that alert routing targets the active on-call channel (PagerDuty, Opsgenie, or Slack On-call). Perform a synthetic alert test each quarter to verify paging works end-to-end. +3. Update the on-call schedule if coverage changed since the previous release. + +## 4. Preview environment gating + +1. Before cutting a release tag, verify that the preview deployment (staging or review app) is healthy: + - Synthetic navigation checks load within the CI-defined budgets. + - Error budgets are green in Grafana/Prometheus for the staging namespace. +2. Confirm preview environment alerts route to the same on-call channel so responders see issues ahead of production rollout. +3. Capture a short Loom or screenshot walkthrough of the configurator flow and attach it to the release ticket to document the state of the UI prior to launch. + +## 5. Final approval + +1. Ensure all checklist items above are marked complete in the release issue template. +2. Obtain sign-off from the engineering lead (performance budgets) and on-call lead (alert routing) before performing the production deploy. +3.
After deployment, monitor Grafana dashboards for at least one full canary window to confirm runtime telemetry stays within budget. If alerts fire, follow the incident response playbook and update the release retrospective. diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 99d5439c..47cbf0a3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -28,6 +28,7 @@ "@testing-library/react": "^15.0.7", "@testing-library/user-event": "^14.5.2", "@types/jest": "^29.5.12", + "@types/js-yaml": "^4.0.9", "@types/node": "^20", "@types/react": "^18", "@types/react-dom": "^18", @@ -37,6 +38,7 @@ "gltfpack": "0.25.0", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", + "js-yaml": "^4.1.0", "meshoptimizer": "0.25.0", "postcss": "^8", "tailwindcss": "^3.3.0", @@ -3180,6 +3182,13 @@ "pretty-format": "^29.0.0" } }, + "node_modules/@types/js-yaml": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", + "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/jsdom": { "version": "20.0.1", "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-20.0.1.tgz", diff --git a/frontend/package.json b/frontend/package.json index c37705e1..1320587a 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -39,6 +39,7 @@ "@types/node": "^20", "@types/react": "^18", "@types/react-dom": "^18", + "@types/js-yaml": "^4.0.9", "autoprefixer": "^10.0.1", "eslint": "^8", "eslint-config-next": "14.2.0", @@ -54,7 +55,8 @@ "meshoptimizer": "0.25.0", "vitest": "^1.6.0", "@playwright/test": "^1.56.0", - "ts-node": "^10.9.2" + "ts-node": "^10.9.2", + "js-yaml": "^4.1.0" }, "jest": { "setupFilesAfterEnv": [ diff --git a/frontend/tests/e2e/perf/perf-budget.spec.ts b/frontend/tests/e2e/perf/perf-budget.spec.ts new file mode 100644 index 00000000..0c9e0a06 --- /dev/null +++ 
b/frontend/tests/e2e/perf/perf-budget.spec.ts @@ -0,0 +1,277 @@ +import { expect, test } from '@playwright/test'; +import type { Browser } from '@playwright/test'; +import fs from 'node:fs'; +import path from 'node:path'; +import yaml from 'js-yaml'; + +type WaitConfig = { + type: 'selector' | 'networkidle'; + selector?: string; + timeout_ms?: number; + idle_ms?: number; +}; + +type MetricConfig = { + id: string; + aggregation: string; + threshold: number; + unit: string; +}; + +type JourneyConfig = { + id: string; + description?: string; + from_url: string; + to_url: string; + waits?: WaitConfig[]; + metrics?: MetricConfig[]; +}; + +type PerfBudgetConfig = { + run_count?: number; + journeys?: JourneyConfig[]; +}; + +type JourneyMetricSummary = { + quantile: string; + value_ms: number; + threshold_ms?: number; + passed: boolean; +}; + +type JourneyResult = { + id: string; + description?: string; + runs: Array<{ + navigation_duration_ms: number; + largest_contentful_paint_ms: number; + }>; + aggregated: { + navigation_duration?: JourneyMetricSummary; + largest_contentful_paint?: JourneyMetricSummary; + }; +}; + +const percentile = (values: number[], percentileValue: number): number => { + if (!values.length) { + return 0; + } + const sorted = [...values].sort((a, b) => a - b); + const rank = (sorted.length - 1) * percentileValue; + const lowerIndex = Math.floor(rank); + const upperIndex = Math.ceil(rank); + if (lowerIndex === upperIndex) { + return sorted[lowerIndex]; + } + const weight = rank - lowerIndex; + return sorted[lowerIndex] + (sorted[upperIndex] - sorted[lowerIndex]) * weight; +}; + +const loadPerfBudget = (): PerfBudgetConfig => { + const configPath = process.env.PERF_BUDGET_FILE + ? 
path.resolve(process.cwd(), process.env.PERF_BUDGET_FILE) + : path.resolve(__dirname, '../../../../perf-budget.yml'); + + const raw = fs.readFileSync(configPath, 'utf8'); + return yaml.load(raw) as PerfBudgetConfig; +}; + +const toQuantile = (aggregation: string): number | undefined => { + const match = /^p(\d{1,2})$/i.exec(aggregation.trim()); + if (!match) { + return undefined; + } + const percentileNumber = Number(match[1]); + return percentileNumber / 100; +}; + +const ensureResultsDir = (): string => { + const dir = process.env.PERF_RESULTS_DIR + ? path.resolve(process.cwd(), process.env.PERF_RESULTS_DIR) + : path.resolve(__dirname, '../../../../perf-results'); + fs.mkdirSync(dir, { recursive: true }); + return dir; +}; + +const writeJourneyResult = (dir: string, result: JourneyResult): void => { + const filePath = path.join(dir, `${result.id}.json`); + fs.writeFileSync(filePath, JSON.stringify(result, null, 2), 'utf8'); +}; + +const attachJourneyResult = async (result: JourneyResult): Promise<void> => { + await test.info().attach(`${result.id}-metrics.json`, { + body: Buffer.from(JSON.stringify(result, null, 2)), + contentType: 'application/json', + }); +}; + +const collectNavigationMetrics = async ( + browser: Browser, + fromUrl: string, + toUrl: string, + waits: WaitConfig[] | undefined, +): Promise<{ navigation: number; lcp: number }> => { + const context = await browser.newContext(); + + await context.addInitScript(() => { + (window as typeof window & { + __lcpEntries?: PerformanceEntry[]; + __lcpObserver?: PerformanceObserver; + }).__lcpEntries = []; + }); + + const page = await context.newPage(); + await page.goto(fromUrl, { waitUntil: 'networkidle' }); + + await page.evaluate(() => { + const globalWindow = window as typeof window & { + __lcpEntries?: PerformanceEntry[]; + __lcpObserver?: PerformanceObserver; + }; + + globalWindow.__lcpEntries = []; + if (globalWindow.__lcpObserver) { + globalWindow.__lcpObserver.disconnect(); + } + globalWindow.__lcpObserver
= new PerformanceObserver((entryList) => { + const entries = entryList.getEntries(); + globalWindow.__lcpEntries = [ + ...(globalWindow.__lcpEntries ?? []), + ...entries, + ]; + }); + globalWindow.__lcpObserver.observe({ type: 'largest-contentful-paint', buffered: true }); + + performance.clearResourceTimings(); + performance.clearMeasures(); + performance.clearMarks(); + }); + + await page.goto(toUrl, { waitUntil: 'networkidle' }); + + if (waits) { + for (const wait of waits) { + if (wait.type === 'selector' && wait.selector) { + await page.waitForSelector(wait.selector, { + state: 'attached', + timeout: wait.timeout_ms ?? 30_000, + }); + } + if (wait.type === 'networkidle') { + await page.waitForLoadState('networkidle', { timeout: wait.timeout_ms ?? 30_000 }); + if (wait.idle_ms) { + await page.waitForTimeout(wait.idle_ms); + } + } + } + } + + await page.waitForTimeout(500); + + const navigationDuration = await page.evaluate(() => { + const entries = performance.getEntriesByType('navigation'); + const last = entries[entries.length - 1] as PerformanceNavigationTiming | undefined; + return last ? last.duration : Number.NaN; + }); + + const lcp = await page.evaluate(() => { + const globalWindow = window as typeof window & { + __lcpEntries?: PerformanceEntry[]; + }; + const entries = (globalWindow.__lcpEntries ?? []) as Array<PerformanceEntry & { renderTime?: number; loadTime?: number }>; + if (!entries.length) { + return Number.NaN; + } + return entries.reduce((acc, entry) => { + const candidate = entry.startTime ?? entry.renderTime ?? entry.loadTime ?? 0; + return Math.max(acc, candidate); + }, 0); + }); + + await context.close(); + + return { navigation: navigationDuration, lcp }; +}; + +const perfConfig = loadPerfBudget(); +const runCount = perfConfig.run_count ??
3; +const resultsDir = ensureResultsDir(); + +test.describe('performance budget journeys', () => { + if (!perfConfig.journeys || perfConfig.journeys.length === 0) { + test('no journeys defined', async () => { + test.skip(true, 'No journeys defined in perf budget'); + }); + return; + } + + for (const journey of perfConfig.journeys) { + test(journey.id, async () => { + const runs: Array<{ navigation_duration_ms: number; largest_contentful_paint_ms: number }> = []; + + for (let iteration = 0; iteration < runCount; iteration += 1) { + const metrics = await collectNavigationMetrics(browser, journey.from_url, journey.to_url, journey.waits); + runs.push({ + navigation_duration_ms: metrics.navigation, + largest_contentful_paint_ms: metrics.lcp, + }); + } + + const navigationValues = runs.map((run) => run.navigation_duration_ms).filter((value) => Number.isFinite(value)); + const lcpValues = runs.map((run) => run.largest_contentful_paint_ms).filter((value) => Number.isFinite(value)); + + const aggregated: JourneyResult['aggregated'] = {}; + + const metricConfigById = new Map(); + for (const metric of journey.metrics ?? []) { + metricConfigById.set(metric.id, metric); + } + + if (navigationValues.length) { + const metricConfig = metricConfigById.get('navigation-duration'); + const quantile = metricConfig ? toQuantile(metricConfig.aggregation) : 0.9; + const value = percentile(navigationValues, quantile ?? 0.9); + aggregated.navigation_duration = { + quantile: metricConfig ? metricConfig.aggregation.toLowerCase() : 'p90', + value_ms: value, + threshold_ms: metricConfig?.threshold, + passed: metricConfig ? value <= metricConfig.threshold : true, + }; + } + + if (lcpValues.length) { + const metricConfig = metricConfigById.get('largest-contentful-paint'); + const quantile = metricConfig ? toQuantile(metricConfig.aggregation) : 0.95; + const value = percentile(lcpValues, quantile ?? 0.95); + aggregated.largest_contentful_paint = { + quantile: metricConfig ? 
metricConfig.aggregation.toLowerCase() : 'p95', + value_ms: value, + threshold_ms: metricConfig?.threshold, + passed: metricConfig ? value <= metricConfig.threshold : true, + }; + } + + const result: JourneyResult = { + id: journey.id, + description: journey.description, + runs, + aggregated, + }; + + writeJourneyResult(resultsDir, result); + await attachJourneyResult(result); + + if (aggregated.navigation_duration && aggregated.navigation_duration.threshold_ms !== undefined) { + expect(aggregated.navigation_duration.value_ms, 'homepage to configurator navigation P90 should remain under budget').toBeLessThanOrEqual( + aggregated.navigation_duration.threshold_ms, + ); + } + + if (aggregated.largest_contentful_paint && aggregated.largest_contentful_paint.threshold_ms !== undefined) { + expect(aggregated.largest_contentful_paint.value_ms, 'homepage to configurator LCP P95 should remain under budget').toBeLessThanOrEqual( + aggregated.largest_contentful_paint.threshold_ms, + ); + } + }); + } +}); diff --git a/mkdocs.yml b/mkdocs.yml index f60b23be..3043dcf4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,6 +33,7 @@ nav: - Getting Started: getting-started.md - Backend Development: backend-development.md - Frontend Development: frontend-development.md + - Release Checklist: release-checklist.md - Docker and Deployment: docker-deployment.md - Architecture: ARCHITECTURE.md - API Spec: API_SPEC.md diff --git a/perf-budget.yml b/perf-budget.yml index b9805519..f04b875c 100644 --- a/perf-budget.yml +++ b/perf-budget.yml @@ -41,3 +41,21 @@ pages: aggregation: p75 threshold: 0.1 unit: score +journeys: + - id: homepage-to-configurator + description: "Navigate from the marketing homepage to the configurator experience" + from_url: http://localhost:3000/ + to_url: http://localhost:3000/configurator + waits: + - type: selector + selector: "canvas" + timeout_ms: 60000 + metrics: + - id: navigation-duration + aggregation: p90 + threshold: 3000 + unit: ms + - id: 
largest-contentful-paint + aggregation: p95 + threshold: 4000 + unit: ms diff --git a/scripts/ci/__init__.py b/scripts/ci/__init__.py new file mode 100644 index 00000000..aaa6e9b5 --- /dev/null +++ b/scripts/ci/__init__.py @@ -0,0 +1 @@ +"""CI helper scripts for observability automation.""" diff --git a/scripts/ci/check_canary_budgets.py b/scripts/ci/check_canary_budgets.py new file mode 100644 index 00000000..bbbfc72b --- /dev/null +++ b/scripts/ci/check_canary_budgets.py @@ -0,0 +1,201 @@ +"""Validate canary latency and error budgets using Prometheus and Tempo.""" + +from __future__ import annotations + +import json +import os +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class BudgetResult: + """Hold the evaluation result for a particular budget.""" + + name: str + current: float + previous: Optional[float] + threshold: float + unit: str + passed: bool + regression: Optional[float] = None + + def to_line(self) -> str: + previous_part = f", previous={self.previous:.4f}{self.unit}" if self.previous is not None else "" + regression_part = f", regression={self.regression:.2%}" if self.regression is not None else "" + status = "OK" if self.passed else "FAIL" + return ( + f"[{status}] {self.name}: current={self.current:.4f}{self.unit}{previous_part}" + f" (threshold={self.threshold:.4f}{self.unit}{regression_part})" + ) + + +def _env(name: str, default: Optional[str] = None) -> Optional[str]: + value = os.getenv(name) + return value if value not in (None, "") else default + + +def query_prometheus(base_url: str, query: str, timestamp: float) -> float: + """Execute an instant Prometheus query and return the first scalar value.""" + + encoded_query = urllib.parse.urlencode({"query": query, "time": f"{timestamp:.3f}"}) + url = f"{base_url.rstrip('/')}/api/v1/query?{encoded_query}" + try: + with urllib.request.urlopen(url, timeout=15) as response: + 
payload = json.loads(response.read().decode("utf-8")) + except urllib.error.URLError as exc: # pragma: no cover - network errors + raise SystemExit(f"Failed to query Prometheus at {url}: {exc}") + + if payload.get("status") != "success": + raise SystemExit(f"Prometheus query failed: {payload}") + + results = payload.get("data", {}).get("result", []) + if not results: + raise SystemExit(f"Prometheus query returned no data for {query}") + + value = results[0].get("value") + if not value or len(value) < 2: + raise SystemExit(f"Prometheus result malformed: {results[0]}") + + return float(value[1]) + + +def evaluate_budget( + *, + name: str, + base_url: str, + query: str, + unit_scale: float, + threshold: float, + regression_tolerance: float, + baseline_offset_seconds: float, + unit_label: str, +) -> BudgetResult: + now = time.time() + current_value_raw = query_prometheus(base_url, query, now) + previous_value_raw = query_prometheus(base_url, query, now - baseline_offset_seconds) + + current_value = current_value_raw * unit_scale + previous_value = previous_value_raw * unit_scale + + regression_ratio = (current_value - previous_value) / previous_value if previous_value else None + + within_threshold = current_value <= threshold + within_regression = ( + regression_ratio is None or regression_ratio <= regression_tolerance + ) + + return BudgetResult( + name=name, + current=current_value, + previous=previous_value, + threshold=threshold, + unit=unit_label, + passed=within_threshold and within_regression, + regression=regression_ratio, + ) + + +def query_tempo(base_url: str, query_json: str) -> dict: + data = query_json.encode("utf-8") + request = urllib.request.Request( + f"{base_url.rstrip('/')}/api/search", data=data, headers={"Content-Type": "application/json"} + ) + try: + with urllib.request.urlopen(request, timeout=15) as response: + return json.loads(response.read().decode("utf-8")) + except urllib.error.URLError as exc: # pragma: no cover - network errors + raise 
SystemExit(f"Failed to query Tempo: {exc}") + + +def check_tempo_regressions(base_url: str, query: str, duration_budget_ms: float) -> BudgetResult: + payload = query_tempo(base_url, query) + traces = payload.get("traces", []) + if not traces: + raise SystemExit("Tempo query returned no traces; cannot evaluate budget") + + durations_ms = [trace.get("durationMs") for trace in traces if isinstance(trace.get("durationMs"), (int, float))] + if not durations_ms: + raise SystemExit("Tempo query did not include trace durations") + + current = max(durations_ms) + return BudgetResult( + name="tempo-trace-duration", + current=current, + previous=None, + threshold=duration_budget_ms, + unit=" ms", + passed=current <= duration_budget_ms, + ) + + +def main() -> int: + prom_url = _env("PROMETHEUS_URL") + latency_query = _env("PROMETHEUS_LATENCY_QUERY") + error_query = _env("PROMETHEUS_ERROR_RATE_QUERY") + + if not prom_url or not latency_query or not error_query: + print("[canary] Prometheus configuration missing; skipping canary budget validation.") + return 0 + + latency_budget_ms = float(_env("LATENCY_BUDGET_MS", "3000")) + error_budget_rate = float(_env("ERROR_RATE_BUDGET", "0.02")) + regression_tolerance = float(_env("REGRESSION_TOLERANCE", "0.15")) + baseline_offset_seconds = float(_env("BASELINE_OFFSET_SECONDS", str(60 * 60))) + latency_unit_scale = float(_env("LATENCY_UNIT_SCALE", "1000")) + + results = [] + results.append( + evaluate_budget( + name="p95-latency", + base_url=prom_url, + query=latency_query, + unit_scale=latency_unit_scale, + threshold=latency_budget_ms, + regression_tolerance=regression_tolerance, + baseline_offset_seconds=baseline_offset_seconds, + unit_label=" ms", + ) + ) + + error_result = evaluate_budget( + name="error-rate", + base_url=prom_url, + query=error_query, + unit_scale=1.0, + threshold=error_budget_rate, + regression_tolerance=regression_tolerance, + baseline_offset_seconds=baseline_offset_seconds, + unit_label=" rate", + ) + 
results.append(error_result) + + tempo_url = _env("TEMPO_URL") + tempo_query = _env("TEMPO_QUERY") + tempo_budget_ms = _env("TEMPO_DURATION_BUDGET_MS") + if tempo_url and tempo_query and tempo_budget_ms: + tempo_result = check_tempo_regressions(tempo_url, tempo_query, float(tempo_budget_ms)) + results.append(tempo_result) + + failures = [result for result in results if not result.passed] + + for result in results: + print(result.to_line()) + + if failures: + print("Budget failures detected:") + for failure in failures: + print(f" - {failure.name}") + return 1 + + print("All canary budgets within thresholds.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/publish-perf-metrics.mjs b/scripts/publish-perf-metrics.mjs new file mode 100644 index 00000000..5b09f0f7 --- /dev/null +++ b/scripts/publish-perf-metrics.mjs @@ -0,0 +1,159 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; + +const RESULTS_DIR = process.env.PERF_RESULTS_DIR + ? path.resolve(process.cwd(), process.env.PERF_RESULTS_DIR) + : path.resolve(process.cwd(), 'perf-results'); + +const PUSHGATEWAY_URL = process.env.PUSHGATEWAY_URL ?? ''; +const PUSHGATEWAY_JOB = process.env.PUSHGATEWAY_JOB ?? 'ci-performance-budget'; +const GIT_SHA = process.env.GITHUB_SHA ?? ''; +const GIT_REF = process.env.GITHUB_REF ?? 
''; + +const readJourneyFiles = async () => { + try { + const entries = await fs.readdir(RESULTS_DIR); + const jsonFiles = entries.filter((entry) => entry.endsWith('.json')); + const payloads = []; + for (const file of jsonFiles) { + const content = await fs.readFile(path.join(RESULTS_DIR, file), 'utf8'); + payloads.push(JSON.parse(content)); + } + return payloads; + } catch (error) { + if ((error instanceof Error && 'code' in error && error.code === 'ENOENT') || + (typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT')) { + return []; + } + throw error; + } +}; + +const formatPrometheusBody = (journey) => { + const labelsBase = `journey="${journey.id}"`; + const lines = [ + '# TYPE ci_navigation_duration_seconds summary', + ]; + + if (journey.aggregated?.navigation_duration?.value_ms !== undefined) { + const navigationSeconds = journey.aggregated.navigation_duration.value_ms / 1000; + const quantileLabel = journey.aggregated.navigation_duration.quantile; + lines.push(`ci_navigation_duration_seconds{${labelsBase},quantile="${quantileLabel}"} ${navigationSeconds}`); + } + + if (journey.aggregated?.largest_contentful_paint?.value_ms !== undefined) { + const lcpSeconds = journey.aggregated.largest_contentful_paint.value_ms / 1000; + const quantileLabel = journey.aggregated.largest_contentful_paint.quantile; + lines.push(`ci_lcp_seconds{${labelsBase},quantile="${quantileLabel}"} ${lcpSeconds}`); + } + + const metaLabels = [`journey="${journey.id}"`]; + if (GIT_SHA) { + metaLabels.push(`git_sha="${GIT_SHA}"`); + } + if (GIT_REF) { + metaLabels.push(`git_ref="${GIT_REF}"`); + } + + lines.push(`# TYPE ci_perf_build_info gauge`); + lines.push(`ci_perf_build_info{${metaLabels.join(',')}} 1`); + + return lines.join('\n'); +}; + +const pushToPushgateway = async (journeyMetrics) => { + const baseUrl = PUSHGATEWAY_URL.replace(/\/$/, ''); + for (const journey of journeyMetrics) { + const body = formatPrometheusBody(journey); + const targetUrl 
= `${baseUrl}/metrics/job/${encodeURIComponent(PUSHGATEWAY_JOB)}/journey/${encodeURIComponent(journey.id)}`; + const response = await fetch(targetUrl, { + method: 'POST', + headers: { 'Content-Type': 'text/plain' }, + body, + }); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Failed to push metrics to Pushgateway (${response.status}): ${text}`); + } + } +}; + +const toJUnit = (journeyMetrics) => { + const testcases = journeyMetrics.map((journey) => { + const name = journey.description ? `${journey.id} — ${journey.description}` : journey.id; + const lines = []; + if (journey.aggregated?.navigation_duration) { + const metric = journey.aggregated.navigation_duration; + lines.push(`navigation_${metric.quantile}=${metric.value_ms.toFixed(2)}ms`); + if (metric.threshold_ms !== undefined) { + lines.push(`navigation_threshold=${metric.threshold_ms}`); + } + lines.push(`navigation_passed=${metric.passed}`); + } + if (journey.aggregated?.largest_contentful_paint) { + const metric = journey.aggregated.largest_contentful_paint; + lines.push(`lcp_${metric.quantile}=${metric.value_ms.toFixed(2)}ms`); + if (metric.threshold_ms !== undefined) { + lines.push(`lcp_threshold=${metric.threshold_ms}`); + } + lines.push(`lcp_passed=${metric.passed}`); + } + const systemOut = lines.join('\n'); + const failure = Object.values(journey.aggregated ?? {}).some((metric) => metric && metric.threshold_ms !== undefined && !metric.passed); + return { + name, + systemOut, + failure, + failureMessage: failure ? 
'Performance budget regression detected' : undefined, + }; + }); + + const failures = testcases.filter((tc) => tc.failure).length; + const xmlParts = [ + '', + ``, + ``, + ]; + + for (const testcase of testcases) { + xmlParts.push(``); + if (testcase.failure && testcase.failureMessage) { + xmlParts.push(``); + } + if (testcase.systemOut) { + xmlParts.push(``); + } + xmlParts.push(''); + } + + xmlParts.push(''); + xmlParts.push(''); + return xmlParts.join(''); +}; + +const writeJUnitReport = async (journeyMetrics) => { + const xml = toJUnit(journeyMetrics); + await fs.writeFile(path.join(RESULTS_DIR, 'perf-budget.junit.xml'), xml, 'utf8'); +}; + +const main = async () => { + const journeyMetrics = await readJourneyFiles(); + if (!journeyMetrics.length) { + console.log(`No performance results found in ${RESULTS_DIR}; skipping publish step.`); + return; + } + + if (PUSHGATEWAY_URL) { + await pushToPushgateway(journeyMetrics); + console.log(`Published ${journeyMetrics.length} journey metrics to Pushgateway.`); + } else { + await writeJUnitReport(journeyMetrics); + console.log(`Wrote JUnit report for ${journeyMetrics.length} journeys to ${path.join(RESULTS_DIR, 'perf-budget.junit.xml')}.`); + } +}; + +main().catch((error) => { + console.error('[publish-perf-metrics] Failed to publish metrics:', error); + process.exitCode = 1; +}); From 2710543aa10a7e329d5458d0c7820de14ae4d9e7 Mon Sep 17 00:00:00 2001 From: Shayan Date: Thu, 16 Oct 2025 11:57:33 -0700 Subject: [PATCH 2/2] Fix performance budget tests to use browser fixture --- frontend/tests/e2e/perf/perf-budget.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/tests/e2e/perf/perf-budget.spec.ts b/frontend/tests/e2e/perf/perf-budget.spec.ts index 0c9e0a06..225086b8 100644 --- a/frontend/tests/e2e/perf/perf-budget.spec.ts +++ b/frontend/tests/e2e/perf/perf-budget.spec.ts @@ -206,7 +206,7 @@ test.describe('performance budget journeys', () => { } for (const journey of perfConfig.journeys) { 
- test(journey.id, async () => { + test(journey.id, async ({ browser }) => { const runs: Array<{ navigation_duration_ms: number; largest_contentful_paint_ms: number }> = []; for (let iteration = 0; iteration < runCount; iteration += 1) {