Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

188 changes: 188 additions & 0 deletions frontend/tests/e2e/perf-budget.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import { test, expect, Page } from '@playwright/test';
import fs from 'node:fs';
import path from 'node:path';
import yaml from 'js-yaml';

// Percentile aggregations supported by perf-budget.yml.
type Aggregation = 'p75' | 'p90' | 'p95';

// One metric budget: aggregate samples of metric `id` with `aggregation`
// across runs and fail when the result exceeds `threshold` (in `unit`).
type MetricBudget = {
  id: string;
  aggregation: Aggregation;
  threshold: number;
  unit: string;
};

// Declarative wait step executed after navigation: either wait for a CSS
// selector to appear, or wait for network idle (optionally followed by a
// fixed idle delay). Timeouts are in milliseconds.
type WaitInstruction =
  | { type: 'selector'; selector: string; timeout_ms?: number }
  | { type: 'networkidle'; idle_ms?: number; timeout_ms?: number };

// A page definition that journeys reference via `target_page`.
type PageBudget = {
  id: string;
  url?: string;
  waits?: WaitInstruction[];
  selectors?: Record<string, string>;
};

// A user journey: navigate from `start_url` to the page identified by
// `target_page`, apply waits, then validate each metric budget.
type JourneyBudget = {
  id: string;
  description?: string;
  start_url: string;
  target_page: string;
  waits?: WaitInstruction[];
  metrics: MetricBudget[];
};

// Root shape of perf-budget.yml.
type PerfBudget = {
  // Number of measurement runs per journey; percentiles are computed across runs.
  run_count: number;
  pages?: PageBudget[];
  journeys?: JourneyBudget[];
  // Optional network/CPU throttling configuration mirrored from perf-budget.yml
  // (field names assumed from the review discussion — TODO confirm against the
  // YAML). NOTE(review): declared here but not yet applied by this spec; wire
  // it into the journey runs (CDP throttling on Chromium) so budgets reflect
  // the configured conditions.
  throttling?: {
    cpuSlowdownMultiplier?: number;
    downloadThroughputKbps?: number;
    uploadThroughputKbps?: number;
    requestLatencyMs?: number;
  };
};
Comment on lines +35 to +39
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Type doesn’t include throttling; add it to reflect perf-budget.yml

Include throttling in the schema so it can be applied.

 type PerfBudget = {
   run_count: number;
   pages?: PageBudget[];
   journeys?: JourneyBudget[];
+  throttling?: {
+    cpuSlowdownMultiplier?: number;
+    downloadThroughputKbps?: number;
+    uploadThroughputKbps?: number;
+    requestLatencyMs?: number;
+  };
 };
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
type PerfBudget = {
run_count: number;
pages?: PageBudget[];
journeys?: JourneyBudget[];
};
type PerfBudget = {
run_count: number;
pages?: PageBudget[];
journeys?: JourneyBudget[];
throttling?: {
cpuSlowdownMultiplier?: number;
downloadThroughputKbps?: number;
uploadThroughputKbps?: number;
requestLatencyMs?: number;
};
};
🤖 Prompt for AI Agents
In frontend/tests/e2e/perf-budget.spec.ts around lines 35 to 39, the PerfBudget
type is missing the throttling field defined in perf-budget.yml; add an optional
throttling property to the PerfBudget type that matches the YAML shape (for
example throttling?: { rtt?: number; throughput?: number;
cpuSlowdownMultiplier?: number } or a named Throttling interface) so tests can
apply network/CPU throttling when evaluating budgets.


// Raw per-run samples collected for a journey, keyed by metric id.
type JourneyRunMetrics = {
  navigation: number[];
  'largest-contentful-paint': number[];
};

// perf-budget.yml lives at the repository root, three levels above this spec.
const budgetPath = path.resolve(__dirname, '../../..', 'perf-budget.yml');

/**
 * Read and parse perf-budget.yml, validating the minimal invariant the suite
 * relies on (a positive run_count). Throws when the file cannot be parsed or
 * the budget is malformed.
 */
function loadBudget(): PerfBudget {
  const contents = fs.readFileSync(budgetPath, 'utf8');
  const budgetDoc = yaml.load(contents) as PerfBudget | undefined;
  if (!budgetDoc) {
    throw new Error(`Unable to parse performance budget at ${budgetPath}`);
  }
  const runCount = budgetDoc.run_count;
  if (!runCount || runCount < 1) {
    throw new Error('perf-budget.yml must define a run_count greater than zero');
  }
  return budgetDoc;
}

/**
 * Linear-interpolated percentile of `values`. The aggregation label encodes
 * the percentile (e.g. 'p95' -> 95). Returns 0 for an empty sample set;
 * otherwise interpolates between the two nearest order statistics.
 */
function percentile(values: number[], agg: Aggregation): number {
  if (values.length === 0) {
    return 0;
  }
  const ordered = values.slice().sort((left, right) => left - right);
  const pct = Number(agg.slice(1));
  const position = (pct / 100) * (ordered.length - 1);
  const lowerIndex = Math.floor(position);
  const upperIndex = Math.ceil(position);
  if (lowerIndex === upperIndex) {
    return ordered[lowerIndex];
  }
  const fraction = position - lowerIndex;
  return ordered[lowerIndex] * (1 - fraction) + ordered[upperIndex] * fraction;
}

/**
 * Execute the configured wait instructions in order. A 'selector' instruction
 * waits for the element to appear; a 'networkidle' instruction waits for the
 * network-idle load state and then, if configured, sleeps for idle_ms.
 * Missing timeouts default to 30 seconds. No-op when waits is empty/undefined.
 */
async function applyWaits(page: Page, waits: WaitInstruction[] | undefined) {
  if (!waits?.length) {
    return;
  }
  for (const instruction of waits) {
    switch (instruction.type) {
      case 'selector':
        await page.waitForSelector(instruction.selector, {
          timeout: instruction.timeout_ms ?? 30_000,
        });
        break;
      case 'networkidle': {
        await page.waitForLoadState('networkidle', {
          timeout: instruction.timeout_ms ?? 30_000,
        });
        if (instruction.idle_ms) {
          await page.waitForTimeout(instruction.idle_ms);
        }
        break;
      }
    }
  }
}

// Parse the budget once at module load; a malformed file aborts the suite early.
const budget = loadBudget();
// Index page budgets by id so journeys can resolve their target_page quickly.
const pagesById = new Map<string, PageBudget>(
  (budget.pages ?? []).map((pageBudget): [string, PageBudget] => [pageBudget.id, pageBudget]),
);

test.describe('Performance budgets', () => {
  // Serial mode — presumably to keep timing measurements free of cross-test
  // contention; confirm before changing to parallel.
  test.describe.configure({ mode: 'serial' });

Comment on lines +101 to +103
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Throttling from perf-budget.yml is not applied (network/CPU); budgets won’t reflect slow-4g

The test ignores configured throttling, leading to non-representative timings. Apply DevTools throttling (Chromium) and CPU slowdown.

 import { test, expect, Page } from '@playwright/test';
@@
 type JourneyRunMetrics = {
   navigation: number[];
   'largest-contentful-paint': number[];
 };
 
+const kbpsToBps = (kbps: number) => Math.max(1, Math.floor((kbps * 1024) / 8));
+
+async function applyThrottlingIfSupported(page: Page, t?: PerfBudget['throttling']) {
+  if (!t) return;
+  // CDP only on Chromium
+  const client = await page.context().newCDPSession(page);
+  await client.send('Network.enable');
+  const hasNet = t.downloadThroughputKbps || t.uploadThroughputKbps || t.requestLatencyMs;
+  if (hasNet) {
+    await client.send('Network.emulateNetworkConditions', {
+      offline: false,
+      downloadThroughput: t.downloadThroughputKbps ? kbpsToBps(t.downloadThroughputKbps) : -1,
+      uploadThroughput: t.uploadThroughputKbps ? kbpsToBps(t.uploadThroughputKbps) : -1,
+      latency: t.requestLatencyMs ?? 0,
+    });
+  }
+  if (t.cpuSlowdownMultiplier && t.cpuSlowdownMultiplier > 1) {
+    await client.send('Emulation.setCPUThrottlingRate', { rate: t.cpuSlowdownMultiplier });
+  }
+}
@@
   for (const journey of budget.journeys ?? []) {
     test(journey.id, async ({ page }, testInfo) => {
@@
-      for (let run = 0; run < budget.run_count; run += 1) {
+      for (let run = 0; run < budget.run_count; run += 1) {
         await page.context().clearCookies();
         await page.goto('about:blank');
 
+        await applyThrottlingIfSupported(page, budget.throttling);

Also applies to: 135-145

🤖 Prompt for AI Agents
In frontend/tests/e2e/perf-budget.spec.ts around lines 101-103 (and also apply
same change for lines 135-145), the test suite does not apply network/CPU
throttling so measured timings aren't using slow-4g; update the Playwright test
setup to enable Chromium DevTools throttling and CPU slowdown before running the
performance budget checks: obtain the test browser context/page for Chromium,
call page.context().grantPermissions if needed, use
page.emulateNetworkConditions or CDP session to set network conditions to
slow-4g and send DevTools command to set CPU throttling rate (e.g., 4x
slowdown), ensure these throttles are enabled in a beforeAll or test.before hook
and removed in afterAll or test.after to keep other tests unaffected.

  // One Playwright test per configured journey: drive the browser from
  // start_url to target_page and sample timing metrics across run_count runs.
  for (const journey of budget.journeys ?? []) {
    test(journey.id, async ({ page }, testInfo) => {
      const targetPage = pagesById.get(journey.target_page);
      if (!targetPage) {
        throw new Error(`Journey ${journey.id} references unknown page id ${journey.target_page}`);
      }

      // Install an LCP observer before any navigation so buffered entries are
      // captured; the largest startTime seen so far is kept on window.__lcp.
      await page.addInitScript(() => {
        (window as unknown as { __lcp?: number }).__lcp = 0;
        try {
          new PerformanceObserver((entryList) => {
            const entries = entryList.getEntries();
            if (!entries.length) {
              return;
            }
            const lastEntry = entries[entries.length - 1];
            const current = (window as unknown as { __lcp?: number }).__lcp ?? 0;
            if (typeof lastEntry.startTime === 'number') {
              (window as unknown as { __lcp?: number }).__lcp = Math.max(current, lastEntry.startTime);
            }
          }).observe({ type: 'largest-contentful-paint', buffered: true });
        } catch (error) {
          // NOTE(review): on browsers without LCP support this leaves __lcp at
          // 0, which can mask regressions — consider skipping LCP checks there.
          console.warn('Failed to initialise LCP observer', error);
        }
      });

Comment on lines +111 to +129
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

LCP unsupported path silently returns 0

On non-Chromium, observer may fail and yield 0, potentially masking regressions. Consider skipping LCP checks when unsupported or falling back to paint timing.

-        } catch (error) {
-          console.warn('Failed to initialise LCP observer', error);
-        }
+        } catch {
+          // Flag unsupported instead of returning zero
+          (window as unknown as { __lcp_unsupported?: boolean }).__lcp_unsupported = true;
+        }

Then, when validating:

// before using lcp
const lcpUnsupported = await page.evaluate(() => (window as any).__lcp_unsupported === true);
if (lcpUnsupported && journey.metrics.some(m => m.id === 'largest-contentful-paint')) {
  test.skip(true, 'LCP not supported in this browser');
}

      // Per-run samples for each metric tracked by this journey.
      const metrics: JourneyRunMetrics = {
        navigation: [],
        'largest-contentful-paint': [],
      };

      for (let run = 0; run < budget.run_count; run += 1) {
        // Reset state between runs so cookies/page state do not skew timings.
        await page.context().clearCookies();
        await page.goto('about:blank');

        // Warm-up visit to the journey start page before measuring.
        await page.goto(journey.start_url, { waitUntil: 'networkidle' });
        await page.waitForTimeout(250);

        // Measured navigation to the target page, then the configured waits
        // plus a settle delay so late LCP entries can still be observed.
        await page.goto(targetPage.url ?? journey.start_url, { waitUntil: 'networkidle' });
        await applyWaits(page, journey.waits ?? targetPage.waits);
        await page.waitForTimeout(500);

        // Duration (ms) of the most recent navigation timing entry.
        const navDuration = await page.evaluate(() => {
          const entries = performance.getEntriesByType('navigation');
          const last = entries[entries.length - 1];
          return last?.duration ?? 0;
        });
        const lcp = await page.evaluate(() => (window as unknown as { __lcp?: number }).__lcp ?? 0);

        metrics.navigation.push(navDuration);
        metrics['largest-contentful-paint'].push(lcp);
      }

      // Aggregate each metric to its configured percentile and assert budgets.
      const summary: Record<string, { aggregation: Aggregation; threshold: number; value: number; unit: string }> = {};

      for (const metric of journey.metrics) {
        const dataset = metrics[metric.id as keyof JourneyRunMetrics];
        if (!dataset) {
          throw new Error(`No dataset recorded for metric ${metric.id}`);
        }
        const value = percentile(dataset, metric.aggregation);
        summary[metric.id] = {
          aggregation: metric.aggregation,
          threshold: metric.threshold,
          value,
          unit: metric.unit,
        };
        const formattedValue = value.toFixed(2);
        await test.step(`Validate ${metric.id} ${metric.aggregation} (observed ${formattedValue}${metric.unit})`, async () => {
          expect(
            value,
            `Expected ${metric.id} ${metric.aggregation} to be <= ${metric.threshold}${metric.unit}, observed ${formattedValue}${metric.unit}`,
          ).toBeLessThanOrEqual(metric.threshold);
        });
      }

      // Persist raw samples + summary as a JSON attachment for dashboards.
      const artifactPath = testInfo.outputPath(`${journey.id}-metrics.json`);
      fs.writeFileSync(artifactPath, JSON.stringify({ id: journey.id, description: journey.description, summary, metrics }, null, 2));
      await testInfo.attach(`${journey.id}-metrics`, {
        path: artifactPath,
        contentType: 'application/json',
      });
    });
  }
});
4 changes: 4 additions & 0 deletions tools/canary-baseline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"latency_p95_ms": 2750,
"error_rate": 0.015
}
196 changes: 196 additions & 0 deletions tools/check_canary_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""Validate canary latency and error-rate budgets against Prometheus/Tempo."""

from __future__ import annotations

import argparse
import json
import math
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any, Dict, Optional


def _load_json_from_url(url: str, timeout: int = 10) -> Any:
    """Fetch *url* and decode the response body as UTF-8 JSON.

    Raises RuntimeError when the server answers with a non-200 status.
    """
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=timeout) as response:  # noqa: S310 (trusted CI context)
        if response.status != 200:
            raise RuntimeError(f"Request to {url} returned status {response.status}")
        body = response.read().decode("utf-8")
    return json.loads(body)


def _load_json_from_path(path: Path) -> Any:
with path.open("r", encoding="utf-8") as handle:
return json.load(handle)


def _extract_vector_value(payload: Dict[str, Any]) -> float:
data = payload.get("data", {})
result = data.get("result", [])
extracted: list[float] = []
for entry in result:
value = entry.get("value")
if isinstance(value, list) and len(value) >= 2:
try:
extracted.append(float(value[1]))
except (TypeError, ValueError):
continue
if not extracted:
raise ValueError("No numeric samples found in metric payload")
return sum(extracted) / len(extracted)


def _maybe_load_baseline(path: Optional[Path]) -> Optional[Dict[str, Any]]:
if not path:
return None
if not path.exists():
return None
return _load_json_from_path(path)


def _format_delta(current: float, baseline: float) -> str:
if baseline == 0:
return "n/a"
delta = ((current - baseline) / baseline) * 100
return f"{delta:+.2f}%"


def _resolve_metric(
label: str,
url: Optional[str],
query: Optional[str],
fixture: Optional[Path],
) -> float:
if fixture:
payload = _load_json_from_path(fixture)
return _extract_vector_value(payload)
if not url or not query:
raise ValueError(f"Missing configuration for {label}: provide URL/query or fixture")
parsed_url = urllib.parse.urljoin(url, "api/v1/query")
encoded_query = urllib.parse.urlencode({"query": query})
full_url = f"{parsed_url}?{encoded_query}" if "?" not in parsed_url else f"{parsed_url}&{encoded_query}"
payload = _load_json_from_url(full_url)
return _extract_vector_value(payload)


def main(argv: list[str]) -> int:
    """CLI entry point: fetch canary metrics, evaluate budgets, report failures.

    Returns 0 when all absolute budgets (and, if a baseline is available,
    the regression tolerances) hold; returns 1 when metrics cannot be
    retrieved or any budget is violated. Failures are emitted as GitHub
    Actions ``::error::`` annotations.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--prometheus-url", type=str, default=None, help="Base URL of Prometheus server")
    parser.add_argument("--prometheus-query", type=str, default=None, help="PromQL expression returning an instant vector")
    parser.add_argument(
        "--prometheus-fixture",
        type=Path,
        default=None,
        help="Path to fixture JSON emulating the Prometheus response",
    )
    parser.add_argument("--tempo-url", type=str, default=None, help="Base URL of Tempo server")
    parser.add_argument("--tempo-query", type=str, default=None, help="Query expression returning an instant vector")
    parser.add_argument(
        "--tempo-fixture",
        type=Path,
        default=None,
        help="Path to fixture JSON emulating the Tempo response",
    )
    parser.add_argument("--latency-threshold-ms", type=float, required=True, help="Maximum allowed latency (ms)")
    parser.add_argument("--error-threshold", type=float, required=True, help="Maximum allowed error rate (ratio)")
    parser.add_argument(
        "--baseline",
        type=Path,
        default=None,
        help="JSON file containing baseline metrics from the previous successful build",
    )
    parser.add_argument(
        "--regression-tolerance",
        type=float,
        default=0.1,
        # Bug fix: argparse %-interpolates help strings, so a literal "%" must
        # be escaped as "%%" or rendering --help raises ValueError.
        help="Allowed fractional regression relative to the baseline (default 0.1 = 10%%)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Write the evaluated metrics to this JSON file for downstream dashboards",
    )

    args = parser.parse_args(argv)

    # Resolve both metrics up front; any retrieval/parsing problem fails fast.
    try:
        latency = _resolve_metric(
            label="Prometheus latency",
            url=args.prometheus_url,
            query=args.prometheus_query,
            fixture=args.prometheus_fixture,
        )
        error_rate = _resolve_metric(
            label="Tempo error rate",
            url=args.tempo_url,
            query=args.tempo_query,
            fixture=args.tempo_fixture,
        )
    except (urllib.error.URLError, ValueError, RuntimeError) as exc:  # pragma: no cover - defensive path
        print(f"::error::Failed to retrieve metrics: {exc}")
        return 1

    baseline = _maybe_load_baseline(args.baseline)
    timestamp_ms = int(time.time() * 1000)

    # Summary is printed and optionally written to --output for dashboards.
    summary = {
        "timestamp_ms": timestamp_ms,
        "latency_p95_ms": latency,
        "error_rate": error_rate,
        "latency_threshold_ms": args.latency_threshold_ms,
        "error_threshold": args.error_threshold,
        "baseline": baseline,
        "regression_tolerance": args.regression_tolerance,
    }

    failures: list[str] = []

    # Absolute budgets.
    if latency > args.latency_threshold_ms:
        failures.append(
            f"Latency {latency:.2f}ms exceeded budget of {args.latency_threshold_ms:.2f}ms",
        )
    if error_rate > args.error_threshold:
        failures.append(
            f"Error rate {error_rate:.4f} exceeded budget of {args.error_threshold:.4f}",
        )

    # Relative regression versus the previous successful build, if available.
    # Missing baseline keys default to +inf so they can never trip the check.
    if baseline:
        baseline_latency = float(baseline.get("latency_p95_ms", math.inf))
        baseline_error = float(baseline.get("error_rate", math.inf))
        tolerance_multiplier = 1 + args.regression_tolerance
        if latency > baseline_latency * tolerance_multiplier:
            failures.append(
                "Latency regression "
                f"({latency:.2f}ms vs baseline {baseline_latency:.2f}ms, { _format_delta(latency, baseline_latency) }) exceeded tolerance",
            )
        if error_rate > baseline_error * tolerance_multiplier:
            failures.append(
                "Error-rate regression "
                f"({error_rate:.4f} vs baseline {baseline_error:.4f}, { _format_delta(error_rate, baseline_error) }) exceeded tolerance",
            )

    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        with args.output.open("w", encoding="utf-8") as handle:
            json.dump(summary, handle, indent=2)
            handle.write("\n")

    print("Canary metrics summary:")
    print(json.dumps(summary, indent=2))

    if failures:
        for failure in failures:
            print(f"::error::{failure}")
        return 1

    return 0


# Allow use both as a script (CI) and as an importable module (tests).
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
Loading
Loading