sqdshguy
diff --git a/‎docs/BENCHMARK.md‎
Lines changed: 40 additions & 0 deletions b/‎docs/BENCHMARK.md‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 3 additions & 0 deletions b/‎package.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎scripts/aws-perf/cleanup-stale.sh‎
Lines changed: 43 additions & 0 deletions b/‎scripts/aws-perf/cleanup-stale.sh‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎scripts/aws-perf/compare-bench.mjs‎
Lines changed: 168 additions & 0 deletions b/‎scripts/aws-perf/compare-bench.mjs‎
Lines changed: 168 additions & 0 deletions
@@ -61,3 +61,43 @@ npm run bench:run -- --scenario wreq.session.get.small --scenario wreq.session.g
 - Run on the same machine, on AC power, with minimal background load.
 - Prefer longer `--duration-ms` and more `--samples` until the reported CI margin is comfortably smaller than the improvement you’re targeting.
 - Keep parameters constant when comparing optimizations.
+
+## AWS isolated perf runs (recommended for low-noise gating)
+
+If you do not want to benchmark on your laptop/network, use the AWS CLI harness:
+
+1. Create the EC2 instance profile once (requires IAM permissions):
+
+```bash
+./scripts/aws-perf/setup-iam.sh
+```
+
+2. Run base vs head comparison on an ephemeral EC2 runner (Spot by default, auto-terminates):
+
+```bash
+./scripts/aws-perf/ec2-compare.sh --region us-east-1
+```
+
+Note: the runner clones from `origin` by default, so both refs must exist in the remote repository (push your branch/commit first if needed).
+
+The script:
+
+- Launches a short-lived EC2 instance with SSM (no inbound SSH required).
+- Runs the same benchmark scenarios for `--base-ref` and `--head-ref` on the same host.
+- Produces `tmp/aws-perf/<run-id>/summary.json` with per-scenario deltas and a pass/fail gate.
+- Terminates the instance automatically unless `--keep-instance` is passed.
+
+Useful options:
+
+- `--on-demand` to avoid Spot interruptions.
+- `--instance-type c6i.large` (default) for low cost and stable throughput.
+- `--threshold-pct 5` to set the regression gate: a scenario fails when head throughput (req/s) drops by at least this percentage relative to base.
+- `--scenarios 'wreq.session.get.small;wreq.session.get.4kb;wreq.isolated.get.small'` to limit scope.
+
+Safety cleanup:
+
+```bash
+./scripts/aws-perf/cleanup-stale.sh us-east-1
+```
+
+This terminates perf instances (tagged `Purpose=perf-benchmark`) whose `ExpiresEpoch` tag is at or before the current time.
@@ -26,6 +26,9 @@
     "bench": "npm run build:rust && tsx --expose-gc src/bench/run.ts",
     "bench:run": "tsx --expose-gc src/bench/run.ts",
     "bench:quick": "npm run build:rust && tsx --expose-gc src/bench/run.ts --scenario wreq.session.get.small",
+    "perf:aws:setup-iam": "bash ./scripts/aws-perf/setup-iam.sh",
+    "perf:aws:compare": "bash ./scripts/aws-perf/ec2-compare.sh",
+    "perf:aws:cleanup": "bash ./scripts/aws-perf/cleanup-stale.sh",
     "check": "biome check .",
     "check:fix": "biome check --write .",
     "clean": "rimraf dist rust/target rust/*.node",
 
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  cat <<'EOF'
+Usage: scripts/aws-perf/cleanup-stale.sh <region>
+
+Example:
+  scripts/aws-perf/cleanup-stale.sh us-east-1
+EOF
+  exit 0
+fi
+
+REGION="${1:-${AWS_REGION:-${AWS_DEFAULT_REGION:-}}}"
+if [[ -z "$REGION" ]]; then
+  echo "[aws-perf-cleanup] ERROR: region is required (arg1 or AWS_REGION)" >&2
+  exit 1
+fi
+
+NOW_EPOCH="$(date +%s)"
+
+echo "[aws-perf-cleanup] Region: $REGION"
+echo "[aws-perf-cleanup] Now: $NOW_EPOCH"
+
+candidate_ids="$(
+  aws ec2 describe-instances \
+    --region "$REGION" \
+    --filters "Name=tag:Purpose,Values=perf-benchmark" "Name=instance-state-name,Values=pending,running,stopping,stopped" \
+    --query 'Reservations[].Instances[].{Id:InstanceId,Expiry:Tags[?Key==`ExpiresEpoch`]|[0].Value}' \
+    --output text \
+  | awk -v now="$NOW_EPOCH" 'NF>=2 { if ($2+0 <= now) print $1 }'
+)"
+
+if [[ -z "$candidate_ids" ]]; then
+  echo "[aws-perf-cleanup] No stale perf instances found"
+  exit 0
+fi
+
+echo "[aws-perf-cleanup] Terminating stale instances:"
+echo "$candidate_ids" | sed 's/^/  - /'
+
+aws ec2 terminate-instances --region "$REGION" --instance-ids $candidate_ids >/dev/null
+echo "[aws-perf-cleanup] Terminate request submitted"
@@ -0,0 +1,168 @@
+#!/usr/bin/env node
+
+import { readFileSync, writeFileSync } from "node:fs";
+
+function parseArgs(argv) {
+  const args = {
+    thresholdPct: 5,
+    failOnRegression: true,
+  };
+
+  for (let i = 0; i < argv.length; i += 1) {
+    const arg = argv[i];
+    const next = argv[i + 1];
+
+    if (arg === "--base") {
+      if (!next) throw new Error("Missing value for --base");
+      args.basePath = next;
+      i += 1;
+      continue;
+    }
+    if (arg === "--head") {
+      if (!next) throw new Error("Missing value for --head");
+      args.headPath = next;
+      i += 1;
+      continue;
+    }
+    if (arg === "--threshold-pct") {
+      if (!next) throw new Error("Missing value for --threshold-pct");
+      args.thresholdPct = Number(next);
+      i += 1;
+      continue;
+    }
+    if (arg === "--markdown") {
+      if (!next) throw new Error("Missing value for --markdown");
+      args.markdownPath = next;
+      i += 1;
+      continue;
+    }
+    if (arg === "--json") {
+      if (!next) throw new Error("Missing value for --json");
+      args.jsonPath = next;
+      i += 1;
+      continue;
+    }
+    if (arg === "--no-fail") {
+      args.failOnRegression = false;
+      continue;
+    }
+
+    throw new Error(`Unknown argument: ${arg}`);
+  }
+
+  if (!args.basePath || !args.headPath) {
+    throw new Error("Usage: compare-bench.mjs --base <path> --head <path> [--threshold-pct <n>]");
+  }
+
+  if (!Number.isFinite(args.thresholdPct) || args.thresholdPct < 0) {
+    throw new Error("--threshold-pct must be a non-negative number");
+  }
+
+  return args;
+}
+
+function formatNum(value) {
+  return value.toLocaleString("en-US", { maximumFractionDigits: 2 });
+}
+
+function formatPct(value) {
+  const sign = value > 0 ? "+" : "";
+  return `${sign}${value.toFixed(2)}%`;
+}
+
+function loadBench(path) {
+  return JSON.parse(readFileSync(path, "utf8"));
+}
+
+function compare(baseRun, headRun, thresholdPct) {
+  const byName = (run) => new Map(run.results.map((result) => [result.name, result]));
+  const baseMap = byName(baseRun);
+  const headMap = byName(headRun);
+
+  const names = [...baseMap.keys()].filter((name) => headMap.has(name));
+  names.sort();
+
+  const scenarios = names.map((name) => {
+    const base = baseMap.get(name);
+    const head = headMap.get(name);
+
+    const deltaPct = ((head.mean - base.mean) / base.mean) * 100;
+    const regression = deltaPct <= -thresholdPct;
+    const improvement = deltaPct >= thresholdPct;
+
+    return {
+      name,
+      baseMean: base.mean,
+      headMean: head.mean,
+      deltaPct,
+      baseCiPct: base.ci95.marginPct,
+      headCiPct: head.ci95.marginPct,
+      baseErrors: base.errors,
+      headErrors: head.errors,
+      status: regression ? "REGRESSION" : improvement ? "IMPROVEMENT" : "OK",
+    };
+  });
+
+  const regressions = scenarios.filter((item) => item.status === "REGRESSION");
+
+  return {
+    generatedAt: new Date().toISOString(),
+    thresholdPct,
+    baseCommit: baseRun.git?.commit,
+    headCommit: headRun.git?.commit,
+    regressions: regressions.map((item) => item.name),
+    pass: regressions.length === 0,
+    scenarios,
+  };
+}
+
+function toMarkdown(report) {
+  const lines = [];
+  lines.push("# AWS Perf Compare");
+  lines.push("");
+  lines.push(`- Generated: ${report.generatedAt}`);
+  lines.push(`- Threshold: ${report.thresholdPct}% throughput drop => regression`);
+  if (report.baseCommit) lines.push(`- Base commit: ${report.baseCommit}`);
+  if (report.headCommit) lines.push(`- Head commit: ${report.headCommit}`);
+  lines.push("");
+  lines.push("| Scenario | Base req/s | Head req/s | Delta | Status | Base CI | Head CI |");
+  lines.push("|---|---:|---:|---:|---|---:|---:|");
+
+  for (const scenario of report.scenarios) {
+    lines.push(
+      `| ${scenario.name} | ${formatNum(scenario.baseMean)} | ${formatNum(scenario.headMean)} | ${formatPct(scenario.deltaPct)} | ${scenario.status} | ±${scenario.baseCiPct.toFixed(2)}% | ±${scenario.headCiPct.toFixed(2)}% |`,
+    );
+  }
+
+  lines.push("");
+  lines.push(`## Gate: ${report.pass ? "PASS" : "FAIL"}`);
+  if (!report.pass) {
+    lines.push(`Regressions: ${report.regressions.join(", ")}`);
+  }
+
+  return lines.join("\n");
+}
+
+function main() {
+  const args = parseArgs(process.argv.slice(2));
+  const baseRun = loadBench(args.basePath);
+  const headRun = loadBench(args.headPath);
+  const report = compare(baseRun, headRun, args.thresholdPct);
+  const markdown = toMarkdown(report);
+
+  if (args.markdownPath) {
+    writeFileSync(args.markdownPath, markdown, "utf8");
+  }
+
+  if (args.jsonPath) {
+    writeFileSync(args.jsonPath, JSON.stringify(report, null, 2), "utf8");
+  }
+
+  console.log(markdown);
+
+  if (!report.pass && args.failOnRegression) {
+    process.exit(2);
+  }
+}
+
+main();