Skip to content

Commit 0da2200

Browse files
committed
feat: add aws ec2 perf regression harness
1 parent 2a0c941 commit 0da2200

File tree

7 files changed

+890
-0
lines changed

7 files changed

+890
-0
lines changed

docs/BENCHMARK.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,43 @@ npm run bench:run -- --scenario wreq.session.get.small --scenario wreq.session.g
6161
- Run on the same machine, on AC power, with minimal background load.
6262
- Prefer longer `--duration-ms` and more `--samples` until the reported CI margin is comfortably smaller than the improvement you’re targeting.
6363
- Keep parameters constant when comparing optimizations.
64+
65+
## AWS isolated perf runs (recommended for low-noise gating)
66+
67+
If you do not want to benchmark on your laptop/network, use the AWS CLI harness:
68+
69+
1. Create the EC2 instance profile once (requires IAM permissions):
70+
71+
```bash
72+
./scripts/aws-perf/setup-iam.sh
73+
```
74+
75+
2. Run base vs head comparison on an ephemeral EC2 runner (Spot by default, auto-terminates):
76+
77+
```bash
78+
./scripts/aws-perf/ec2-compare.sh --region us-east-1
79+
```
80+
81+
Note: the runner clones from `origin` by default, so both refs must exist in the remote repository (push your branch/commit first if needed).
82+
83+
The script:
84+
85+
- Launches a short-lived EC2 instance with SSM (no inbound SSH required).
86+
- Runs the same benchmark scenarios for `--base-ref` and `--head-ref` on the same host.
87+
- Produces `tmp/aws-perf/<run-id>/summary.json` with per-scenario deltas and a pass/fail gate.
88+
- Terminates the instance automatically unless `--keep-instance` is passed.
89+
90+
Useful options:
91+
92+
- `--on-demand` to avoid Spot interruptions.
93+
- `--instance-type c6i.large` (default) for low cost and stable throughput.
94+
- `--threshold-pct 5` to set the regression gate.
95+
- `--scenarios 'wreq.session.get.small;wreq.session.get.4kb;wreq.isolated.get.small'` to limit scope.
96+
97+
Safety cleanup:
98+
99+
```bash
100+
./scripts/aws-perf/cleanup-stale.sh us-east-1
101+
```
102+
103+
This terminates old perf instances tagged with expired TTL metadata.

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
"bench": "npm run build:rust && tsx --expose-gc src/bench/run.ts",
2727
"bench:run": "tsx --expose-gc src/bench/run.ts",
2828
"bench:quick": "npm run build:rust && tsx --expose-gc src/bench/run.ts --scenario wreq.session.get.small",
29+
"perf:aws:setup-iam": "bash ./scripts/aws-perf/setup-iam.sh",
30+
"perf:aws:compare": "bash ./scripts/aws-perf/ec2-compare.sh",
31+
"perf:aws:cleanup": "bash ./scripts/aws-perf/cleanup-stale.sh",
2932
"check": "biome check .",
3033
"check:fix": "biome check --write .",
3134
"clean": "rimraf dist rust/target rust/*.node",

scripts/aws-perf/cleanup-stale.sh

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env bash
# Terminates perf-benchmark EC2 instances whose ExpiresEpoch tag is in the past.
#
# Usage: cleanup-stale.sh <region>
# The region may also come from AWS_REGION or AWS_DEFAULT_REGION.
set -euo pipefail

if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
cat <<'EOF'
Usage: scripts/aws-perf/cleanup-stale.sh <region>

Example:
scripts/aws-perf/cleanup-stale.sh us-east-1
EOF
  exit 0
fi

REGION="${1:-${AWS_REGION:-${AWS_DEFAULT_REGION:-}}}"
if [[ -z "$REGION" ]]; then
  echo "[aws-perf-cleanup] ERROR: region is required (arg1 or AWS_REGION)" >&2
  exit 1
fi

NOW_EPOCH="$(date +%s)"

echo "[aws-perf-cleanup] Region: $REGION"
echo "[aws-perf-cleanup] Now: $NOW_EPOCH"

# The query uses a LIST projection ([InstanceId, <expiry>]) on purpose: with
# --output text the AWS CLI prints hash projections ({Id:...,Expiry:...}) with
# columns sorted alphabetically by key, which would put Expiry before Id and
# make awk read the wrong column. A list keeps the order as written.
#
# The awk filter requires a purely numeric expiry: an instance without the
# ExpiresEpoch tag is printed as "None", which would otherwise coerce to 0 and
# be treated as already expired.
candidate_ids="$(
  aws ec2 describe-instances \
    --region "$REGION" \
    --filters "Name=tag:Purpose,Values=perf-benchmark" "Name=instance-state-name,Values=pending,running,stopping,stopped" \
    --query 'Reservations[].Instances[].[InstanceId, Tags[?Key==`ExpiresEpoch`]|[0].Value]' \
    --output text \
    | awk -v now="$NOW_EPOCH" '$2 ~ /^[0-9]+$/ && ($2 + 0) <= (now + 0) { print $1 }'
)"

if [[ -z "$candidate_ids" ]]; then
  echo "[aws-perf-cleanup] No stale perf instances found"
  exit 0
fi

echo "[aws-perf-cleanup] Terminating stale instances:"
echo "$candidate_ids" | sed 's/^/ - /'

# Intentionally unquoted: one instance ID per line must split into separate
# --instance-ids arguments.
# shellcheck disable=SC2086
aws ec2 terminate-instances --region "$REGION" --instance-ids $candidate_ids >/dev/null
echo "[aws-perf-cleanup] Terminate request submitted"

scripts/aws-perf/compare-bench.mjs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env node
2+
3+
import { readFileSync, writeFileSync } from "node:fs";
4+
5+
/**
 * Parse the command-line arguments of the benchmark comparison tool.
 *
 * Recognised flags:
 *   --base <path>           benchmark JSON of the base ref (required)
 *   --head <path>           benchmark JSON of the head ref (required)
 *   --threshold-pct <n>     non-negative regression threshold in percent (default 5)
 *   --markdown <path>       where to write the Markdown report
 *   --json <path>           where to write the JSON report
 *   --no-fail               do not fail the process when a regression is found
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{basePath: string, headPath: string, thresholdPct: number,
 *   failOnRegression: boolean, markdownPath?: string, jsonPath?: string}}
 * @throws {Error} On an unknown flag, a flag without a value, missing
 *   --base/--head, or an invalid threshold.
 */
function parseArgs(argv) {
  const args = {
    thresholdPct: 5,
    failOnRegression: true,
  };

  // Flags that consume the following token, mapped to the option they fill.
  const valueFlags = new Map([
    ["--base", "basePath"],
    ["--head", "headPath"],
    ["--threshold-pct", "thresholdPct"],
    ["--markdown", "markdownPath"],
    ["--json", "jsonPath"],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];

    if (arg === "--no-fail") {
      args.failOnRegression = false;
      continue;
    }

    const key = valueFlags.get(arg);
    if (key === undefined) {
      throw new Error(`Unknown argument: ${arg}`);
    }

    const next = argv[i + 1];
    // A following "--flag" is not a value: without this check
    // `--base --head x` would silently use "--head" as the base path and
    // fail later with a misleading error.
    if (!next || next.startsWith("--")) {
      throw new Error(`Missing value for ${arg}`);
    }

    args[key] = key === "thresholdPct" ? Number(next) : next;
    i += 1; // skip the value just consumed
  }

  if (!args.basePath || !args.headPath) {
    throw new Error("Usage: compare-bench.mjs --base <path> --head <path> [--threshold-pct <n>]");
  }

  if (!Number.isFinite(args.thresholdPct) || args.thresholdPct < 0) {
    throw new Error("--threshold-pct must be a non-negative number");
  }

  return args;
}
63+
64+
/**
 * Render a number with en-US digit grouping and at most two fraction digits.
 *
 * @param {number} value - Number to format.
 * @returns {string} The formatted number, e.g. "1,234.57".
 */
function formatNum(value) {
  const options = { maximumFractionDigits: 2 };
  const formatted = value.toLocaleString("en-US", options);
  return formatted;
}
67+
68+
/**
 * Render a percentage with two decimals; strictly positive values get a
 * leading "+".
 *
 * @param {number} value - Percentage to format.
 * @returns {string} For example "+5.00%", "-3.46%" or "0.00%".
 */
function formatPct(value) {
  const fixed = value.toFixed(2);
  if (value > 0) {
    return `+${fixed}%`;
  }
  return `${fixed}%`;
}
72+
73+
/**
 * Read and parse a benchmark result file.
 *
 * @param {string} path - Path of the benchmark JSON file.
 * @returns {object} The parsed run; guaranteed to carry a `results` array.
 * @throws {Error} If the file cannot be read, is not valid JSON, or has no
 *   `results` array. The message names the file; read/parse failures keep
 *   the original error as `cause`.
 */
function loadBench(path) {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(path, "utf8"));
  } catch (err) {
    // Two files are loaded per run; say which one failed.
    throw new Error(`Failed to load benchmark file ${path}: ${err.message}`, { cause: err });
  }

  // Fail here with a clear message instead of a TypeError deep in the comparison.
  if (!Array.isArray(parsed?.results)) {
    throw new Error(`Benchmark file ${path} has no "results" array`);
  }

  return parsed;
}
76+
77+
/**
 * Compare two benchmark runs scenario by scenario.
 *
 * Only scenarios present in both runs are compared; names found in just one
 * run are listed in `missingInHead` / `missingInBase` so they are not dropped
 * silently. The gate (`pass`) depends only on compared scenarios.
 *
 * @param {{results: Array<object>, git?: {commit?: string}}} baseRun - Base run.
 * @param {{results: Array<object>, git?: {commit?: string}}} headRun - Head run.
 * @param {number} thresholdPct - Percent change of `mean` (head relative to
 *   base) at which a drop counts as a regression and a gain as an improvement.
 * @returns {object} Report with `generatedAt`, `thresholdPct`, `baseCommit`,
 *   `headCommit`, `regressions` (names), `pass`, `scenarios`, `missingInHead`
 *   and `missingInBase`.
 */
function compare(baseRun, headRun, thresholdPct) {
  const byName = (run) => new Map(run.results.map((result) => [result.name, result]));
  const baseMap = byName(baseRun);
  const headMap = byName(headRun);

  const baseNames = [...baseMap.keys()];
  const names = baseNames.filter((name) => headMap.has(name)).sort();
  const missingInHead = baseNames.filter((name) => !headMap.has(name)).sort();
  const missingInBase = [...headMap.keys()].filter((name) => !baseMap.has(name)).sort();

  const scenarios = names.map((name) => {
    const base = baseMap.get(name);
    const head = headMap.get(name);

    // NOTE(review): a zero or missing base mean yields a non-finite delta,
    // which matches neither check below and is reported as "OK".
    const deltaPct = ((head.mean - base.mean) / base.mean) * 100;

    // Require an actual drop/gain: with thresholdPct = 0 an unchanged mean
    // (delta 0) must not be classified as a regression.
    const regression = deltaPct < 0 && deltaPct <= -thresholdPct;
    const improvement = deltaPct > 0 && deltaPct >= thresholdPct;

    return {
      name,
      baseMean: base.mean,
      headMean: head.mean,
      deltaPct,
      baseCiPct: base.ci95.marginPct,
      headCiPct: head.ci95.marginPct,
      baseErrors: base.errors,
      headErrors: head.errors,
      status: regression ? "REGRESSION" : improvement ? "IMPROVEMENT" : "OK",
    };
  });

  const regressions = scenarios.filter((item) => item.status === "REGRESSION");

  return {
    generatedAt: new Date().toISOString(),
    thresholdPct,
    baseCommit: baseRun.git?.commit,
    headCommit: headRun.git?.commit,
    regressions: regressions.map((item) => item.name),
    pass: regressions.length === 0,
    scenarios,
    missingInHead,
    missingInBase,
  };
}
118+
119+
/**
 * Render a comparison report as a Markdown document: a header with run
 * metadata, one table row per scenario, and the final gate verdict.
 *
 * @param {object} report - Report object as produced by `compare`.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function toMarkdown(report) {
  const header = [
    "# AWS Perf Compare",
    "",
    `- Generated: ${report.generatedAt}`,
    `- Threshold: ${report.thresholdPct}% throughput drop => regression`,
  ];

  // Commit lines appear only when the run recorded a commit.
  const commits = [];
  if (report.baseCommit) commits.push(`- Base commit: ${report.baseCommit}`);
  if (report.headCommit) commits.push(`- Head commit: ${report.headCommit}`);

  const tableHead = [
    "",
    "| Scenario | Base req/s | Head req/s | Delta | Status | Base CI | Head CI |",
    "|---|---:|---:|---:|---|---:|---:|",
  ];

  const rows = report.scenarios.map((row) => {
    const cells = [
      row.name,
      formatNum(row.baseMean),
      formatNum(row.headMean),
      formatPct(row.deltaPct),
      row.status,
      `±${row.baseCiPct.toFixed(2)}%`,
      `±${row.headCiPct.toFixed(2)}%`,
    ];
    return `| ${cells.join(" | ")} |`;
  });

  const verdict = ["", `## Gate: ${report.pass ? "PASS" : "FAIL"}`];
  if (!report.pass) {
    verdict.push(`Regressions: ${report.regressions.join(", ")}`);
  }

  return [...header, ...commits, ...tableHead, ...rows, ...verdict].join("\n");
}
145+
146+
/**
 * CLI entry point: parse arguments, load both benchmark runs, build the
 * comparison report, optionally write it as Markdown and/or JSON, print the
 * Markdown to stdout, and exit with status 2 when the gate fails (unless
 * `--no-fail` was given).
 */
function main() {
  const { basePath, headPath, thresholdPct, markdownPath, jsonPath, failOnRegression } = parseArgs(
    process.argv.slice(2),
  );

  const baseRun = loadBench(basePath);
  const headRun = loadBench(headPath);
  const report = compare(baseRun, headRun, thresholdPct);
  const markdown = toMarkdown(report);

  if (markdownPath) {
    writeFileSync(markdownPath, markdown, "utf8");
  }

  if (jsonPath) {
    const serialized = JSON.stringify(report, null, 2);
    writeFileSync(jsonPath, serialized, "utf8");
  }

  console.log(markdown);

  // Gate passed, or failures were explicitly tolerated: normal exit.
  if (report.pass || !failOnRegression) {
    return;
  }
  process.exit(2);
}

main();

0 commit comments

Comments
 (0)