Commit 64fe16e

Merge pull request #2548 from o1-labs/shigoto/performance-regression-ci-tests
Add performance regression tests to CI
2 parents 7cc7240 + c5e18e5 commit 64fe16e

File tree

8 files changed (+230, -21 lines)

.github/workflows/checks.yml

Lines changed: 1 addition & 0 deletions

@@ -152,6 +152,7 @@ jobs:
           'Verification Key Regression Check 2',
           'CommonJS test',
           'Cache Regression',
+          'Performance Regression',
         ]
     steps:
       - name: Checkout repository with submodules
.github/workflows/dump-perf-baseline.yml

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+name: Dump Performance Regression Baseline
+on:
+  workflow_dispatch: {}
+
+permissions:
+  contents: write
+jobs:
+  dump_and_commit:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout (current branch)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref_name }}
+          submodules: recursive
+      - name: Build
+        uses: ./.github/actions/build
+      - name: Performance Regression (dump)
+        env:
+          TEST_TYPE: Performance Regression
+          PERF_MODE: --dump
+        run: |
+          set -euo pipefail
+          sh run-ci-tests.sh
+      - name: Commit and push baseline to current branch
+        shell: bash
+        env:
+          BASELINE_PATH: tests/perf-regression/perf-regression.json
+          BRANCH_NAME: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+
+          if [[ ! -f "$BASELINE_PATH" ]]; then
+            echo "ERROR: baseline not found at $BASELINE_PATH"
+            exit 1
+          fi
+
+          # Configure git identity
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          # Commit only if baseline changed
+          if git diff --quiet -- "$BASELINE_PATH"; then
+            echo "No changes in $BASELINE_PATH; nothing to commit."
+            exit 0
+          fi
+
+          git add "$BASELINE_PATH"
+          git commit -m "ci(perf): update baseline $BASELINE_PATH"
+          # Push back to the same branch this workflow was dispatched on
+          git push origin HEAD:"$BRANCH_NAME"
+          echo "Baseline committed and pushed to branch: $BRANCH_NAME"
package.json

Lines changed: 1 addition & 0 deletions

@@ -71,6 +71,7 @@
     "regression:check-vks": "./scripts/tests/check-vks.sh",
     "regression:dump-perf": "./tests/perf-regression/perf-regression.sh --dump",
     "regression:check-perf": "./tests/perf-regression/perf-regression.sh --check",
+    "regression:ci:dump-perf": "./tests/perf-regression/dump-perf-ci.sh",
     "format": "prettier --write --ignore-unknown",
     "format:check": "prettier --check --ignore-unknown",
     "format:md": "prettier --config .prettierrc.md.cjs --write '**/*.md'",

run-ci-tests.sh

Lines changed: 6 additions & 0 deletions

@@ -53,6 +53,12 @@ case $TEST_TYPE in
     echo "Cache Regression"
     ./scripts/tests/check-cache-regressions.sh
     ;;
+
+  "Performance Regression")
+    echo "Running Performance Regression Check"
+    PERF_MODE="${PERF_MODE:---check}"
+    ./tests/perf-regression/perf-regression.sh "$PERF_MODE"
+    ;;
   *)
     echo "ERROR: Invalid environment variable, not clear what tests to run! $CI_NODE_INDEX"
     exit 1

src/lib/testing/perf-regression.ts

Lines changed: 65 additions & 12 deletions

@@ -26,7 +26,7 @@ import minimist from 'minimist';
 import path from 'path';
 import { ConstraintSystemSummary } from '../provable/core/provable-context.js';

-export { PerfRegressionEntry, Performance };
+export { PerfRegressionEntry, Performance, logPerf };

 type MethodsInfo = Record<
   string,
@@ -357,27 +357,35 @@ function checkAgainstBaseline(params: {
   }

   // tolerances
-  const compileTol = 1.05; // 5%
-  const compileTiny = 1.08; // for near-zero baselines
+  const compileTol = 1.08; // 8%
+  const compileTiny = 1.1; // 10% for near-zero baselines (< 5e-5s)
   const timeTolDefault = 1.1; // 10% for prove/verify
   const timeTolSmall = 1.25; // 25% for very small times (<0.2s)

+  const labelPretty = label[0].toUpperCase() + label.slice(1);
+
   if (label === 'compile') {
     const expected = baseline.compileTime;
     if (expected == null) {
       throw new Error(
         `No baseline compileTime for "${programName}". Run --dump (compile) to set it.`
       );
     }
+
     const tol = expected < 5e-5 ? compileTiny : compileTol;
     const allowedPct = (tol - 1) * 100;
+    const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
+    const failed = actualTime > expected * tol;

-    if (actualTime > expected * tol) {
-      const regressionPct = ((actualTime - expected) / expected) * 100;
+    // colorized perf log
+    logPerf(programName, label, expected, actualTime, regressionPct, allowedPct, failed);
+
+    if (failed) {
       throw new Error(
         `Compile regression for ${programName}\n` +
-          `  Actual: ${actualTime.toFixed(6)}s\n` +
-          `  Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
+          `  Actual: ${actualTime.toFixed(6)}s\n` +
+          `  Baseline: ${expected.toFixed(6)}s\n` +
+          `  Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
       );
     }
     return;
@@ -390,6 +398,7 @@ function checkAgainstBaseline(params: {
       `No baseline method entry for ${programName}.${methodName}. Run --dump (${label}) to add it.`
     );
   }
+
   if (baseMethod.digest !== digest) {
     throw new Error(
       `Digest mismatch for ${programName}.${methodName}\n` +
@@ -399,21 +408,65 @@ function checkAgainstBaseline(params: {
   }

   const expected = label === 'prove' ? baseMethod.proveTime : baseMethod.verifyTime;
-  const labelPretty = label.charAt(0).toUpperCase();
   if (expected == null) {
     throw new Error(
       `No baseline ${label}Time for ${programName}.${methodName}. Run --dump (${label}) to set it.`
     );
   }
+
   const tol = expected < 0.2 ? timeTolSmall : timeTolDefault;
   const allowedPct = (tol - 1) * 100;
+  const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
+  const failed = actualTime > expected * tol;
+
+  logPerf(
+    `${programName}.${methodName}`,
+    label,
+    expected,
+    actualTime,
+    regressionPct,
+    allowedPct,
+    failed
+  );

-  if (actualTime > expected * tol) {
-    const regressionPct = ((actualTime - expected) / expected) * 100;
+  if (failed) {
     throw new Error(
       `${labelPretty} regression for ${programName}.${methodName}\n` +
-        `  Actual: ${actualTime.toFixed(3)}s\n` +
-        `  Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
+        `  Actual: ${actualTime.toFixed(3)}s\n` +
+        `  Baseline: ${expected.toFixed(3)}s\n` +
+        `  Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
     );
   }
 }
+
+function logPerf(
+  scope: string,
+  label: string,
+  expected: number,
+  actual: number,
+  regressionPct: number,
+  allowedPct: number,
+  failed: boolean
+) {
+  const COLORS = {
+    reset: '\x1b[0m',
+    red: '\x1b[31m',
+    green: '\x1b[32m',
+    yellow: '\x1b[33m',
+    cyan: '\x1b[36m',
+  };
+
+  let color: string;
+  if (failed) color = COLORS.red;
+  else if (regressionPct > 0) color = COLORS.yellow;
+  else color = COLORS.green;
+
+  console.log(
+    `${COLORS.cyan}[Perf][${scope}]${COLORS.reset} ${label}: ` +
+      `baseline=${expected.toFixed(6)}s, actual=${actual.toFixed(6)}s, ` +
+      `${color}regression=${regressionPct >= 0 ? '+' : ''}${
+        Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'
+      }%${COLORS.reset} ` +
+      `(allowed +${allowedPct.toFixed(0)}%)`
+  );
+}
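
Condensed, the check added above reduces to: pick a tolerance factor from the baseline's magnitude, flag a failure when the measured time exceeds baseline × tolerance, and log one summary line via logPerf before throwing. Below is a minimal TypeScript sketch of that rule using the logPerf signature exported in this diff; the helper name checkCompileExample, the import path (matching how tests/perf-regression/perf-regression.ts imports logPerf), and the sample numbers are illustrative only, not part of the library.

// Sketch only: mirrors the compile-time check above, not the library's actual entry point.
import { logPerf } from '../../src/lib/testing/perf-regression.js';

// Tolerance factors copied from the hunk above.
const compileTol = 1.08; // 8% for normal compile baselines
const compileTiny = 1.1; // 10% for near-zero baselines (< 5e-5s)

// Hypothetical helper; the real logic lives inside checkAgainstBaseline.
function checkCompileExample(programName: string, expected: number, actualTime: number) {
  // Pick the tolerance tier based on the baseline magnitude.
  const tol = expected < 5e-5 ? compileTiny : compileTol;
  const allowedPct = (tol - 1) * 100;
  // Regression relative to the baseline (treated as 0 when the baseline is 0).
  const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
  const failed = actualTime > expected * tol;

  // One colorized summary line is printed whether or not the check fails.
  logPerf(programName, 'compile', expected, actualTime, regressionPct, allowedPct, failed);
  if (failed) throw new Error(`Compile regression for ${programName}`);
}

// Illustrative numbers: a 0.56s measurement against a 0.50s baseline is +12%,
// which exceeds the 8% tolerance and throws.
checkCompileExample('MyProgram', 0.5, 0.56);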

tests/perf-regression/README.md

Lines changed: 73 additions & 0 deletions

@@ -0,0 +1,73 @@
+# About Performance Regression — Developer Guide
+
+The performance regression framework is designed to detect regressions in
+ZkProgram execution (compile, prove, verify) and compile-time performance of
+constraint systems (CS) and zkApps.
+
+## Overview
+
+- **ZkProgram regression** — handled by
+  [src/lib/testing/perf-regression.ts](../../src/lib/testing/perf-regression.ts),
+  measuring individual ZkPrograms (e.g., SHA256, ECDSA).
+- **CS & zkApp regression** — handled by
+  [tests/perf-regression/perf-regression.ts](./perf-regression.ts), measuring
+  compile-time performance for zkApp examples.
+
+Both use the same JSON file,
+[tests/perf-regression/perf-regression.json](./perf-regression.json), which is
+automatically used by CI to compare new performance data against existing
+baselines; it should only be dumped by triggering the CI workflow (see
+[CI Integration](#ci-integration)).
+
+## Modes
+
+- `--check`: Compares current benchmarks with the stored baseline to detect
+  regressions. Used by CI in PRs.
+- `--dump`: Regenerates the baseline with fresh values, replacing the JSON data.
+  Used only via GitHub runners for consistency.
+
+## Usage
+
+### Dump all examples (local)
+
+```bash
+npm run regression:dump-perf
+```
+
+### Check all examples (local)
+
+```bash
+npm run regression:check-perf
+```
+
+## CI Integration
+
+- **PR checks**: Automatically run in `--check` mode. CI compares results
+  against the committed baseline.
+- **Baseline updates**: Manually trigger the workflow
+  [dump-perf-baseline.yml](../../.github/workflows/dump-perf-baseline.yml) to
+  run in `--dump` mode, regenerate the baseline, and commit it to the same branch.
+- To trigger the CI workflow manually:
+
+  ```bash
+  gh workflow run dump-perf-baseline.yml -r $(git rev-parse --abbrev-ref HEAD)
+  ```
+
+- **Note:** Baselines must be generated by CI to avoid local machine variance.
+  That's why you should NOT commit local dumps.
+
+## Local vs CI
+
+| Environment          | Mode                 | Description                                                                                    |
+| -------------------- | -------------------- | ---------------------------------------------------------------------------------------------- |
+| **Local**            | `--dump` / `--check` | Developers can test performance locally. Results vary by hardware; do not commit local dumps.  |
+| **CI (PR)**          | `--check`            | Ensures performance consistency against the repo’s baseline.                                   |
+| **CI (Manual Dump)** | `--dump`             | Refreshes the baseline via GitHub runners and commits updates.                                  |
+
+## Notes
+
+- To see which examples are covered, check
+  [perf-regression.sh](./perf-regression.sh).
+
+- Without explicit `--dump` or `--check`, the framework behaves like `tic/toc`,
+  enabling seamless use in standard runs.

tests/perf-regression/perf-regression.json

Lines changed: 3 additions & 3 deletions

@@ -72,7 +72,7 @@
         "rows": 1,
         "digest": "2a840c03f4e37242a8056a4aa536358c",
         "proveTime": 28.83372576500001,
-        "verifyTime": 1.8026225459999987
+        "verifyTime": 2.112310
       }
     }
   },
@@ -215,12 +215,12 @@
   },
   "Basic": {
     "digest": "Basic",
-    "compileTime": 0.000008105000000796282,
+    "compileTime": 0.000012105000000796282,
     "methods": {}
   },
   "Crypto": {
     "digest": "Crypto",
-    "compileTime": 0.000006472000008216128,
+    "compileTime": 0.000010472000008216128,
     "methods": {}
   }
 }
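
For orientation, the baseline file maps program or contract names to entries; the shape below is a TypeScript sketch inferred only from the fields visible in this hunk (rows, digest, proveTime, verifyTime, compileTime, methods). The type names BaselineMethod, BaselineEntry, and Baseline are illustrative; the library's actual exported PerfRegressionEntry type may differ.

// Sketch of the baseline entry shape, inferred from the JSON visible above;
// not the actual type exported by src/lib/testing/perf-regression.ts.
type BaselineMethod = {
  rows: number;        // constraint rows for the method
  digest: string;      // circuit digest; a mismatch fails the check before timing
  proveTime?: number;  // seconds
  verifyTime?: number; // seconds
};

type BaselineEntry = {
  digest: string;                          // program/contract digest (e.g. "Basic", "Crypto")
  compileTime?: number;                    // seconds
  methods: Record<string, BaselineMethod>; // empty for compile-only entries
};

// The file maps names to entries, e.g. { "Basic": { ... }, "Crypto": { ... } }.
type Baseline = Record<string, BaselineEntry>;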

tests/perf-regression/perf-regression.ts

Lines changed: 29 additions & 6 deletions

@@ -20,7 +20,7 @@ import { TokenContract, createDex } from '../../src/examples/zkapps/dex/dex.js';
 import { HelloWorld } from '../../src/examples/zkapps/hello-world/hello-world.js';
 import { Membership_ } from '../../src/examples/zkapps/voting/membership.js';
 import { Voting_ } from '../../src/examples/zkapps/voting/voting.js';
-import { PerfRegressionEntry } from '../../src/lib/testing/perf-regression.js';
+import { PerfRegressionEntry, logPerf } from '../../src/lib/testing/perf-regression.js';
 import { tic, toc } from '../../src/lib/util/tic-toc.js';
 import {
   BasicCS,
@@ -118,14 +118,37 @@ async function checkPerf(contracts: MinimumConstraintSystem[]) {
       continue;
     }

-    const tolerance = expectedCompile < 5e-5 ? 1.08 : 1.05;
-    const allowedPct = (tolerance - 1) * 100;
+    // Tiered tolerances:
+    // < 0.00001s → 70%
+    // 0.00001s ≤ t < 0.0001s → 30%
+    // ≥ 0.0001s → 20%
+    let allowedPct: number;
+    if (expectedCompile < 1e-5) {
+      allowedPct = 70;
+    } else if (expectedCompile < 1e-4) {
+      allowedPct = 30;
+    } else {
+      allowedPct = 20;
+    }
+    const tolerance = 1 + allowedPct / 100;
+
+    const regressionPct =
+      expectedCompile === 0
+        ? compileTime === 0
+          ? 0
+          : Infinity
+        : ((compileTime - expectedCompile) / expectedCompile) * 100;
+
+    // colorized log using imported utility
+    const failed = compileTime > expectedCompile * tolerance;
+    logPerf(c.name, 'compile', expectedCompile, compileTime, regressionPct, allowedPct, failed);

-    if (compileTime > expectedCompile * tolerance) {
-      const regressionPct = ((compileTime - expectedCompile) / expectedCompile) * 100;
+    // handle failure
+    if (failed) {
       errorStack += `\n\nCompile regression for ${c.name}
       Actual: ${compileTime.toFixed(6)}s
-      Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`;
+      Baseline: ${expectedCompile.toFixed(6)}s
+      Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`;
     }
   }
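
To make the new tiers concrete, here is a tiny sketch that applies the same thresholds to the updated `Basic` baseline from perf-regression.json above. The helper name pickAllowedPct and the 0.000014s measurement are illustrative only, not part of the repo.

// Illustrative helper mirroring the tier selection in checkPerf above;
// pickAllowedPct is a hypothetical name, not an exported o1js function.
function pickAllowedPct(expectedCompile: number): number {
  if (expectedCompile < 1e-5) return 70; // near-zero baselines get the widest band
  if (expectedCompile < 1e-4) return 30;
  return 20;
}

// The updated "Basic" baseline above is ~0.0000121s, which falls in the 30% tier.
const expected = 0.000012105000000796282;
const allowedPct = pickAllowedPct(expected); // 30
const tolerance = 1 + allowedPct / 100;      // 1.3

// A hypothetical measurement of 0.000014s is ~+15.7% over baseline: within tolerance.
const actual = 0.000014;
const failed = actual > expected * tolerance; // false
console.log({ allowedPct, failed });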
