Commit 64fe16e

Merge pull request #2548 from o1-labs/shigoto/performance-regression-ci-tests
Add performance regression tests to CI
2 parents 7cc7240 + c5e18e5 commit 64fe16e

File tree

8 files changed (+230, -21 lines)

.github/workflows/checks.yml

Lines changed: 1 addition & 0 deletions

@@ -152,6 +152,7 @@ jobs:
           'Verification Key Regression Check 2',
           'CommonJS test',
           'Cache Regression',
+          'Performance Regression',
         ]
     steps:
       - name: Checkout repository with submodules
.github/workflows/dump-perf-baseline.yml

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+name: Dump Performance Regression Baseline
+on:
+  workflow_dispatch: {}
+
+permissions:
+  contents: write
+jobs:
+  dump_and_commit:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout (current branch)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref_name }}
+          submodules: recursive
+      - name: Build
+        uses: ./.github/actions/build
+      - name: Performance Regression (dump)
+        env:
+          TEST_TYPE: Performance Regression
+          PERF_MODE: --dump
+        run: |
+          set -euo pipefail
+          sh run-ci-tests.sh
+      - name: Commit and push baseline to current branch
+        shell: bash
+        env:
+          BASELINE_PATH: tests/perf-regression/perf-regression.json
+          BRANCH_NAME: ${{ github.ref_name }}
+        run: |
+          set -euo pipefail
+
+          if [[ ! -f "$BASELINE_PATH" ]]; then
+            echo "ERROR: baseline not found at $BASELINE_PATH"
+            exit 1
+          fi
+
+          # Configure git identity
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          # Commit only if baseline changed
+          if git diff --quiet -- "$BASELINE_PATH"; then
+            echo "No changes in $BASELINE_PATH; nothing to commit."
+            exit 0
+          fi
+
+          git add "$BASELINE_PATH"
+          git commit -m "ci(perf): update baseline $BASELINE_PATH"
+          # Push back to the same branch this workflow was dispatched on
+          git push origin HEAD:"$BRANCH_NAME"
+          echo "Baseline committed and pushed to branch: $BRANCH_NAME"
package.json

Lines changed: 1 addition & 0 deletions

@@ -71,6 +71,7 @@
     "regression:check-vks": "./scripts/tests/check-vks.sh",
     "regression:dump-perf": "./tests/perf-regression/perf-regression.sh --dump",
     "regression:check-perf": "./tests/perf-regression/perf-regression.sh --check",
+    "regression:ci:dump-perf": "./tests/perf-regression/dump-perf-ci.sh",
     "format": "prettier --write --ignore-unknown",
     "format:check": "prettier --check --ignore-unknown",
     "format:md": "prettier --config .prettierrc.md.cjs --write '**/*.md'",

run-ci-tests.sh

Lines changed: 6 additions & 0 deletions

@@ -53,6 +53,12 @@ case $TEST_TYPE in
     echo "Cache Regression"
     ./scripts/tests/check-cache-regressions.sh
     ;;
+
+  "Performance Regression")
+    echo "Running Performance Regression Check"
+    PERF_MODE="${PERF_MODE:---check}"
+    ./tests/perf-regression/perf-regression.sh "$PERF_MODE"
+    ;;
   *)
     echo "ERROR: Invalid environment variable, not clear what tests to run! $CI_NODE_INDEX"
     exit 1

src/lib/testing/perf-regression.ts

Lines changed: 65 additions & 12 deletions

@@ -26,7 +26,7 @@ import minimist from 'minimist';
 import path from 'path';
 import { ConstraintSystemSummary } from '../provable/core/provable-context.js';

-export { PerfRegressionEntry, Performance };
+export { PerfRegressionEntry, Performance, logPerf };

 type MethodsInfo = Record<
   string,
@@ -357,27 +357,35 @@ function checkAgainstBaseline(params: {
   }

   // tolerances
-  const compileTol = 1.05; // 5%
-  const compileTiny = 1.08; // for near-zero baselines
+  const compileTol = 1.08; // 8%
+  const compileTiny = 1.1; // 10% for near-zero baselines (< 5e-5s)
   const timeTolDefault = 1.1; // 10% for prove/verify
   const timeTolSmall = 1.25; // 25% for very small times (<0.2s)

+  const labelPretty = label[0].toUpperCase() + label.slice(1);
+
   if (label === 'compile') {
     const expected = baseline.compileTime;
     if (expected == null) {
       throw new Error(
         `No baseline compileTime for "${programName}". Run --dump (compile) to set it.`
       );
     }
+
     const tol = expected < 5e-5 ? compileTiny : compileTol;
     const allowedPct = (tol - 1) * 100;
+    const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
+    const failed = actualTime > expected * tol;

-    if (actualTime > expected * tol) {
-      const regressionPct = ((actualTime - expected) / expected) * 100;
+    // colorized perf log
+    logPerf(programName, label, expected, actualTime, regressionPct, allowedPct, failed);
+
+    if (failed) {
       throw new Error(
         `Compile regression for ${programName}\n` +
-          `  Actual: ${actualTime.toFixed(6)}s\n` +
-          `  Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
+          `  Actual: ${actualTime.toFixed(6)}s\n` +
+          `  Baseline: ${expected.toFixed(6)}s\n` +
+          `  Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
       );
     }
     return;
@@ -390,6 +398,7 @@ function checkAgainstBaseline(params: {
       `No baseline method entry for ${programName}.${methodName}. Run --dump (${label}) to add it.`
     );
   }
+
   if (baseMethod.digest !== digest) {
     throw new Error(
       `Digest mismatch for ${programName}.${methodName}\n` +
@@ -399,21 +408,65 @@ function checkAgainstBaseline(params: {
   }

   const expected = label === 'prove' ? baseMethod.proveTime : baseMethod.verifyTime;
-  const labelPretty = label.charAt(0).toUpperCase();
   if (expected == null) {
     throw new Error(
       `No baseline ${label}Time for ${programName}.${methodName}. Run --dump (${label}) to set it.`
     );
   }
+
   const tol = expected < 0.2 ? timeTolSmall : timeTolDefault;
   const allowedPct = (tol - 1) * 100;
+  const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
+  const failed = actualTime > expected * tol;
+
+  logPerf(
+    `${programName}.${methodName}`,
+    label,
+    expected,
+    actualTime,
+    regressionPct,
+    allowedPct,
+    failed
+  );

-  if (actualTime > expected * tol) {
-    const regressionPct = ((actualTime - expected) / expected) * 100;
+  if (failed) {
     throw new Error(
       `${labelPretty} regression for ${programName}.${methodName}\n` +
-        `  Actual: ${actualTime.toFixed(3)}s\n` +
-        `  Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
+        `  Actual: ${actualTime.toFixed(3)}s\n` +
+        `  Baseline: ${expected.toFixed(3)}s\n` +
+        `  Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
     );
   }
 }
+
+function logPerf(
+  scope: string,
+  label: string,
+  expected: number,
+  actual: number,
+  regressionPct: number,
+  allowedPct: number,
+  failed: boolean
+) {
+  const COLORS = {
+    reset: '\x1b[0m',
+    red: '\x1b[31m',
+    green: '\x1b[32m',
+    yellow: '\x1b[33m',
+    cyan: '\x1b[36m',
+  };
+
+  let color: string;
+  if (failed) color = COLORS.red;
+  else if (regressionPct > 0) color = COLORS.yellow;
+  else color = COLORS.green;
+
+  console.log(
+    `${COLORS.cyan}[Perf][${scope}]${COLORS.reset} ${label}: ` +
+      `baseline=${expected.toFixed(6)}s, actual=${actual.toFixed(6)}s, ` +
+      `${color}regression=${regressionPct >= 0 ? '+' : ''}${
+        Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'
+      }%${COLORS.reset} ` +
+      `(allowed +${allowedPct.toFixed(0)}%)`
+  );
+}
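
Condensed, the check added above reduces to: pick a tolerance factor from the baseline's magnitude, flag a failure when the measured time exceeds baseline × tolerance, and log one summary line via logPerf before throwing. Below is a minimal TypeScript sketch of that rule using the logPerf signature exported in this diff; the helper name checkCompileExample, the import path (matching how tests/perf-regression/perf-regression.ts imports logPerf), and the sample numbers are illustrative only, not part of the library.

// Sketch only: mirrors the compile-time check above, not the library's actual entry point.
import { logPerf } from '../../src/lib/testing/perf-regression.js';

// Tolerance factors copied from the hunk above.
const compileTol = 1.08; // 8% for normal compile baselines
const compileTiny = 1.1; // 10% for near-zero baselines (< 5e-5s)

// Hypothetical helper; the real logic lives inside checkAgainstBaseline.
function checkCompileExample(programName: string, expected: number, actualTime: number) {
  // Pick the tolerance tier based on the baseline magnitude.
  const tol = expected < 5e-5 ? compileTiny : compileTol;
  const allowedPct = (tol - 1) * 100;
  // Regression relative to the baseline (treated as 0 when the baseline is 0).
  const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
  const failed = actualTime > expected * tol;

  // One colorized summary line is printed whether or not the check fails.
  logPerf(programName, 'compile', expected, actualTime, regressionPct, allowedPct, failed);
  if (failed) throw new Error(`Compile regression for ${programName}`);
}

// Illustrative numbers: a 0.56s measurement against a 0.50s baseline is +12%,
// which exceeds the 8% tolerance and throws.
checkCompileExample('MyProgram', 0.5, 0.56);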

tests/perf-regression/README.md

Lines changed: 73 additions & 0 deletions

@@ -0,0 +1,73 @@
+# About Performance Regression — Developer Guide
+
+The performance regression framework is designed to detect regressions in
+ZkProgram execution (compile, prove, verify) and compile-time performance of
+constraint systems (CS) and zkApps.
+
+## Overview
+
+- **ZkProgram regression** — handled by
+  [src/lib/testing/perf-regression.ts](../../src/lib/testing/perf-regression.ts),
+  measuring individual ZkPrograms (e.g., SHA256, ECDSA).
+- **CS & zkApp regression** — handled by
+  [tests/perf-regression/perf-regression.ts](./perf-regression.ts), measuring
+  compile-time performance for zkApp examples.
+
+Both use the same JSON file,
+[tests/perf-regression/perf-regression.json](./perf-regression.json), which is
+automatically used by CI to compare new performance data against existing
+baselines; it should only be dumped by triggering the CI workflow (see
+[CI Integration](#ci-integration)).
+
+## Modes
+
+- `--check`: Compares current benchmarks with the stored baseline to detect
+  regressions. Used by CI in PRs.
+- `--dump`: Regenerates the baseline with fresh values, replacing the JSON data.
+  Used only via GitHub runners for consistency.
+
+## Usage
+
+### Dump all examples (local)
+
+```bash
+npm run regression:dump-perf
+```
+
+### Check all examples (local)
+
+```bash
+npm run regression:check-perf
+```
+
+## CI Integration
+
+- **PR checks**: Automatically run in `--check` mode. CI compares results
+  against the committed baseline.
+- **Baseline updates**: Manually trigger the workflow
+  [dump-perf-baseline.yml](../../.github/workflows/dump-perf-baseline.yml) to
+  run in `--dump` mode, regenerate the baseline, and commit it to the same branch.
+- To trigger the CI workflow manually:
+
+  ```bash
+  gh workflow run dump-perf-baseline.yml -r $(git rev-parse --abbrev-ref HEAD)
+  ```
+
+- **Note:** Baselines must be generated by CI to avoid local machine variance.
+  That's why you should NOT commit local dumps.
+
+## Local vs CI
+
+| Environment          | Mode                 | Description                                                                                    |
+| -------------------- | -------------------- | ---------------------------------------------------------------------------------------------- |
+| **Local**            | `--dump` / `--check` | Developers can test performance locally. Results vary by hardware; do not commit local dumps.  |
+| **CI (PR)**          | `--check`            | Ensures performance consistency against the repo’s baseline.                                   |
+| **CI (Manual Dump)** | `--dump`             | Refreshes the baseline via GitHub runners and commits updates.                                  |
+
+## Notes
+
+- To see which examples are covered, check
+  [perf-regression.sh](./perf-regression.sh).
+
+- Without explicit `--dump` or `--check`, the framework behaves like `tic/toc`,
+  enabling seamless use in standard runs.

tests/perf-regression/perf-regression.json

Lines changed: 3 additions & 3 deletions

@@ -72,7 +72,7 @@
         "rows": 1,
         "digest": "2a840c03f4e37242a8056a4aa536358c",
         "proveTime": 28.83372576500001,
-        "verifyTime": 1.8026225459999987
+        "verifyTime": 2.112310
       }
     }
   },
@@ -215,12 +215,12 @@
   },
   "Basic": {
     "digest": "Basic",
-    "compileTime": 0.000008105000000796282,
+    "compileTime": 0.000012105000000796282,
     "methods": {}
   },
   "Crypto": {
     "digest": "Crypto",
-    "compileTime": 0.000006472000008216128,
+    "compileTime": 0.000010472000008216128,
     "methods": {}
   }
 }
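
For orientation, the baseline file maps program or contract names to entries; the shape below is a TypeScript sketch inferred only from the fields visible in this hunk (rows, digest, proveTime, verifyTime, compileTime, methods). The type names BaselineMethod, BaselineEntry, and Baseline are illustrative; the library's actual exported PerfRegressionEntry type may differ.

// Sketch of the baseline entry shape, inferred from the JSON visible above;
// not the actual type exported by src/lib/testing/perf-regression.ts.
type BaselineMethod = {
  rows: number;        // constraint rows for the method
  digest: string;      // circuit digest; a mismatch fails the check before timing
  proveTime?: number;  // seconds
  verifyTime?: number; // seconds
};

type BaselineEntry = {
  digest: string;                          // program/contract digest (e.g. "Basic", "Crypto")
  compileTime?: number;                    // seconds
  methods: Record<string, BaselineMethod>; // empty for compile-only entries
};

// The file maps names to entries, e.g. { "Basic": { ... }, "Crypto": { ... } }.
type Baseline = Record<string, BaselineEntry>;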

tests/perf-regression/perf-regression.ts

Lines changed: 29 additions & 6 deletions

@@ -20,7 +20,7 @@ import { TokenContract, createDex } from '../../src/examples/zkapps/dex/dex.js';
 import { HelloWorld } from '../../src/examples/zkapps/hello-world/hello-world.js';
 import { Membership_ } from '../../src/examples/zkapps/voting/membership.js';
 import { Voting_ } from '../../src/examples/zkapps/voting/voting.js';
-import { PerfRegressionEntry } from '../../src/lib/testing/perf-regression.js';
+import { PerfRegressionEntry, logPerf } from '../../src/lib/testing/perf-regression.js';
 import { tic, toc } from '../../src/lib/util/tic-toc.js';
 import {
   BasicCS,
@@ -118,14 +118,37 @@ async function checkPerf(contracts: MinimumConstraintSystem[]) {
       continue;
     }

-    const tolerance = expectedCompile < 5e-5 ? 1.08 : 1.05;
-    const allowedPct = (tolerance - 1) * 100;
+    // Tiered tolerances:
+    // < 0.00001s → 70%
+    // 0.00001s ≤ t < 0.0001s → 30%
+    // ≥ 0.0001s → 20%
+    let allowedPct: number;
+    if (expectedCompile < 1e-5) {
+      allowedPct = 70;
+    } else if (expectedCompile < 1e-4) {
+      allowedPct = 30;
+    } else {
+      allowedPct = 20;
+    }
+    const tolerance = 1 + allowedPct / 100;
+
+    const regressionPct =
+      expectedCompile === 0
+        ? compileTime === 0
+          ? 0
+          : Infinity
+        : ((compileTime - expectedCompile) / expectedCompile) * 100;
+
+    // colorized log using imported utility
+    const failed = compileTime > expectedCompile * tolerance;
+    logPerf(c.name, 'compile', expectedCompile, compileTime, regressionPct, allowedPct, failed);

-    if (compileTime > expectedCompile * tolerance) {
-      const regressionPct = ((compileTime - expectedCompile) / expectedCompile) * 100;
+    // handle failure
+    if (failed) {
       errorStack += `\n\nCompile regression for ${c.name}
       Actual: ${compileTime.toFixed(6)}s
-      Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`;
+      Baseline: ${expectedCompile.toFixed(6)}s
+      Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`;
     }
   }
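
To make the new tiers concrete, here is a tiny sketch that applies the same thresholds to the updated `Basic` baseline from perf-regression.json above. The helper name pickAllowedPct and the 0.000014s measurement are illustrative only, not part of the repo.

// Illustrative helper mirroring the tier selection in checkPerf above;
// pickAllowedPct is a hypothetical name, not an exported o1js function.
function pickAllowedPct(expectedCompile: number): number {
  if (expectedCompile < 1e-5) return 70; // near-zero baselines get the widest band
  if (expectedCompile < 1e-4) return 30;
  return 20;
}

// The updated "Basic" baseline above is ~0.0000121s, which falls in the 30% tier.
const expected = 0.000012105000000796282;
const allowedPct = pickAllowedPct(expected); // 30
const tolerance = 1 + allowedPct / 100;      // 1.3

// A hypothetical measurement of 0.000014s is ~+15.7% over baseline: within tolerance.
const actual = 0.000014;
const failed = actual > expected * tolerance; // false
console.log({ allowedPct, failed });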
