diff --git a/.github/actions/perf-regression/action.yml b/.github/actions/perf-regression/action.yml new file mode 100644 index 0000000000..1930665cc3 --- /dev/null +++ b/.github/actions/perf-regression/action.yml @@ -0,0 +1,211 @@ +name: Performance Regression (Dump or Check) +description: Runs performance regression tests in dump or check mode. Auto mode decides via PR label on PRs, and via merged PR labels on push to main. + +inputs: + mode: + description: "'dump' | 'check' | 'auto' (auto uses labels)" + required: false + default: auto + label_name: + description: "Label that enables dump mode when present (auto mode)" + required: false + default: dump-performance + + artifact_name: + description: "Artifact name used to store the performance baseline JSON" + required: false + default: perf-regression-json + baseline_path: + description: "Path where the performance baseline JSON is stored" + required: false + default: tests/perf-regression/perf-regression.json + + workflow_name: + description: "Workflow name to search when locating prior runs (defaults to current)" + required: false + default: "" + branch: + description: "Branch to search prior runs on (defaults to main)" + required: false + default: "main" + target_repo: + description: "owner/repo to search in (defaults to current)" + required: false + default: "" + + run_command: + description: "Shell command to run performance tests (reads PERF_MODE env)" + required: false + default: sh run-ci-tests.sh + +runs: + using: "composite" + steps: + - name: Resolve workflow and repository context + shell: bash + env: + IN_WORKFLOW: ${{ inputs.workflow_name }} + IN_BRANCH: ${{ inputs.branch }} + IN_REPO: ${{ inputs.target_repo }} + WF_CTX: ${{ github.workflow }} + REPO_CTX: ${{ github.repository }} + run: | + set -euo pipefail + WF_NAME="${IN_WORKFLOW:-$WF_CTX}" + BRANCH="${IN_BRANCH}" # default is "main" unless caller overrides + REPO="${IN_REPO:-$REPO_CTX}" + + echo "WF_NAME=$WF_NAME" >> "$GITHUB_ENV" + echo "BRANCH=$BRANCH" >> "$GITHUB_ENV" + echo "TARGET_REPO=$REPO" >> "$GITHUB_ENV" + + echo "Using workflow: $WF_NAME" + echo "Searching performance baselines on branch: $BRANCH" + echo "Target repository: $REPO" + + - name: Determine dump or check mode + id: resolve-mode + shell: bash + env: + MODE: ${{ inputs.mode }} + LABEL_NAME: ${{ inputs.label_name }} + GH_TOKEN: ${{ github.token }} + EVENT_NAME: ${{ github.event_name }} + OWNER_REPO: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GIT_REF: ${{ github.ref }} + GIT_SHA: ${{ github.sha }} + run: | + set -euo pipefail + + decide_from_pr_labels () { + local repo="$1" pr="$2" label="$3" + local labels + labels=$(gh api "repos/$repo/issues/$pr/labels" --jq '.[].name' | tr '\n' ' ') + if echo "$labels" | grep -q -w "$label"; then + echo "--dump" + else + echo "--check" + fi + } + + decide_from_merged_pr_labels () { + local repo="$1" sha="$2" label="$3" + local prs should_dump=0 + prs=$(gh api "repos/$repo/commits/$sha/pulls" --jq '.[].number' || true) + for n in $prs; do + local ls + ls=$(gh api "repos/$repo/issues/$n/labels" --jq '.[].name' | tr '\n' ' ') + if echo "$ls" | grep -q -w "$label"; then + should_dump=1; break + fi + done + if [[ $should_dump -eq 1 ]]; then echo "--dump"; else echo "--check"; fi + } + + MODE="${MODE:-auto}" + REASON="" + + if [[ "$MODE" == "dump" ]]; then + PERF_MODE="--dump"; REASON="explicit input" + elif [[ "$MODE" == "check" ]]; then + PERF_MODE="--check"; REASON="explicit input" + else + if [[ "$EVENT_NAME" == "pull_request" 
]]; then + PERF_MODE="$(decide_from_pr_labels "$OWNER_REPO" "$PR_NUMBER" "$LABEL_NAME")" + REASON="PR label check" + elif [[ "$EVENT_NAME" == "push" && "$GIT_REF" == "refs/heads/main" ]]; then + PERF_MODE="$(decide_from_merged_pr_labels "$OWNER_REPO" "$GIT_SHA" "$LABEL_NAME")" + REASON="merged PR label check" + else + PERF_MODE="--check"; REASON="default for non-PR, non-main push" + fi + fi + + echo "PERF_MODE=$PERF_MODE" | tee -a "$GITHUB_ENV" + echo "perf_mode=$PERF_MODE" >> "$GITHUB_OUTPUT" + echo "Mode: $PERF_MODE ($REASON)" + + - name: Find prior run with performance baseline artifact (from target branch) + id: find-baseline + if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }} + shell: bash + env: + GH_TOKEN: ${{ github.token }} + TARGET_REPO: ${{ env.TARGET_REPO }} + WORKFLOW_NAME: ${{ env.WF_NAME }} + BRANCH: ${{ env.BRANCH }} + ARTIFACT_NAME: ${{ inputs.artifact_name }} + run: | + set -euo pipefail + echo "Searching $TARGET_REPO (workflow: $WORKFLOW_NAME, branch: $BRANCH) for artifact $ARTIFACT_NAME" + + RUN_IDS=$(gh run list \ + --repo "$TARGET_REPO" \ + --workflow "$WORKFLOW_NAME" \ + --json databaseId,headBranch,status,conclusion \ + --limit 50 \ + --jq "[.[] | select(.status==\"completed\" and .headBranch==\"$BRANCH\") | .databaseId] | .[]" \ + || true) + + if [[ -z "${RUN_IDS:-}" ]]; then + echo "No completed runs found on branch $BRANCH." + exit 1 + fi + + FOUND="" + for RID in $RUN_IDS; do + HAS=$(gh api "repos/$TARGET_REPO/actions/runs/$RID/artifacts" \ + --jq ".artifacts | map(select(.name==\"$ARTIFACT_NAME\" and .expired==false)) | length") + echo "Run $RID has $HAS matching artifact(s)" + if [[ "$HAS" -gt 0 ]]; then FOUND="$RID"; break; fi + done + + if [[ -z "${FOUND:-}" ]]; then + echo "No suitable run with artifact $ARTIFACT_NAME found on $BRANCH." + exit 1 + fi + + echo "run_id=$FOUND" >> "$GITHUB_OUTPUT" + echo "Using performance baseline from run: $FOUND" + + - name: Download performance baseline artifact + if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }} + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + run-id: ${{ steps.find-baseline.outputs.run_id }} + path: tests/perf-regression + github-token: ${{ github.token }} + + - name: Normalize performance baseline path + if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }} + shell: bash + env: + DEST: ${{ inputs.baseline_path }} + run: | + set -euo pipefail + FILE="$(find tests/perf-regression -name 'perf-regression.json' -print -quit || true)" + if [[ -z "${FILE:-}" ]]; then + echo "ERROR: perf-regression.json not found in downloaded artifact." 
+ ls -R tests/perf-regression || true + exit 1 + fi + mkdir -p "$(dirname "$DEST")" + [[ "$FILE" != "$DEST" ]] && mv -f "$FILE" "$DEST" + echo "Performance Baseline ready at: $DEST" + + - name: Run performance regression tests + shell: bash + env: + PERF_MODE: ${{ env.PERF_MODE }} + run: ${{ inputs.run_command }} + + - name: Upload new performance baseline (dump mode) + if: ${{ steps.resolve-mode.outputs.perf_mode == '--dump' }} + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: ${{ inputs.baseline_path }} + if-no-files-found: error + retention-days: 30 diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 546dc719c0..bb5f0daa21 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -33,6 +33,7 @@ concurrency: permissions: contents: write + actions: read jobs: Prepare: @@ -152,6 +153,7 @@ jobs: 'Verification Key Regression Check 2', 'CommonJS test', 'Cache Regression', + 'Performance Regression', ] steps: - name: Checkout repository with submodules @@ -174,9 +176,24 @@ jobs: - name: Prepare for tests run: touch profiling.md - name: Execute tests + if: matrix.test_type != 'Performance Regression' env: TEST_TYPE: ${{ matrix.test_type }} run: sh run-ci-tests.sh + - name: Performance Regression (dump or check) + if: matrix.test_type == 'Performance Regression' + uses: ./.github/actions/perf-regression + env: + TEST_TYPE: Performance Regression + with: + mode: auto + label_name: dump-performance + artifact_name: perf-regression-json + baseline_path: tests/perf-regression/perf-regression.json + workflow_name: ${{ github.workflow }} + branch: main + target_repo: ${{ inputs.target_repo || github.repository }} + run_command: sh run-ci-tests.sh - name: Add to job summary if: always() run: | diff --git a/run-ci-tests.sh b/run-ci-tests.sh index 213514dc0c..dbc1ee71d3 100755 --- a/run-ci-tests.sh +++ b/run-ci-tests.sh @@ -53,6 +53,12 @@ case $TEST_TYPE in echo "Cache Regression" ./scripts/tests/check-cache-regressions.sh ;; + +"Performance Regression") + echo "Running Performance Regression Check" + PERF_MODE="${PERF_MODE:---check}" + ./tests/perf-regression/perf-regression.sh "$PERF_MODE" + ;; *) echo "ERROR: Invalid environment variable, not clear what tests to run! $CI_NODE_INDEX" exit 1 diff --git a/src/lib/testing/perf-regression.ts b/src/lib/testing/perf-regression.ts index cd754ef085..86bf415eef 100644 --- a/src/lib/testing/perf-regression.ts +++ b/src/lib/testing/perf-regression.ts @@ -26,7 +26,7 @@ import minimist from 'minimist'; import path from 'path'; import { ConstraintSystemSummary } from '../provable/core/provable-context.js'; -export { PerfRegressionEntry, Performance }; +export { PerfRegressionEntry, Performance, logPerf }; type MethodsInfo = Record< string, @@ -358,10 +358,12 @@ function checkAgainstBaseline(params: { // tolerances const compileTol = 1.05; // 5% - const compileTiny = 1.08; // for near-zero baselines + const compileTiny = 1.08; // for near-zero baselines (< 5e-5s) const timeTolDefault = 1.1; // 10% for prove/verify const timeTolSmall = 1.25; // 25% for very small times (<0.2s) + const labelPretty = label[0].toUpperCase() + label.slice(1); + if (label === 'compile') { const expected = baseline.compileTime; if (expected == null) { @@ -369,15 +371,21 @@ function checkAgainstBaseline(params: { `No baseline compileTime for "${programName}". Run --dump (compile) to set it.` ); } + const tol = expected < 5e-5 ? 
compileTiny : compileTol; const allowedPct = (tol - 1) * 100; + const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100; + const failed = actualTime > expected * tol; - if (actualTime > expected * tol) { - const regressionPct = ((actualTime - expected) / expected) * 100; + // colorized perf log + logPerf(programName, label, expected, actualTime, regressionPct, allowedPct, failed); + + if (failed) { throw new Error( `Compile regression for ${programName}\n` + - ` Actual: ${actualTime.toFixed(6)}s\n` + - ` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)` + ` Actual: ${actualTime.toFixed(6)}s\n` + + ` Baseline: ${expected.toFixed(6)}s\n` + + ` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)` ); } return; @@ -390,6 +398,7 @@ function checkAgainstBaseline(params: { `No baseline method entry for ${programName}.${methodName}. Run --dump (${label}) to add it.` ); } + if (baseMethod.digest !== digest) { throw new Error( `Digest mismatch for ${programName}.${methodName}\n` + @@ -399,21 +408,65 @@ function checkAgainstBaseline(params: { } const expected = label === 'prove' ? baseMethod.proveTime : baseMethod.verifyTime; - const labelPretty = label.charAt(0).toUpperCase(); if (expected == null) { throw new Error( `No baseline ${label}Time for ${programName}.${methodName}. Run --dump (${label}) to set it.` ); } + const tol = expected < 0.2 ? timeTolSmall : timeTolDefault; const allowedPct = (tol - 1) * 100; + const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100; + const failed = actualTime > expected * tol; + + logPerf( + `${programName}.${methodName}`, + label, + expected, + actualTime, + regressionPct, + allowedPct, + failed + ); - if (actualTime > expected * tol) { - const regressionPct = ((actualTime - expected) / expected) * 100; + if (failed) { throw new Error( `${labelPretty} regression for ${programName}.${methodName}\n` + - ` Actual: ${actualTime.toFixed(3)}s\n` + - ` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)` + ` Actual: ${actualTime.toFixed(3)}s\n` + + ` Baseline: ${expected.toFixed(3)}s\n` + + ` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)` ); } } + +function logPerf( + scope: string, + label: string, + expected: number, + actual: number, + regressionPct: number, + allowedPct: number, + failed: boolean +) { + const COLORS = { + reset: '\x1b[0m', + red: '\x1b[31m', + green: '\x1b[32m', + yellow: '\x1b[33m', + cyan: '\x1b[36m', + }; + + let color: string; + if (failed) color = COLORS.red; + else if (regressionPct > 0) color = COLORS.yellow; + else color = COLORS.green; + + console.log( + `${COLORS.cyan}[Perf][${scope}]${COLORS.reset} ${label}: ` + + `baseline=${expected.toFixed(6)}s, actual=${actual.toFixed(6)}s, ` + + `${color}regression=${regressionPct >= 0 ? '+' : ''}${ + Number.isFinite(regressionPct) ? 
regressionPct.toFixed(2) : '∞' + }%${COLORS.reset} ` + + `(allowed +${allowedPct.toFixed(0)}%)` + ); +} diff --git a/tests/perf-regression/perf-regression.ts b/tests/perf-regression/perf-regression.ts index 171f050ac7..3f445657f9 100644 --- a/tests/perf-regression/perf-regression.ts +++ b/tests/perf-regression/perf-regression.ts @@ -20,7 +20,7 @@ import { TokenContract, createDex } from '../../src/examples/zkapps/dex/dex.js'; import { HelloWorld } from '../../src/examples/zkapps/hello-world/hello-world.js'; import { Membership_ } from '../../src/examples/zkapps/voting/membership.js'; import { Voting_ } from '../../src/examples/zkapps/voting/voting.js'; -import { PerfRegressionEntry } from '../../src/lib/testing/perf-regression.js'; +import { PerfRegressionEntry, logPerf } from '../../src/lib/testing/perf-regression.js'; import { tic, toc } from '../../src/lib/util/tic-toc.js'; import { BasicCS, @@ -118,14 +118,37 @@ async function checkPerf(contracts: MinimumConstraintSystem[]) { continue; } - const tolerance = expectedCompile < 5e-5 ? 1.08 : 1.05; - const allowedPct = (tolerance - 1) * 100; + // Tiered tolerances: + // < 0.00001s → 45% + // 0.00001s ≤ t < 0.0001s → 30% + // ≥ 0.0001s → 20% + let allowedPct: number; + if (expectedCompile < 1e-5) { + allowedPct = 45; + } else if (expectedCompile < 1e-4) { + allowedPct = 30; + } else { + allowedPct = 20; + } + const tolerance = 1 + allowedPct / 100; + + const regressionPct = + expectedCompile === 0 + ? compileTime === 0 + ? 0 + : Infinity + : ((compileTime - expectedCompile) / expectedCompile) * 100; + + // colorized log using imported utility + const failed = compileTime > expectedCompile * tolerance; + logPerf(c.name, 'compile', expectedCompile, compileTime, regressionPct, allowedPct, failed); - if (compileTime > expectedCompile * tolerance) { - const regressionPct = ((compileTime - expectedCompile) / expectedCompile) * 100; + // handle failure + if (failed) { errorStack += `\n\nCompile regression for ${c.name} Actual: ${compileTime.toFixed(6)}s - Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`; + Baseline: ${expectedCompile.toFixed(6)}s + Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`; } }
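
For reference, a minimal sketch of how the newly exported `logPerf` helper could be driven from an ad-hoc TypeScript benchmark script. The relative import path, program name, and timing values below are illustrative assumptions; the tolerance constants mirror `timeTolSmall` / `timeTolDefault` from `src/lib/testing/perf-regression.ts` in this diff.

```ts
// Sketch only: exercises the exported logPerf helper with made-up timings.
// The import path is an assumption and depends on where this script lives.
import { logPerf } from './src/lib/testing/perf-regression.js';

const baseline = 0.18; // seconds, pretend this came from perf-regression.json
const actual = 0.21; // seconds, pretend this was just measured

// Small baselines (< 0.2s) get the looser 25% tolerance, larger ones 10%,
// matching timeTolSmall / timeTolDefault in checkAgainstBaseline.
const tolerance = baseline < 0.2 ? 1.25 : 1.1;
const allowedPct = (tolerance - 1) * 100;
const regressionPct = ((actual - baseline) / baseline) * 100;
const failed = actual > baseline * tolerance;

// Prints a cyan [Perf][scope] prefix; the regression percentage is green when
// at or below baseline, yellow when slower but within tolerance, red when over it.
logPerf('MyZkProgram.verifyProof', 'verify', baseline, actual, regressionPct, allowedPct, failed);
```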
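
Similarly, a small sketch of the tiered compile-tolerance selection that `tests/perf-regression/perf-regression.ts` now applies. The helper name and sample baseline are hypothetical; the thresholds and percentages are the ones added in the diff.

```ts
// Sketch of the tiered compile tolerance used in checkPerf:
//   baseline < 1e-5 s  -> 45% allowed
//   baseline < 1e-4 s  -> 30% allowed
//   otherwise          -> 20% allowed
// allowedCompileTolerance is a hypothetical helper name, not part of the diff.
function allowedCompileTolerance(expectedCompile: number): { allowedPct: number; tolerance: number } {
  const allowedPct = expectedCompile < 1e-5 ? 45 : expectedCompile < 1e-4 ? 30 : 20;
  return { allowedPct, tolerance: 1 + allowedPct / 100 };
}

// Example: a 0.00005s baseline falls in the middle tier (30%), so a measured
// compile time up to 0.000065s passes.
const { allowedPct, tolerance } = allowedCompileTolerance(5e-5);
console.log(allowedPct, tolerance); // 30, 1.3
```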