Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions .github/actions/perf-regression/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
name: Performance Regression (Dump or Check)
description: Runs performance regression tests in dump or check mode. Auto mode decides via PR label on PRs, and via merged PR labels on push to main.

inputs:
  mode:
    description: "'dump' | 'check' | 'auto' (auto uses labels)"
    required: false
    default: auto
  label_name:
    description: "Label that enables dump mode when present (auto mode)"
    required: false
    default: dump-performance

  artifact_name:
    description: "Artifact name used to store the performance baseline JSON"
    required: false
    default: perf-regression-json
  baseline_path:
    description: "Path where the performance baseline JSON is stored"
    required: false
    default: tests/perf-regression/perf-regression.json

  workflow_name:
    description: "Workflow name to search when locating prior runs (defaults to current)"
    required: false
    default: ""
  branch:
    description: "Branch to search prior runs on (defaults to main)"
    required: false
    default: "main"
  target_repo:
    description: "owner/repo to search in (defaults to current)"
    required: false
    default: ""

  run_command:
    description: "Shell command to run performance tests (reads PERF_MODE env)"
    required: false
    default: sh run-ci-tests.sh

runs:
  using: "composite"
  steps:
    # Export WF_NAME / BRANCH / TARGET_REPO to $GITHUB_ENV so later steps
    # (baseline lookup) can reference them without re-resolving inputs.
    - name: Resolve workflow and repository context
      shell: bash
      env:
        IN_WORKFLOW: ${{ inputs.workflow_name }}
        IN_BRANCH: ${{ inputs.branch }}
        IN_REPO: ${{ inputs.target_repo }}
        WF_CTX: ${{ github.workflow }}
        REPO_CTX: ${{ github.repository }}
      run: |
        set -euo pipefail

        # Empty input strings fall back to the current workflow/repository.
        WF_NAME="${IN_WORKFLOW:-$WF_CTX}"
        BRANCH="${IN_BRANCH}" # default is "main" unless caller overrides
        REPO="${IN_REPO:-$REPO_CTX}"

        echo "WF_NAME=$WF_NAME" >> "$GITHUB_ENV"
        echo "BRANCH=$BRANCH" >> "$GITHUB_ENV"
        echo "TARGET_REPO=$REPO" >> "$GITHUB_ENV"

        echo "Using workflow: $WF_NAME"
        echo "Searching performance baselines on branch: $BRANCH"
        echo "Target repository: $REPO"

    # Decide --dump vs --check. Explicit mode wins; otherwise in auto mode:
    #  - pull_request: look for the label on the PR itself
    #  - push to main: look for the label on the PR(s) merged in this commit
    #  - anything else: default to --check
    - name: Determine dump or check mode
      id: resolve-mode
      shell: bash
      env:
        MODE: ${{ inputs.mode }}
        LABEL_NAME: ${{ inputs.label_name }}
        GH_TOKEN: ${{ github.token }}
        EVENT_NAME: ${{ github.event_name }}
        OWNER_REPO: ${{ github.repository }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        GIT_REF: ${{ github.ref }}
        GIT_SHA: ${{ github.sha }}
      run: |
        set -euo pipefail

        # Echo "--dump" when the given PR carries the label, else "--check".
        decide_from_pr_labels () {
          local repo="$1" pr="$2" label="$3"
          local labels
          labels=$(gh api "repos/$repo/issues/$pr/labels" --jq '.[].name' | tr '\n' ' ')
          if echo "$labels" | grep -q -w "$label"; then
            echo "--dump"
          else
            echo "--check"
          fi
        }

        # Echo "--dump" when any PR associated with the pushed commit carries
        # the label, else "--check". The `|| true` tolerates commits with no
        # associated PRs.
        decide_from_merged_pr_labels () {
          local repo="$1" sha="$2" label="$3"
          local prs should_dump=0
          prs=$(gh api "repos/$repo/commits/$sha/pulls" --jq '.[].number' || true)
          for n in $prs; do
            local ls
            ls=$(gh api "repos/$repo/issues/$n/labels" --jq '.[].name' | tr '\n' ' ')
            if echo "$ls" | grep -q -w "$label"; then
              should_dump=1; break
            fi
          done
          if [[ $should_dump -eq 1 ]]; then echo "--dump"; else echo "--check"; fi
        }

        MODE="${MODE:-auto}"
        REASON=""

        if [[ "$MODE" == "dump" ]]; then
          PERF_MODE="--dump"; REASON="explicit input"
        elif [[ "$MODE" == "check" ]]; then
          PERF_MODE="--check"; REASON="explicit input"
        else
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            PERF_MODE="$(decide_from_pr_labels "$OWNER_REPO" "$PR_NUMBER" "$LABEL_NAME")"
            REASON="PR label check"
          elif [[ "$EVENT_NAME" == "push" && "$GIT_REF" == "refs/heads/main" ]]; then
            PERF_MODE="$(decide_from_merged_pr_labels "$OWNER_REPO" "$GIT_SHA" "$LABEL_NAME")"
            REASON="merged PR label check"
          else
            PERF_MODE="--check"; REASON="default for non-PR, non-main push"
          fi
        fi

        # Expose the mode both as an env var (for the test step) and a step
        # output (for later `if:` conditions).
        echo "PERF_MODE=$PERF_MODE" | tee -a "$GITHUB_ENV"
        echo "perf_mode=$PERF_MODE" >> "$GITHUB_OUTPUT"
        echo "Mode: $PERF_MODE ($REASON)"

    # Walk the most recent completed runs of the target workflow on the target
    # branch and pick the first one that still holds a non-expired baseline
    # artifact. Fails the job if none exists (a baseline must be dumped first).
    - name: Find prior run with performance baseline artifact (from target branch)
      id: find-baseline
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      shell: bash
      env:
        GH_TOKEN: ${{ github.token }}
        TARGET_REPO: ${{ env.TARGET_REPO }}
        WORKFLOW_NAME: ${{ env.WF_NAME }}
        BRANCH: ${{ env.BRANCH }}
        ARTIFACT_NAME: ${{ inputs.artifact_name }}
      run: |
        set -euo pipefail

        echo "Searching $TARGET_REPO (workflow: $WORKFLOW_NAME, branch: $BRANCH) for artifact $ARTIFACT_NAME"

        RUN_IDS=$(gh run list \
          --repo "$TARGET_REPO" \
          --workflow "$WORKFLOW_NAME" \
          --json databaseId,headBranch,status,conclusion \
          --limit 50 \
          --jq "[.[] | select(.status==\"completed\" and .headBranch==\"$BRANCH\") | .databaseId] | .[]" \
          || true)

        if [[ -z "${RUN_IDS:-}" ]]; then
          echo "No completed runs found on branch $BRANCH."
          exit 1
        fi

        FOUND=""
        for RID in $RUN_IDS; do
          HAS=$(gh api "repos/$TARGET_REPO/actions/runs/$RID/artifacts" \
            --jq ".artifacts | map(select(.name==\"$ARTIFACT_NAME\" and .expired==false)) | length")
          echo "Run $RID has $HAS matching artifact(s)"
          if [[ "$HAS" -gt 0 ]]; then FOUND="$RID"; break; fi
        done

        if [[ -z "${FOUND:-}" ]]; then
          echo "No suitable run with artifact $ARTIFACT_NAME found on $BRANCH."
          exit 1
        fi

        echo "run_id=$FOUND" >> "$GITHUB_OUTPUT"
        echo "Using performance baseline from run: $FOUND"

    # Cross-run artifact download: requires `actions: read` permission and an
    # explicit github-token alongside run-id.
    - name: Download performance baseline artifact
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      uses: actions/download-artifact@v4
      with:
        name: ${{ inputs.artifact_name }}
        run-id: ${{ steps.find-baseline.outputs.run_id }}
        path: tests/perf-regression
        github-token: ${{ github.token }}

    # The artifact may unpack into a subdirectory; move the JSON to the exact
    # path the test runner expects.
    - name: Normalize performance baseline path
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      shell: bash
      env:
        DEST: ${{ inputs.baseline_path }}
      run: |
        set -euo pipefail

        FILE="$(find tests/perf-regression -name 'perf-regression.json' -print -quit || true)"
        if [[ -z "${FILE:-}" ]]; then
          echo "ERROR: perf-regression.json not found in downloaded artifact."
          ls -R tests/perf-regression || true
          exit 1
        fi
        mkdir -p "$(dirname "$DEST")"
        # NOTE: must be a full `if`, not `[[ ... ]] && mv`: under `set -e` a
        # failing final `[[ ]]` in an &&-list aborts the step when the file is
        # already at its destination.
        if [[ "$FILE" != "$DEST" ]]; then
          mv -f "$FILE" "$DEST"
        fi
        echo "Performance Baseline ready at: $DEST"

    # Run the actual tests; the command reads PERF_MODE (--dump or --check).
    - name: Run performance regression tests
      shell: bash
      env:
        PERF_MODE: ${{ env.PERF_MODE }}
      run: ${{ inputs.run_command }}

    # In dump mode, publish the freshly generated baseline for future checks.
    - name: Upload new performance baseline (dump mode)
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--dump' }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ inputs.artifact_name }}
        path: ${{ inputs.baseline_path }}
        if-no-files-found: error
        retention-days: 30
17 changes: 17 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

permissions:
contents: write
actions: read

jobs:
Prepare:
Expand All @@ -44,7 +45,7 @@
sudo rm -rf /usr/local/lib/android \
/usr/share/dotnet \
/opt/ghc \
"$AGENT_TOOLSDIRECTORY" || true

Check warning on line 48 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (TOOLSDIRECTORY)
docker system prune -af --volumes || true
df -h
- name: Checkout repository with submodules
Expand Down Expand Up @@ -119,7 +120,7 @@
sudo rm -rf /usr/local/lib/android \
/usr/share/dotnet \
/opt/ghc \
"$AGENT_TOOLSDIRECTORY" || true

Check warning on line 123 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (TOOLSDIRECTORY)
docker system prune -af --volumes || true
df -h
- uses: actions/checkout@v4
Expand Down Expand Up @@ -152,6 +153,7 @@
'Verification Key Regression Check 2',
'CommonJS test',
'Cache Regression',
'Performance Regression',
]
steps:
- name: Checkout repository with submodules
Expand All @@ -174,9 +176,24 @@
- name: Prepare for tests
run: touch profiling.md
- name: Execute tests
if: matrix.test_type != 'Performance Regression'
env:
TEST_TYPE: ${{ matrix.test_type }}
run: sh run-ci-tests.sh
- name: Performance Regression (dump or check)
if: matrix.test_type == 'Performance Regression'
uses: ./.github/actions/perf-regression
env:
TEST_TYPE: Performance Regression
with:
mode: auto
label_name: dump-performance
artifact_name: perf-regression-json
baseline_path: tests/perf-regression/perf-regression.json
workflow_name: ${{ github.workflow }}
branch: main
target_repo: ${{ inputs.target_repo || github.repository }}
run_command: sh run-ci-tests.sh
- name: Add to job summary
if: always()
run: |
Expand Down Expand Up @@ -233,10 +250,10 @@

echo "Running tests from index $start_index to $end_index"

shopt -s globstar

Check warning on line 253 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (shopt)
test_files=(./dist/node/**/*.unit-test.js)

set -o pipefail

Check warning on line 256 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (pipefail)

for ((i=start_index; i<end_index && i<${#test_files[@]}; i++)); do
echo "Running test: ${test_files[$i]}"
Expand Down
6 changes: 6 additions & 0 deletions run-ci-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ case $TEST_TYPE in
echo "Cache Regression"
./scripts/tests/check-cache-regressions.sh
;;

"Performance Regression")
echo "Running Performance Regression Check"
PERF_MODE="${PERF_MODE:---check}"
./tests/perf-regression/perf-regression.sh "$PERF_MODE"
;;
*)
echo "ERROR: Invalid environment variable, not clear what tests to run! $CI_NODE_INDEX"
exit 1
Expand Down
75 changes: 64 additions & 11 deletions src/lib/testing/perf-regression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import minimist from 'minimist';
import path from 'path';
import { ConstraintSystemSummary } from '../provable/core/provable-context.js';

export { PerfRegressionEntry, Performance };
export { PerfRegressionEntry, Performance, logPerf };

type MethodsInfo = Record<
string,
Expand Down Expand Up @@ -358,26 +358,34 @@ function checkAgainstBaseline(params: {

// tolerances
const compileTol = 1.05; // 5%
const compileTiny = 1.08; // for near-zero baselines
const compileTiny = 1.08; // for near-zero baselines (< 5e-5s)
const timeTolDefault = 1.1; // 10% for prove/verify
const timeTolSmall = 1.25; // 25% for very small times (<0.2s)

const labelPretty = label[0].toUpperCase() + label.slice(1);

if (label === 'compile') {
const expected = baseline.compileTime;
if (expected == null) {
throw new Error(
`No baseline compileTime for "${programName}". Run --dump (compile) to set it.`
);
}

const tol = expected < 5e-5 ? compileTiny : compileTol;
const allowedPct = (tol - 1) * 100;
const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
const failed = actualTime > expected * tol;

if (actualTime > expected * tol) {
const regressionPct = ((actualTime - expected) / expected) * 100;
// colorized perf log
logPerf(programName, label, expected, actualTime, regressionPct, allowedPct, failed);

if (failed) {
throw new Error(
`Compile regression for ${programName}\n` +
` Actual: ${actualTime.toFixed(6)}s\n` +
` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
` Actual: ${actualTime.toFixed(6)}s\n` +
` Baseline: ${expected.toFixed(6)}s\n` +
` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
);
}
return;
Expand All @@ -390,6 +398,7 @@ function checkAgainstBaseline(params: {
`No baseline method entry for ${programName}.${methodName}. Run --dump (${label}) to add it.`
);
}

if (baseMethod.digest !== digest) {
throw new Error(
`Digest mismatch for ${programName}.${methodName}\n` +
Expand All @@ -399,21 +408,65 @@ function checkAgainstBaseline(params: {
}

const expected = label === 'prove' ? baseMethod.proveTime : baseMethod.verifyTime;
const labelPretty = label.charAt(0).toUpperCase();
if (expected == null) {
throw new Error(
`No baseline ${label}Time for ${programName}.${methodName}. Run --dump (${label}) to set it.`
);
}

const tol = expected < 0.2 ? timeTolSmall : timeTolDefault;
const allowedPct = (tol - 1) * 100;
const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
const failed = actualTime > expected * tol;

logPerf(
`${programName}.${methodName}`,
label,
expected,
actualTime,
regressionPct,
allowedPct,
failed
);

if (actualTime > expected * tol) {
const regressionPct = ((actualTime - expected) / expected) * 100;
if (failed) {
throw new Error(
`${labelPretty} regression for ${programName}.${methodName}\n` +
` Actual: ${actualTime.toFixed(3)}s\n` +
` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
` Actual: ${actualTime.toFixed(3)}s\n` +
` Baseline: ${expected.toFixed(3)}s\n` +
` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
);
}
}

/**
 * Prints one colorized performance-comparison line to the console.
 *
 * @param scope - identifier for what was measured, e.g. `Program.method`
 * @param label - phase name (`compile` | `prove` | `verify`)
 * @param expected - baseline time in seconds
 * @param actual - measured time in seconds
 * @param regressionPct - percent change vs. baseline (negative = faster)
 * @param allowedPct - tolerance threshold in percent
 * @param failed - whether the measurement exceeded the tolerance
 */
function logPerf(
  scope: string,
  label: string,
  expected: number,
  actual: number,
  regressionPct: number,
  allowedPct: number,
  failed: boolean
) {
  // ANSI escape sequences for terminal coloring.
  const RESET = '\x1b[0m';
  const RED = '\x1b[31m';
  const GREEN = '\x1b[32m';
  const YELLOW = '\x1b[33m';
  const CYAN = '\x1b[36m';

  // red = over tolerance, yellow = slower but tolerated, green = not slower.
  const color = failed ? RED : regressionPct > 0 ? YELLOW : GREEN;

  // Show an explicit '+' for slowdowns; non-finite percentages (zero
  // baseline) render as '∞'.
  const sign = regressionPct >= 0 ? '+' : '';
  const pct = Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞';

  console.log(
    `${CYAN}[Perf][${scope}]${RESET} ${label}: ` +
      `baseline=${expected.toFixed(6)}s, actual=${actual.toFixed(6)}s, ` +
      `${color}regression=${sign}${pct}%${RESET} ` +
      `(allowed +${allowedPct.toFixed(0)}%)`
  );
}
Loading
Loading