Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions .github/actions/perf-regression/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
name: Performance Regression (Dump or Check)
description: Runs performance regression tests in dump or check mode. Auto mode decides via PR label on PRs, and via merged PR labels on push to main.

inputs:
  mode:
    description: "'dump' | 'check' | 'auto' (auto uses labels)"
    required: false
    default: auto
  label_name:
    description: "Label that enables dump mode when present (auto mode)"
    required: false
    default: dump-performance

  artifact_name:
    description: "Artifact name used to store the performance baseline JSON"
    required: false
    default: perf-regression-json
  baseline_path:
    description: "Path where the performance baseline JSON is stored"
    required: false
    default: tests/perf-regression/perf-regression.json

  workflow_name:
    description: "Workflow name to search when locating prior runs (defaults to current)"
    required: false
    default: ""
  branch:
    description: "Branch to search prior runs on (defaults to main)"
    required: false
    default: "main"
  target_repo:
    description: "owner/repo to search in (defaults to current)"
    required: false
    default: ""

  run_command:
    description: "Shell command to run performance tests (reads PERF_MODE env)"
    required: false
    default: sh run-ci-tests.sh

runs:
  using: "composite"
  steps:
    # Export WF_NAME / BRANCH / TARGET_REPO to $GITHUB_ENV so later steps
    # (baseline lookup) can reference them without re-resolving inputs.
    - name: Resolve workflow and repository context
      shell: bash
      env:
        IN_WORKFLOW: ${{ inputs.workflow_name }}
        IN_BRANCH: ${{ inputs.branch }}
        IN_REPO: ${{ inputs.target_repo }}
        WF_CTX: ${{ github.workflow }}
        REPO_CTX: ${{ github.repository }}
      run: |
        set -euo pipefail

        # Empty input strings fall back to the current workflow/repository.
        WF_NAME="${IN_WORKFLOW:-$WF_CTX}"
        BRANCH="${IN_BRANCH}" # default is "main" unless caller overrides
        REPO="${IN_REPO:-$REPO_CTX}"

        echo "WF_NAME=$WF_NAME" >> "$GITHUB_ENV"
        echo "BRANCH=$BRANCH" >> "$GITHUB_ENV"
        echo "TARGET_REPO=$REPO" >> "$GITHUB_ENV"

        echo "Using workflow: $WF_NAME"
        echo "Searching performance baselines on branch: $BRANCH"
        echo "Target repository: $REPO"

    # Decide --dump vs --check. Explicit mode wins; otherwise in auto mode:
    #  - pull_request: look for the label on the PR itself
    #  - push to main: look for the label on the PR(s) merged in this commit
    #  - anything else: default to --check
    - name: Determine dump or check mode
      id: resolve-mode
      shell: bash
      env:
        MODE: ${{ inputs.mode }}
        LABEL_NAME: ${{ inputs.label_name }}
        GH_TOKEN: ${{ github.token }}
        EVENT_NAME: ${{ github.event_name }}
        OWNER_REPO: ${{ github.repository }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        GIT_REF: ${{ github.ref }}
        GIT_SHA: ${{ github.sha }}
      run: |
        set -euo pipefail

        # Echo "--dump" when the given PR carries the label, else "--check".
        decide_from_pr_labels () {
          local repo="$1" pr="$2" label="$3"
          local labels
          labels=$(gh api "repos/$repo/issues/$pr/labels" --jq '.[].name' | tr '\n' ' ')
          if echo "$labels" | grep -q -w "$label"; then
            echo "--dump"
          else
            echo "--check"
          fi
        }

        # Echo "--dump" when any PR associated with the pushed commit carries
        # the label, else "--check". The `|| true` tolerates commits with no
        # associated PRs.
        decide_from_merged_pr_labels () {
          local repo="$1" sha="$2" label="$3"
          local prs should_dump=0
          prs=$(gh api "repos/$repo/commits/$sha/pulls" --jq '.[].number' || true)
          for n in $prs; do
            local ls
            ls=$(gh api "repos/$repo/issues/$n/labels" --jq '.[].name' | tr '\n' ' ')
            if echo "$ls" | grep -q -w "$label"; then
              should_dump=1; break
            fi
          done
          if [[ $should_dump -eq 1 ]]; then echo "--dump"; else echo "--check"; fi
        }

        MODE="${MODE:-auto}"
        REASON=""

        if [[ "$MODE" == "dump" ]]; then
          PERF_MODE="--dump"; REASON="explicit input"
        elif [[ "$MODE" == "check" ]]; then
          PERF_MODE="--check"; REASON="explicit input"
        else
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            PERF_MODE="$(decide_from_pr_labels "$OWNER_REPO" "$PR_NUMBER" "$LABEL_NAME")"
            REASON="PR label check"
          elif [[ "$EVENT_NAME" == "push" && "$GIT_REF" == "refs/heads/main" ]]; then
            PERF_MODE="$(decide_from_merged_pr_labels "$OWNER_REPO" "$GIT_SHA" "$LABEL_NAME")"
            REASON="merged PR label check"
          else
            PERF_MODE="--check"; REASON="default for non-PR, non-main push"
          fi
        fi

        # Expose the mode both as an env var (for the test step) and a step
        # output (for later `if:` conditions).
        echo "PERF_MODE=$PERF_MODE" | tee -a "$GITHUB_ENV"
        echo "perf_mode=$PERF_MODE" >> "$GITHUB_OUTPUT"
        echo "Mode: $PERF_MODE ($REASON)"

    # Walk the most recent completed runs of the target workflow on the target
    # branch and pick the first one that still holds a non-expired baseline
    # artifact. Fails the job if none exists (a baseline must be dumped first).
    - name: Find prior run with performance baseline artifact (from target branch)
      id: find-baseline
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      shell: bash
      env:
        GH_TOKEN: ${{ github.token }}
        TARGET_REPO: ${{ env.TARGET_REPO }}
        WORKFLOW_NAME: ${{ env.WF_NAME }}
        BRANCH: ${{ env.BRANCH }}
        ARTIFACT_NAME: ${{ inputs.artifact_name }}
      run: |
        set -euo pipefail

        echo "Searching $TARGET_REPO (workflow: $WORKFLOW_NAME, branch: $BRANCH) for artifact $ARTIFACT_NAME"

        RUN_IDS=$(gh run list \
          --repo "$TARGET_REPO" \
          --workflow "$WORKFLOW_NAME" \
          --json databaseId,headBranch,status,conclusion \
          --limit 50 \
          --jq "[.[] | select(.status==\"completed\" and .headBranch==\"$BRANCH\") | .databaseId] | .[]" \
          || true)

        if [[ -z "${RUN_IDS:-}" ]]; then
          echo "No completed runs found on branch $BRANCH."
          exit 1
        fi

        FOUND=""
        for RID in $RUN_IDS; do
          HAS=$(gh api "repos/$TARGET_REPO/actions/runs/$RID/artifacts" \
            --jq ".artifacts | map(select(.name==\"$ARTIFACT_NAME\" and .expired==false)) | length")
          echo "Run $RID has $HAS matching artifact(s)"
          if [[ "$HAS" -gt 0 ]]; then FOUND="$RID"; break; fi
        done

        if [[ -z "${FOUND:-}" ]]; then
          echo "No suitable run with artifact $ARTIFACT_NAME found on $BRANCH."
          exit 1
        fi

        echo "run_id=$FOUND" >> "$GITHUB_OUTPUT"
        echo "Using performance baseline from run: $FOUND"

    # Cross-run artifact download: requires `actions: read` permission and an
    # explicit github-token alongside run-id.
    - name: Download performance baseline artifact
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      uses: actions/download-artifact@v4
      with:
        name: ${{ inputs.artifact_name }}
        run-id: ${{ steps.find-baseline.outputs.run_id }}
        path: tests/perf-regression
        github-token: ${{ github.token }}

    # The artifact may unpack into a subdirectory; move the JSON to the exact
    # path the test runner expects.
    - name: Normalize performance baseline path
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--check' }}
      shell: bash
      env:
        DEST: ${{ inputs.baseline_path }}
      run: |
        set -euo pipefail

        FILE="$(find tests/perf-regression -name 'perf-regression.json' -print -quit || true)"
        if [[ -z "${FILE:-}" ]]; then
          echo "ERROR: perf-regression.json not found in downloaded artifact."
          ls -R tests/perf-regression || true
          exit 1
        fi
        mkdir -p "$(dirname "$DEST")"
        # NOTE: must be a full `if`, not `[[ ... ]] && mv`: under `set -e` a
        # failing final `[[ ]]` in an &&-list aborts the step when the file is
        # already at its destination.
        if [[ "$FILE" != "$DEST" ]]; then
          mv -f "$FILE" "$DEST"
        fi
        echo "Performance Baseline ready at: $DEST"

    # Run the actual tests; the command reads PERF_MODE (--dump or --check).
    - name: Run performance regression tests
      shell: bash
      env:
        PERF_MODE: ${{ env.PERF_MODE }}
      run: ${{ inputs.run_command }}

    # In dump mode, publish the freshly generated baseline for future checks.
    - name: Upload new performance baseline (dump mode)
      if: ${{ steps.resolve-mode.outputs.perf_mode == '--dump' }}
      uses: actions/upload-artifact@v4
      with:
        name: ${{ inputs.artifact_name }}
        path: ${{ inputs.baseline_path }}
        if-no-files-found: error
        retention-days: 30
17 changes: 17 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

permissions:
contents: write
actions: read

jobs:
Prepare:
Expand All @@ -44,7 +45,7 @@
sudo rm -rf /usr/local/lib/android \
/usr/share/dotnet \
/opt/ghc \
"$AGENT_TOOLSDIRECTORY" || true

Check warning on line 48 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (TOOLSDIRECTORY)
docker system prune -af --volumes || true
df -h
- name: Checkout repository with submodules
Expand Down Expand Up @@ -119,7 +120,7 @@
sudo rm -rf /usr/local/lib/android \
/usr/share/dotnet \
/opt/ghc \
"$AGENT_TOOLSDIRECTORY" || true

Check warning on line 123 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (TOOLSDIRECTORY)
docker system prune -af --volumes || true
df -h
- uses: actions/checkout@v4
Expand Down Expand Up @@ -152,6 +153,7 @@
'Verification Key Regression Check 2',
'CommonJS test',
'Cache Regression',
'Performance Regression',
]
steps:
- name: Checkout repository with submodules
Expand All @@ -174,9 +176,24 @@
- name: Prepare for tests
run: touch profiling.md
- name: Execute tests
if: matrix.test_type != 'Performance Regression'
env:
TEST_TYPE: ${{ matrix.test_type }}
run: sh run-ci-tests.sh
- name: Performance Regression (dump or check)
if: matrix.test_type == 'Performance Regression'
uses: ./.github/actions/perf-regression
env:
TEST_TYPE: Performance Regression
with:
mode: auto
label_name: dump-performance
artifact_name: perf-regression-json
baseline_path: tests/perf-regression/perf-regression.json
workflow_name: ${{ github.workflow }}
branch: main
target_repo: ${{ inputs.target_repo || github.repository }}
run_command: sh run-ci-tests.sh
- name: Add to job summary
if: always()
run: |
Expand Down Expand Up @@ -233,10 +250,10 @@

echo "Running tests from index $start_index to $end_index"

shopt -s globstar

Check warning on line 253 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (shopt)
test_files=(./dist/node/**/*.unit-test.js)

set -o pipefail

Check warning on line 256 in .github/workflows/checks.yml

View workflow job for this annotation

GitHub Actions / Lint-Format-and-TypoCheck

Unknown word (pipefail)

for ((i=start_index; i<end_index && i<${#test_files[@]}; i++)); do
echo "Running test: ${test_files[$i]}"
Expand Down
6 changes: 6 additions & 0 deletions run-ci-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ case $TEST_TYPE in
echo "Cache Regression"
./scripts/tests/check-cache-regressions.sh
;;

"Performance Regression")
echo "Running Performance Regression Check"
PERF_MODE="${PERF_MODE:---check}"
./tests/perf-regression/perf-regression.sh "$PERF_MODE"
;;
*)
echo "ERROR: Invalid environment variable, not clear what tests to run! $CI_NODE_INDEX"
exit 1
Expand Down
75 changes: 64 additions & 11 deletions src/lib/testing/perf-regression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import minimist from 'minimist';
import path from 'path';
import { ConstraintSystemSummary } from '../provable/core/provable-context.js';

export { PerfRegressionEntry, Performance };
export { PerfRegressionEntry, Performance, logPerf };

type MethodsInfo = Record<
string,
Expand Down Expand Up @@ -358,26 +358,34 @@ function checkAgainstBaseline(params: {

// tolerances
const compileTol = 1.05; // 5%
const compileTiny = 1.08; // for near-zero baselines
const compileTiny = 1.08; // for near-zero baselines (< 5e-5s)
const timeTolDefault = 1.1; // 10% for prove/verify
const timeTolSmall = 1.25; // 25% for very small times (<0.2s)

const labelPretty = label[0].toUpperCase() + label.slice(1);

if (label === 'compile') {
const expected = baseline.compileTime;
if (expected == null) {
throw new Error(
`No baseline compileTime for "${programName}". Run --dump (compile) to set it.`
);
}

const tol = expected < 5e-5 ? compileTiny : compileTol;
const allowedPct = (tol - 1) * 100;
const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
const failed = actualTime > expected * tol;

if (actualTime > expected * tol) {
const regressionPct = ((actualTime - expected) / expected) * 100;
// colorized perf log
logPerf(programName, label, expected, actualTime, regressionPct, allowedPct, failed);

if (failed) {
throw new Error(
`Compile regression for ${programName}\n` +
` Actual: ${actualTime.toFixed(6)}s\n` +
` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
` Actual: ${actualTime.toFixed(6)}s\n` +
` Baseline: ${expected.toFixed(6)}s\n` +
` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
);
}
return;
Expand All @@ -390,6 +398,7 @@ function checkAgainstBaseline(params: {
`No baseline method entry for ${programName}.${methodName}. Run --dump (${label}) to add it.`
);
}

if (baseMethod.digest !== digest) {
throw new Error(
`Digest mismatch for ${programName}.${methodName}\n` +
Expand All @@ -399,21 +408,65 @@ function checkAgainstBaseline(params: {
}

const expected = label === 'prove' ? baseMethod.proveTime : baseMethod.verifyTime;
const labelPretty = label.charAt(0).toUpperCase();
if (expected == null) {
throw new Error(
`No baseline ${label}Time for ${programName}.${methodName}. Run --dump (${label}) to set it.`
);
}

const tol = expected < 0.2 ? timeTolSmall : timeTolDefault;
const allowedPct = (tol - 1) * 100;
const regressionPct = expected === 0 ? 0 : ((actualTime - expected) / expected) * 100;
const failed = actualTime > expected * tol;

logPerf(
`${programName}.${methodName}`,
label,
expected,
actualTime,
regressionPct,
allowedPct,
failed
);

if (actualTime > expected * tol) {
const regressionPct = ((actualTime - expected) / expected) * 100;
if (failed) {
throw new Error(
`${labelPretty} regression for ${programName}.${methodName}\n` +
` Actual: ${actualTime.toFixed(3)}s\n` +
` Regression: +${regressionPct.toFixed(2)}% (allowed +${allowedPct.toFixed(0)}%)`
` Actual: ${actualTime.toFixed(3)}s\n` +
` Baseline: ${expected.toFixed(3)}s\n` +
` Regression: +${Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞'}% (allowed +${allowedPct.toFixed(0)}%)`
);
}
}

/**
 * Prints one colorized performance-comparison line to the console.
 *
 * @param scope - identifier for what was measured, e.g. `Program.method`
 * @param label - phase name (`compile` | `prove` | `verify`)
 * @param expected - baseline time in seconds
 * @param actual - measured time in seconds
 * @param regressionPct - percent change vs. baseline (negative = faster)
 * @param allowedPct - tolerance threshold in percent
 * @param failed - whether the measurement exceeded the tolerance
 */
function logPerf(
  scope: string,
  label: string,
  expected: number,
  actual: number,
  regressionPct: number,
  allowedPct: number,
  failed: boolean
) {
  // ANSI escape sequences for terminal coloring.
  const RESET = '\x1b[0m';
  const RED = '\x1b[31m';
  const GREEN = '\x1b[32m';
  const YELLOW = '\x1b[33m';
  const CYAN = '\x1b[36m';

  // red = over tolerance, yellow = slower but tolerated, green = not slower.
  const color = failed ? RED : regressionPct > 0 ? YELLOW : GREEN;

  // Show an explicit '+' for slowdowns; non-finite percentages (zero
  // baseline) render as '∞'.
  const sign = regressionPct >= 0 ? '+' : '';
  const pct = Number.isFinite(regressionPct) ? regressionPct.toFixed(2) : '∞';

  console.log(
    `${CYAN}[Perf][${scope}]${RESET} ${label}: ` +
      `baseline=${expected.toFixed(6)}s, actual=${actual.toFixed(6)}s, ` +
      `${color}regression=${sign}${pct}%${RESET} ` +
      `(allowed +${allowedPct.toFixed(0)}%)`
  );
}
Loading
Loading