# Manually dispatched workflow that benchmarks a pull request with the
# `mpmc-throughput` Criterion bench, compares it against a baseline measured on
# `main`, and reports the outcome in a comment on the pull request.
name: Manual MPMC Benchmarks

on:
  workflow_dispatch:
    inputs:
      pr_number:
        description: 'Pull request number to benchmark'
        required: true
        type: number
      runner_label:
        description: 'Label for the self-hosted benchmark runner'
        required: true
        type: string

permissions:
  contents: read
  pull-requests: write

env:
  CARGO_TERM_COLOR: always
  # Shared by the PR checkout and the main-baseline checkout so that both
  # benchmark runs read and write the same `criterion` directory.
  CARGO_TARGET_DIR: ${{ github.workspace }}/.bench-target

jobs:
  benchmark:
    # Quoted on purpose: in a plain YAML scalar " #" starts a comment, which
    # would silently truncate the job name to "Bench PR".
    name: 'Bench PR #${{ inputs.pr_number }}'
    runs-on: [self-hosted, '${{ inputs.runner_label }}']
    outputs:
      pr_number: ${{ steps.parse-pr.outputs.pr_number }}

    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 # v2.13.2
        with:
          egress-policy: audit

      - name: Parse PR number as integer
        id: parse-pr
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the input can never be executed as shell code.
          PR_NUMBER_INPUT: ${{ inputs.pr_number }}
        run: |
          # Parse PR number as integer (handle float inputs like "10.0")
          pr_int=$(printf "%.0f" "$PR_NUMBER_INPUT")

          # Refuse anything that is not a positive integer so later steps never
          # receive an empty or malformed PR number.
          case "$pr_int" in
            '' | *[!0-9]* | 0)
              echo "::error::Invalid pull request number: ${PR_NUMBER_INPUT}"
              exit 1
              ;;
          esac

          echo "pr_number=${pr_int}" >> "$GITHUB_OUTPUT"

      - name: Comment benchmark start on PR
        id: progress-comment
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        env:
          PR_NUMBER: ${{ steps.parse-pr.outputs.pr_number }}
          # Free-form workflow input: read it from the environment rather than
          # pasting it into the JavaScript source (avoids script injection).
          RUNNER_LABEL: ${{ inputs.runner_label }}
        with:
          script: |
            const prNumber = Number(process.env.PR_NUMBER);
            const runnerLabel = process.env.RUNNER_LABEL;
            const body = `Benchmark run started on runner label \`${runnerLabel}\`...`;
            const { data: comment } = await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body,
            });
            // Later steps update this comment in place instead of adding new ones.
            core.setOutput('comment_id', comment.id);

      - name: Checkout PR head
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          ref: refs/pull/${{ steps.parse-pr.outputs.pr_number }}/head
          fetch-depth: 0
          # PR code is built and executed below; do not leave the workflow
          # token in the local git configuration. No later step pushes or
          # fetches with git.
          persist-credentials: false

      - name: Checkout main for baseline
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          ref: main
          path: main-baseline
          fetch-depth: 0
          persist-credentials: false

      - name: Record main commit
        id: main_sha
        run: echo "sha=$(git -C main-baseline rev-parse HEAD)" >> "$GITHUB_OUTPUT"

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@0b1efabc08b657293548b77fb76cc02d26091c7e # stable channel installer
        with:
          toolchain: stable

      - name: Cache Rust build artifacts
        uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1
        with:
          shared-key: bench-${{ inputs.runner_label }}
          workspaces: |
            .
            main-baseline
          cache-on-failure: true

      # Restore-only action: the baseline is saved explicitly below, before the
      # PR run adds its own measurements to the same directory, so no post-job
      # save is wanted here.
      - name: Restore cached main baseline measurements
        id: baseline-cache
        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: ${{ env.CARGO_TARGET_DIR }}/criterion
          key: criterion-main-${{ inputs.runner_label }}-${{ steps.main_sha.outputs.sha }}

      - name: Run baseline on main (only if missing)
        if: steps.baseline-cache.outputs.cache-hit != 'true'
        working-directory: main-baseline
        run: cargo bench --bench mpmc-throughput -- --measurement-time 20 --sample-size 50 --save-baseline main

      - name: Save baseline cache (before PR run)
        if: steps.baseline-cache.outputs.cache-hit != 'true'
        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: ${{ env.CARGO_TARGET_DIR }}/criterion
          key: criterion-main-${{ inputs.runner_label }}-${{ steps.main_sha.outputs.sha }}

      - name: Run benchmarks for PR against main baseline
        id: bench-pr
        # Explicit `shell: bash` enables `-o pipefail`; with the default shell
        # the exit status of the pipeline is that of `tee`, which would hide a
        # failing `cargo bench`.
        shell: bash
        run: |
          cargo bench --bench mpmc-throughput -- --measurement-time 20 --sample-size 50 --baseline main 2>&1 | tee bench_output.txt

      - name: Upload criterion reports
        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
        with:
          name: pr-${{ steps.parse-pr.outputs.pr_number }}-bench-${{ github.run_id }}
          path: ${{ env.CARGO_TARGET_DIR }}/criterion

      - name: Parse benchmark results
        id: parse-results
        env:
          MAIN_SHA: ${{ steps.main_sha.outputs.sha }}
        run: |
          # Turn the captured Criterion output into a Markdown table
          # (results.md) that the next step posts on the pull request.

          {
            echo "## Benchmark Results"
            echo ""
            echo "| Benchmark | Runtime Change | Status |"
            echo "|-----------|----------------|--------|"
          } > results.md

          # Criterion outputs lines like:
          #   "mpmc//cap32_payload32_batch1_prod2_cons2"
          #   "    time:   [201.34 ms 206.26 ms 210.69 ms]"
          #   "    change:"
          #   "    time:   [-8.2272% -5.7068% -3.1975%] (p = 0.00 < 0.05)"
          #   "    Performance has improved."
          # The single-line form "    change: [-8.2% -5.7% -3.1%] (...)" is
          # handled as well.
          #
          # The awk program sticks to POSIX features (no three-argument match,
          # no length() on arrays) so it behaves the same under gawk and mawk.
          # It is wrapped in single quotes, so it must not contain any.

          awk '
            # Text between the first "[" and the next "]" of s, or "" if absent.
            function bracket_body(s,    open_at, rest, close_at) {
              open_at = index(s, "[")
              if (open_at == 0) return ""
              rest = substr(s, open_at + 1)
              close_at = index(rest, "]")
              if (close_at == 0) return ""
              return substr(rest, 1, close_at - 1)
            }

            # Print the table row for the benchmark being tracked, but only
            # when a change value and a status line were both seen for it.
            function emit_row(    label) {
              if (current_bench == "" || current_change == "" || current_status == "") return
              label = current_bench
              if (index(label, benchmark_group) == 1) {
                label = substr(label, length(benchmark_group) + 1)
              }
              print "| `" label "` | " current_change " | " current_status " |"
            }

            # Fill current_change from a line holding "[low% mid% high%]".
            # change% = (current - baseline) / baseline * 100, therefore
            # baseline = current / (1 + change%/100) and delta = current - baseline.
            function record_change(line,    n, parts, pct, pct_val, denom, delta) {
              n = split(bracket_body(line), parts, " ")
              if (n < 2) return
              pct = parts[2]

              # Without a parsed current time only the percentage can be shown.
              if (current_time == "" || time_unit == "") {
                current_change = pct
                return
              }

              pct_val = pct
              gsub(/%/, "", pct_val)
              denom = 1 + pct_val / 100.0

              # A change of exactly -100% would divide by zero.
              if (denom == 0) {
                current_change = sprintf("%.2f %s (%s)", -current_time, time_unit, pct)
                return
              }

              delta = current_time - current_time / denom
              current_change = sprintf("%s%.2f %s (%s)", (delta >= 0 ? "+" : ""), delta, time_unit, pct)
            }

            BEGIN {
              # Benchmark group prefix stripped from displayed names.
              benchmark_group = "mpmc//"
            }

            # Benchmark name line: starts in column 0 and contains a slash.
            /^[^ \t]/ && /\// {
              emit_row()
              current_bench = $0
              current_change = ""
              current_status = ""
              current_time = ""
              time_unit = ""
              in_change_section = 0
            }

            # Measured time of the PR build: [lower estimate upper], each
            # value followed by its unit; keep the middle estimate.
            /^ *time: *\[/ && !in_change_section {
              n_time = split(bracket_body($0), times, " ")
              if (n_time >= 4) {
                current_time = times[3]
                time_unit = times[4]
              }
            }

            # Single-line change report.
            /^ *change: *\[/ {
              record_change($0)
              next
            }

            # "change:" header; the percentages follow on the next time line.
            /^ *change:$/ {
              in_change_section = 1
              next
            }

            # Time line inside the change section: relative change in percent.
            # This rule must stay after the measured-time rule above so that a
            # line is never handled by both.
            in_change_section == 1 && /^ *time: *\[/ {
              record_change($0)
              in_change_section = 0
            }

            # Verdict lines printed by Criterion after the change data.
            /Performance has regressed/ { current_status = "⚠️ Regressed" }
            /Performance has improved/ { current_status = "✅ Improved" }
            /No change in performance detected/ { current_status = "➖ No Change" }
            /Change within noise threshold/ { current_status = "➖ No Change (within noise)" }

            # The last benchmark has no following name line to flush it.
            END {
              emit_row()
            }
          ' bench_output.txt >> results.md

          # If no results were found (only header lines remain), add a note.
          # Result rows are the only lines that contain backticks.
          result_count=$(tail -n +4 results.md | grep -c '`' || true)
          if [ "$result_count" -eq 0 ]; then
            echo "| No benchmark comparisons found | - | - |" >> results.md
          fi

          {
            echo ""
            echo "_Baseline: main @ \`${MAIN_SHA}\`_"
          } >> results.md

          # Show the table in the job log as well.
          cat results.md

      - name: Comment benchmark artifact on PR
        if: success()
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        env:
          PR_NUMBER: ${{ steps.parse-pr.outputs.pr_number }}
          PROGRESS_COMMENT_ID: ${{ steps.progress-comment.outputs.comment_id }}
        with:
          script: |
            const fs = require('fs');
            const prNumber = Number(process.env.PR_NUMBER);
            // Empty when the progress comment was never created; Number('') is 0.
            const commentId = Number(process.env.PROGRESS_COMMENT_ID);
            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const artifactName = `pr-${prNumber}-bench-${context.runId}`;

            // Read parsed results
            let resultsTable = '';
            try {
              resultsTable = fs.readFileSync('results.md', 'utf8');
            } catch (err) {
              core.warning(`Could not read results.md: ${err.message}`);
              resultsTable = '_No benchmark results could be parsed._';
            }

            // Joined with real newlines so Markdown renders the table and list.
            const body = [
              `Benchmark run completed for PR #${prNumber}.`,
              ``,
              resultsTable,
              ``,
              `---`,
              ``,
              `- Run: ${runUrl}`,
              `- Artifact: ${artifactName} (reports + baseline/compare data)`
            ].join('\n');

            if (commentId) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: commentId,
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body,
              });
            }

      - name: Comment benchmark failure on PR
        if: failure()
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        env:
          PR_NUMBER: ${{ steps.parse-pr.outputs.pr_number }}
          PROGRESS_COMMENT_ID: ${{ steps.progress-comment.outputs.comment_id }}
        with:
          script: |
            const prNumber = Number(process.env.PR_NUMBER);
            const commentId = Number(process.env.PROGRESS_COMMENT_ID);

            // The failing step may be the PR-number parser itself, in which
            // case there is no pull request to comment on.
            if (!Number.isInteger(prNumber) || prNumber <= 0) {
              core.warning('No valid PR number available; skipping failure comment.');
              return;
            }

            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;

            const body = [
              `⚠️ Benchmark run failed for PR #${prNumber}.`,
              ``,
              `Please check the [workflow run](${runUrl}) for details.`
            ].join('\n');

            if (commentId) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: commentId,
                body,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body,
              });
            }
# 0 commit comments