Skip to content

Commit 6fbbd4d

Browse files
committed
chore: PR benchmarking
1 parent 02c1d7b commit 6fbbd4d

File tree

4 files changed

+1220
-0
lines changed

4 files changed

+1220
-0
lines changed

.github/workflows/benchmark.yml

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
# Manually dispatched workflow that benchmarks a target ref against a baseline tag.
name: Performance Benchmarks

on:
  workflow_dispatch:
    inputs:
      # Percentage passed to the comparison script and to the gh-pages alert.
      alert_threshold:
        type: string
        required: false
        default: "150"
        description: "Alert threshold percentage (e.g., 150 for 150% degradation)"
      # Baseline to compare against; empty selects the most recently created tag.
      baseline_tag:
        type: string
        required: false
        default: ""
        description: "Baseline git tag (leave empty for latest tag)"
      # Ref to benchmark; empty selects the ref the workflow was dispatched on.
      target_branch:
        type: string
        required: false
        default: ""
        description: "Target branch to benchmark (leave empty for current branch)"
21+
22+
jobs:
23+
benchmark:
24+
runs-on: ubuntu-latest
25+
permissions:
26+
contents: write
27+
pull-requests: write
28+
29+
steps:
30+
# A full clone (depth 0) is required so every tag is available when the
# baseline tag is selected in a later step.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
34+
35+
# Resolve the baseline ref: the `baseline_tag` input when provided, otherwise
# the most recently created tag. Publishes it as the step output `tag`.
- name: Determine baseline tag
  id: baseline
  env:
    # The input is user-controlled. Hand it over through the environment rather
    # than expanding it into the script text, so it cannot inject shell code.
    INPUT_BASELINE_TAG: ${{ inputs.baseline_tag }}
  run: |
    if [ -n "$INPUT_BASELINE_TAG" ]; then
      BASELINE_TAG="$INPUT_BASELINE_TAG"
      echo "Using user-specified baseline tag: $BASELINE_TAG"
    else
      BASELINE_TAG=$(git tag --sort=-creatordate | head -1)
      if [ -z "$BASELINE_TAG" ]; then
        echo "Error: No git tags found. Please create a tag or specify baseline_tag input."
        exit 1
      fi
      echo "Using latest tag as baseline: $BASELINE_TAG"
    fi

    # Only publish the output once the ref is known to resolve to a commit.
    if ! git rev-parse --verify --quiet "${BASELINE_TAG}^{commit}" >/dev/null; then
      echo "Error: Tag '$BASELINE_TAG' does not exist"
      exit 1
    fi

    echo "tag=$BASELINE_TAG" >> "$GITHUB_OUTPUT"
56+
57+
# Pick the ref to benchmark: the `target_branch` input when provided, otherwise
# the ref this run was dispatched on. Publishes it as the step output `ref`.
- name: Determine target reference
  id: target
  env:
    # Both values can contain arbitrary characters (branch names are
    # user-controlled), so they are passed as data instead of being expanded
    # into the script text.
    INPUT_TARGET_BRANCH: ${{ inputs.target_branch }}
    DEFAULT_REF: ${{ github.ref_name }}
  run: |
    # Fall back to the dispatching ref when the input is unset or empty.
    TARGET_REF="${INPUT_TARGET_BRANCH:-$DEFAULT_REF}"
    echo "ref=$TARGET_REF" >> "$GITHUB_OUTPUT"
    echo "Target reference: $TARGET_REF"
67+
68+
# Check that `alert_threshold` is a decimal number in [100, 1000] and publish
# it as the step output `value`.
- name: Validate threshold
  id: threshold
  env:
    # The raw input is not validated yet, so it must not be expanded into the
    # script text; read it from the environment instead.
    INPUT_ALERT_THRESHOLD: ${{ inputs.alert_threshold }}
  run: |
    THRESHOLD="$INPUT_ALERT_THRESHOLD"
    # Validate threshold is a number between 100 and 1000
    if ! [[ "$THRESHOLD" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
      echo "Error: Threshold must be a number"
      exit 1
    fi
    # bc is used because the threshold may have a fractional part.
    if (( $(echo "$THRESHOLD < 100" | bc -l) )); then
      echo "Error: Threshold must be >= 100"
      exit 1
    fi
    if (( $(echo "$THRESHOLD > 1000" | bc -l) )); then
      echo "Error: Threshold must be <= 1000"
      exit 1
    fi
    echo "value=$THRESHOLD" >> "$GITHUB_OUTPUT"
    echo "Using alert threshold: $THRESHOLD%"
87+
88+
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    python-version: "3.11"

# NOTE(review): `hashFiles('**/lockfiles')` only matches files literally named
# `lockfiles`; confirm this is the intended input for the cache key.
- name: Cache polars-bio-bench repository
  uses: actions/cache@v4
  with:
    key: polars-bio-bench-${{ hashFiles('**/lockfiles') }}
    path: polars-bio-bench

# Reuse the restored checkout when present (refreshing it), otherwise clone.
- name: Clone polars-bio-bench repository
  run: |
    if [ -d "polars-bio-bench" ]; then
      git -C polars-bio-bench pull
    else
      git clone https://github.com/biodatageeks/polars-bio-bench.git
    fi

- name: Install Poetry
  run: |
    curl -sSL https://install.python-poetry.org | python3 -

- name: Install polars-bio-bench dependencies
  working-directory: polars-bio-bench
  run: |
    poetry install
115+
116+
# ============================================
117+
# BASELINE BENCHMARK
118+
# ============================================
119+
120+
# Switch the working tree to the baseline ref resolved earlier.
- name: Checkout baseline code
  env:
    # The tag may originate from user input; pass it as data and quote it so it
    # is neither word-split nor interpreted as shell syntax.
    BASELINE_TAG: ${{ steps.baseline.outputs.tag }}
  run: |
    git checkout "$BASELINE_TAG"

# NOTE(review): the benchmarks are launched with `poetry run`; confirm that
# environment actually picks up this editable install.
- name: Install baseline polars-bio
  run: |
    pip install -e .
127+
128+
# Create the benchmark data directory and publish the benchmark settings.
- name: Set up benchmark environment
  run: |
    BENCH_DATA_ROOT=/tmp/polars-bio-bench/
    mkdir -p "$BENCH_DATA_ROOT"
    # `export` only affects this step's shell. Appending to GITHUB_ENV makes the
    # variables visible to every subsequent step of the job.
    {
      echo "BENCH_DATA_ROOT=$BENCH_DATA_ROOT"
      echo "POLARS_MAX_THREADS=1"
    } >> "$GITHUB_ENV"
133+
134+
# Best-effort run: failures here are tolerated, and the following step decides
# whether usable results were produced.
- name: Run baseline benchmarks
  continue-on-error: true
  working-directory: polars-bio-bench
  env:
    BENCH_DATA_ROOT: /tmp/polars-bio-bench/
    POLARS_MAX_THREADS: "1"
  run: |
    # Run benchmarks using poetry
    poetry run python src/run-benchmarks.py --bench-config conf/benchmark_small.yaml
    # Copy results to parent directory with baseline prefix
    mkdir -p ../baseline_results
    cp -r results/* ../baseline_results/ 2>/dev/null || true

# Abort the job unless the baseline run left at least one entry behind, then
# print a short preview of up to five CSV files.
- name: Verify baseline results
  run: |
    if [ -d baseline_results ] && [ -n "$(ls -A baseline_results)" ]; then
      echo "Baseline results:"
      ls -la baseline_results/
      find baseline_results/ -name "*.csv" | head -5 | xargs -I {} sh -c 'echo "=== {} ===" && head -10 {}'
    else
      echo "Error: Baseline benchmark did not produce results"
      exit 1
    fi
155+
156+
# ============================================
157+
# TARGET/PR BENCHMARK
158+
# ============================================
159+
160+
# Switch the working tree to the ref being benchmarked.
- name: Checkout target code
  env:
    # The ref may originate from user input; pass it as data and quote it so it
    # is neither word-split nor interpreted as shell syntax.
    TARGET_REF: ${{ steps.target.outputs.ref }}
  run: |
    git checkout "$TARGET_REF"

# Remove the baseline build first; a missing package is not an error.
- name: Clean previous installation
  run: |
    pip uninstall -y polars-bio || true

- name: Install target polars-bio
  run: |
    pip install -e .
171+
172+
# Best-effort run: failures here are tolerated, and the following step decides
# whether usable results were produced.
- name: Run target benchmarks
  continue-on-error: true
  working-directory: polars-bio-bench
  env:
    BENCH_DATA_ROOT: /tmp/polars-bio-bench/
    POLARS_MAX_THREADS: "1"
  run: |
    # Run benchmarks using poetry
    poetry run python src/run-benchmarks.py --bench-config conf/benchmark_small.yaml
    # Copy results to parent directory with pr prefix
    mkdir -p ../pr_results
    cp -r results/* ../pr_results/ 2>/dev/null || true

# Abort the job unless the target run left at least one entry behind, then
# print a short preview of up to five CSV files.
- name: Verify target results
  run: |
    if [ -d pr_results ] && [ -n "$(ls -A pr_results)" ]; then
      echo "Target results:"
      ls -la pr_results/
      find pr_results/ -name "*.csv" | head -5 | xargs -I {} sh -c 'echo "=== {} ===" && head -10 {}'
    else
      echo "Error: Target benchmark did not produce results"
      exit 1
    fi
193+
194+
# ============================================
195+
# COMPARISON AND REPORTING
196+
# ============================================
197+
198+
# Run the comparison script and expose the regression count through the step
# outputs `regressions` and `regression_detected`.
- name: Parse and compare benchmark results
  id: comparison
  env:
    # Refs can come from user input: pass them as data and quote them below so
    # they reach the script as single arguments and cannot inject shell code.
    CMP_THRESHOLD: ${{ steps.threshold.outputs.value }}
    CMP_BASELINE_TAG: ${{ steps.baseline.outputs.tag }}
    CMP_TARGET_REF: ${{ steps.target.outputs.ref }}
  run: |
    bash benchmarks/compare_benchmark_results.sh \
      baseline_results \
      pr_results \
      "$CMP_THRESHOLD" \
      "$CMP_BASELINE_TAG" \
      "$CMP_TARGET_REF"

    # Check if regressions were found. A missing key counts as zero, so the
    # integer test below never receives the literal string "null".
    REGRESSIONS=$(jq -r '.total_regressions // 0' comparison_summary.json)
    echo "regressions=$REGRESSIONS" >> "$GITHUB_OUTPUT"

    # Set exit status for later use
    if [ "$REGRESSIONS" -gt 0 ]; then
      echo "regression_detected=true" >> "$GITHUB_OUTPUT"
    else
      echo "regression_detected=false" >> "$GITHUB_OUTPUT"
    fi
218+
219+
# Keep the raw results and the generated reports with the workflow run.
- name: Upload benchmark results as artifact
  uses: actions/upload-artifact@v4
  with:
    name: benchmark-results
    path: |
      baseline_results/
      pr_results/
      comparison_results/
      comparison_reports/
      comparison_report_combined.md
      comparison_summary.json

# Merge the per-operation JSON files into one document; fall back to an empty
# array when jq fails (for example when no result files exist).
- name: Combine benchmark results for gh-pages
  run: |
    if ! jq -s 'add' comparison_results/*_results.json > combined_benchmark_results.json 2>/dev/null; then
      echo "[]" > combined_benchmark_results.json
    fi

# Publishing is best-effort: a failure here does not fail the job.
- name: Store benchmark result to gh-pages
  continue-on-error: true
  uses: benchmark-action/github-action-benchmark@v1
  with:
    name: polars-bio Performance
    tool: customSmallerIsBetter
    output-file-path: combined_benchmark_results.json
    benchmark-data-dir-path: dev/bench
    gh-pages-branch: gh-pages
    auto-push: true
    github-token: ${{ secrets.GITHUB_TOKEN }}
    alert-threshold: "${{ steps.threshold.outputs.value }}%"
    comment-on-alert: true
    fail-on-alert: false
250+
251+
# Post the combined report on the pull request, updating the previous bot
# comment when one exists instead of adding a new one.
# NOTE(review): this workflow is triggered only by `workflow_dispatch`, so the
# condition below looks like it can never be true — confirm the intended trigger.
- name: Comment PR with comparison results
  if: github.event_name == 'pull_request' || github.event.pull_request
  uses: actions/github-script@v7
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
    script: |
      const fs = require('fs');
      const report = fs.readFileSync('comparison_report_combined.md', 'utf8');

      const { owner, repo } = context.repo;
      const issue_number = context.issue.number;

      // Find existing comment. listComments is paginated, so walk every page;
      // otherwise a bot comment past the first page is missed and a duplicate
      // gets posted.
      const comments = await github.paginate(github.rest.issues.listComments, {
        owner,
        repo,
        issue_number,
        per_page: 100,
      });

      // `user` and `body` can be null (e.g. deleted accounts, empty comments).
      const botComment = comments.find((comment) =>
        comment.user?.type === 'Bot' &&
        comment.body?.includes('Benchmark Comparison')
      );

      const commentBody = `${report}\n\n---\n*Benchmark comparison generated by polars-bio CI*`;

      if (botComment) {
        await github.rest.issues.updateComment({
          owner,
          repo,
          comment_id: botComment.id,
          body: commentBody,
        });
      } else {
        await github.rest.issues.createComment({
          owner,
          repo,
          issue_number,
          body: commentBody,
        });
      }
  continue-on-error: true
290+
291+
# Append the combined markdown report to the run's summary page.
- name: Post comparison as workflow summary
  run: |
    cat comparison_report_combined.md >> "$GITHUB_STEP_SUMMARY"

# Named "Warn" because this step only emits a warning annotation and never
# fails the job.
- name: Warn if regressions detected
  if: steps.comparison.outputs.regression_detected == 'true'
  run: |
    echo "::warning::Performance regressions detected! Check the comparison report for details."
    # Don't fail the workflow, just warn
    # Uncomment the line below to fail on regressions:
    # exit 1

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ It provides a DataFrame API for genomics data and is designed to be blazing fast
3333
## Performance benchmarks
3434
![summary-results.png](docs/assets/summary-results.png)
3535

36+
For developers: See [`benchmarks/README_BENCHMARKS.md`](benchmarks/README_BENCHMARKS.md) for information about running performance benchmarks via GitHub Actions.
37+
3638

3739
## Citing
3840

0 commit comments

Comments
 (0)