Skip to content

Commit bf06cdd

Browse files
mwiewior and claude
committed
Add missing parse_benchmark_results.py script
The compare_benchmark_results.sh script was calling this file but it didn't exist. This script: - Parses baseline and PR CSV benchmark results - Compares polars_bio performance against threshold - Generates JSON and markdown reports - Detects performance regressions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 9dd8de6 commit bf06cdd

File tree

1 file changed

+240
-0
lines changed

1 file changed

+240
-0
lines changed
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
#!/usr/bin/env python3
2+
"""Parse and compare benchmark CSV results between baseline and PR."""
3+
4+
import argparse
5+
import csv
6+
import json
7+
import sys
8+
from pathlib import Path
9+
from typing import Dict, List, Tuple
10+
11+
12+
def parse_csv(csv_path: Path) -> Dict[str, float]:
    """Parse a benchmark CSV and extract each library's mean time.

    Expected format:
    Library,Min (s),Max (s),Mean (s),Speedup
    polars_bio,0.051166,0.066682,0.056735,1.00x
    ...

    Args:
        csv_path: Path to the benchmark results CSV.

    Returns:
        Mapping of library name -> mean time in seconds.

    Raises:
        KeyError: If a row is missing the "Library" or "Mean (s)" column.
        ValueError: If a "Mean (s)" value is not a valid float.
        OSError: If the file cannot be opened.
    """
    results = {}
    # newline="" is required by the csv module so quoted fields containing
    # embedded newlines are parsed correctly.
    with open(csv_path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            results[row["Library"]] = float(row["Mean (s)"])
    return results
28+
29+
30+
def compare_results(
    baseline: Dict[str, float],
    pr: Dict[str, float],
    threshold: float,
) -> Tuple[Dict, List[str]]:
    """Compare baseline and PR timings, flagging regressions.

    Libraries present in only one of the two inputs are skipped. Only
    ``polars_bio`` is checked against *threshold*; other libraries are
    recorded but never counted as regressions.

    Returns:
        (summary_dict, regressions_list)
    """
    regressions: List[str] = []
    summary: Dict = {
        "baseline": {},
        "pr": {},
        "changes": {},
        "regressions": 0,
    }

    for lib in (name for name in baseline if name in pr):
        old = baseline[lib]
        new = pr[lib]

        # Percentage change relative to baseline; guard against a zero
        # (or nonsensical negative) baseline time.
        pct = ((new - old) / old) * 100 if old > 0 else 0

        summary["baseline"][lib] = old
        summary["pr"][lib] = new
        summary["changes"][lib] = {
            "absolute": new - old,
            "percentage": pct,
        }

        # Regressions are tracked for the project library only.
        if lib == "polars_bio" and pct > threshold:
            summary["regressions"] += 1
            regressions.append(f"{lib}: {old:.3f}s -> {new:.3f}s ({pct:+.1f}%)")

    return summary, regressions
77+
78+
79+
def generate_report(
    operation: str,
    summary: Dict,
    regressions: List[str],
    baseline_tag: str,
    pr_ref: str,
    threshold: float,
) -> str:
    """Render the markdown comparison report for a single operation."""
    # Trailing spaces after the tag/ref values are markdown hard line breaks.
    out: List[str] = [
        f"## {operation}",
        "",
        f"**Baseline:** {baseline_tag} ",
        f"**PR:** {pr_ref} ",
        f"**Threshold:** {threshold}%",
        "",
    ]

    if summary["regressions"] > 0:
        out += [
            f"⚠️ **{summary['regressions']} regression(s) detected**",
            "",
            "### Regressions",
            "",
        ]
        out += [f"- {entry}" for entry in regressions]
        out.append("")
    else:
        out += ["✓ **No regressions detected**", ""]

    # Per-library comparison table, sorted for stable output.
    out += [
        "### Performance Comparison",
        "",
        "| Library | Baseline (s) | PR (s) | Change |",
        "|---------|--------------|--------|---------|",
    ]

    for lib in sorted(summary["baseline"]):
        old = summary["baseline"][lib]
        new = summary["pr"][lib]
        pct = summary["changes"][lib]["percentage"]

        marker = f"{pct:+.1f}%"
        if pct > 0:
            # Any slowdown is flagged red; only >5% speedups earn green.
            marker = f"🔴 {marker}"
        elif pct < -5:
            marker = f"🟢 {marker}"

        out.append(f"| {lib} | {old:.3f} | {new:.3f} | {marker} |")

    out.append("")
    return "\n".join(out)
144+
145+
146+
def main():
    """CLI entry point: parse two benchmark CSVs, compare, emit reports.

    Returns:
        0 on success, 1 if either CSV cannot be parsed. Note that a
        detected regression does NOT change the exit code; callers are
        expected to inspect the comparison JSON instead.
    """
    parser = argparse.ArgumentParser(description="Compare benchmark CSV results")
    parser.add_argument("baseline_csv", type=Path, help="Baseline CSV file")
    parser.add_argument("pr_csv", type=Path, help="PR CSV file")
    parser.add_argument(
        "--threshold",
        type=float,
        default=150.0,
        help="Regression threshold percentage (default: 150)",
    )
    parser.add_argument(
        "--baseline-tag", default="baseline", help="Baseline version tag"
    )
    parser.add_argument("--pr-ref", default="PR", help="PR reference name")
    parser.add_argument(
        "--output-json", type=Path, help="Output JSON file for detailed results"
    )
    parser.add_argument(
        "--output-comparison", type=Path, help="Output JSON file for comparison summary"
    )
    parser.add_argument(
        "--output-report", type=Path, help="Output markdown report file"
    )
    args = parser.parse_args()

    # Parse both CSVs; any failure (missing file, bad column, bad float)
    # is reported to stderr at this CLI boundary.
    try:
        baseline = parse_csv(args.baseline_csv)
        pr = parse_csv(args.pr_csv)
    except Exception as e:
        print(f"Error parsing CSV files: {e}", file=sys.stderr)
        return 1

    # Operation name is the CSV stem prefix, e.g. "overlap_results" -> "overlap".
    operation = args.baseline_csv.stem.split("_")[0]
    summary, regressions = compare_results(baseline, pr, args.threshold)

    def _write_json(path, payload):
        # Serialize *payload* to *path*, creating parent dirs as needed.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as fh:
            json.dump(payload, fh, indent=2)

    if args.output_json:
        _write_json(args.output_json, {"baseline": baseline, "pr": pr})

    if args.output_comparison:
        _write_json(
            args.output_comparison,
            {
                "operation": operation,
                "summary": summary,
                "regressions": regressions,
            },
        )

    if args.output_report:
        args.output_report.parent.mkdir(parents=True, exist_ok=True)
        report = generate_report(
            operation,
            summary,
            regressions,
            args.baseline_tag,
            args.pr_ref,
            args.threshold,
        )
        with open(args.output_report, "w") as fh:
            fh.write(report)

    # One-line console summary for the CI log (no newline yet: the
    # regression/change suffix below completes the line).
    print(
        f" polars_bio: {baseline.get('polars_bio', 0):.3f}s -> "
        f"{pr.get('polars_bio', 0):.3f}s",
        end="",
    )
    if summary["regressions"] > 0:
        print(f" ⚠️ {summary['regressions']} regression(s)")
    else:
        pct = summary["changes"].get("polars_bio", {}).get("percentage", 0)
        print(f" ({pct:+.1f}%)")

    return 0
237+
238+
239+
if __name__ == "__main__":
    # sys.exit(main()) equivalent: SystemExit carries main()'s return code.
    raise SystemExit(main())

0 commit comments

Comments
 (0)