#!/usr/bin/env python3
"""
Compare two ClickBench benchmark runs and show differences.

Usage:
    python compare_runs.py baseline.txt comparison.txt [--threshold 10]

Example:
    python compare_runs.py old_run.txt new_run.txt --threshold 5
"""
12+
13+ import argparse
14+ import re
15+ import sys
16+ from pathlib import Path
17+
18+
def parse_benchmark(filename):
    """Parse a benchmark file and extract its version string and timings.

    The version is the first hit, in priority order, of a beta tag
    (``0.25.0-beta.1444``), a ``b``-suffixed tag (``0.25.0b1444``) or a plain
    ``v1.0.0`` / ``1.0.0`` triple found anywhere in the file.  When none of
    them occurs, the file name without its extension is used instead.

    Every line that begins (after optional whitespace) with a bracketed,
    comma-separated list of numbers and/or ``null`` contributes one entry to
    the timing list: the third value of the list, or the first value when
    the list has fewer than three.  The entry is ``None`` when any value in
    the list is ``null`` or the selected value is not a valid float.

    Args:
        filename: Path of the benchmark output file (read as UTF-8).

    Returns:
        A ``(version, times)`` tuple; ``times`` is a list of floats and/or
        ``None`` in file order.

    Exits the process with status 1, after printing a message to stderr,
    when the file does not exist.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found", file=sys.stderr)
        sys.exit(1)

    # Extract version - try multiple patterns, most specific first.  The
    # plain X.Y.Z pattern must come last: it also matches the prefix of the
    # suffixed forms and would otherwise shadow them.
    version = None
    for pattern in (
        r'0\.\d+\.\d+-beta\.\d+',  # 0.25.0-beta.1444
        r'\d+\.\d+\.\d+b\d+',      # 0.25.0b1444
        r'v?\d+\.\d+\.\d+',        # v1.0.0 or 1.0.0
    ):
        version_match = re.search(pattern, content)
        if version_match:
            version = version_match.group(0)
            break

    if not version:
        # Use filename as fallback
        version = Path(filename).stem

    # Extract timing arrays
    times = []
    for line in content.split('\n'):
        # Tolerate indentation and spaces after commas, e.g. "  [0.1, 0.2, 0.3],".
        match = re.match(r'\s*\[([0-9.,null\s]+)\]', line)
        if not match:
            continue
        inner = match.group(1)
        if not inner.strip():
            # Brackets holding only whitespace carry no timing data.
            continue
        values = [value.strip() for value in inner.split(',')]
        # Take the third run (index 2), or the first value if there are
        # fewer than three.  This is a positional pick, not a median.
        try:
            if 'null' in values:
                times.append(None)
            else:
                times.append(float(values[2]) if len(values) >= 3 else float(values[0]))
        except (ValueError, IndexError):
            times.append(None)

    return version, times
60+
61+
def main():
    """Compare two benchmark files named on the command line and print a report.

    Command-line arguments:
        baseline, comparison: the two benchmark files to compare.
        --threshold / -t: percentage change (default 10.0) beyond which a
            query is listed as a regression (slower) or improvement (faster).
        --top / -n: maximum number of regressions and of improvements shown
            in the closing summary (default 10).

    Queries are paired by position.  When the files hold different numbers
    of timing rows, only the leading rows present in both are compared and a
    warning is written to stderr.  A per-query table, the totals and the top
    regressions/improvements are written to stdout.

    Exits with status 1 when either file yields no timing rows.
    """
    parser = argparse.ArgumentParser(
        description='Compare two ClickBench benchmark runs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument('baseline', help='Baseline benchmark file')
    parser.add_argument('comparison', help='Comparison benchmark file')
    parser.add_argument('--threshold', '-t', type=float, default=10.0,
                        help='Percentage threshold for flagging regressions/improvements (default: 10%%)')
    parser.add_argument('--top', '-n', type=int, default=10,
                        help='Number of top regressions/improvements to show (default: 10)')

    args = parser.parse_args()

    baseline_version, baseline_times = parse_benchmark(args.baseline)
    comparison_version, comparison_times = parse_benchmark(args.comparison)

    if not baseline_times or not comparison_times:
        print("Error: Could not parse timing data from one or both files", file=sys.stderr)
        sys.exit(1)

    # zip() below stops at the shorter list; make that truncation visible
    # instead of silently dropping the trailing queries.
    compared = min(len(baseline_times), len(comparison_times))
    if len(baseline_times) != len(comparison_times):
        print(
            f'Warning: query counts differ; only the first {compared} queries are compared',
            file=sys.stderr,
        )

    print(f'Comparing {baseline_version} (baseline) vs {comparison_version} (comparison)')
    print(f'Found {len(baseline_times)} queries in baseline and {len(comparison_times)} queries in comparison')
    print()
    # Column widths match the per-row format string used in the loop below.
    print('Query # | Baseline | Compare | Delta   | Change')
    print('--------|----------|---------|---------|--------')

    regressions = []
    improvements = []

    for i, (t_base, t_comp) in enumerate(zip(baseline_times, comparison_times), 1):
        if t_base is None or t_comp is None:
            delta_str = 'N/A'
            change_str = 'N/A'
        else:
            delta = t_comp - t_base
            # A zero baseline has no meaningful ratio; report 0% rather than
            # dividing by zero.
            pct_change = ((t_comp / t_base) - 1) * 100 if t_base > 0 else 0
            delta_str = f'{delta:+.2f}s'
            change_str = f'{pct_change:+.1f}%'

            if pct_change > args.threshold:
                regressions.append((i, t_base, t_comp, pct_change))
            elif pct_change < -args.threshold:
                improvements.append((i, t_base, t_comp, pct_change))

        # Test against None explicitly: a legitimate 0.0 timing is falsy and
        # must not be rendered as "null".
        base_cell = 'null' if t_base is None else t_base
        comp_cell = 'null' if t_comp is None else t_comp
        print(f'{i:7} | {base_cell:8} | {comp_cell:7} | {delta_str:7} | {change_str:>7}')

    print()
    print(f'Total queries: {compared}')
    print(f'Regressions (>{args.threshold}% slower): {len(regressions)}')
    print(f'Improvements (>{args.threshold}% faster): {len(improvements)}')
    print()

    if regressions:
        print(f'Top {min(args.top, len(regressions))} Regressions:')
        # Largest slowdown first.
        regressions.sort(key=lambda x: x[3], reverse=True)
        for q, t_base, t_comp, pct in regressions[:args.top]:
            print(f' Query {q:2}: {t_base:.2f}s → {t_comp:.2f}s ({pct:+.1f}%)')

    if improvements:
        print()
        print(f'Top {min(args.top, len(improvements))} Improvements:')
        # Most negative change (largest speed-up) first.
        improvements.sort(key=lambda x: x[3])
        for q, t_base, t_comp, pct in improvements[:args.top]:
            print(f' Query {q:2}: {t_base:.2f}s → {t_comp:.2f}s ({pct:+.1f}%)')
129+
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments