1+ #!/usr/bin/env python3 
"""
Compare two ClickBench benchmark runs and show differences.

Usage:
    python compare_runs.py baseline.txt comparison.txt [--threshold 10]

Example:
    python compare_runs.py old_run.txt new_run.txt --threshold 5
"""
12+ 
13+ import  argparse 
14+ import  re 
15+ import  sys 
16+ from  pathlib  import  Path 
17+ 
18+ 
def parse_benchmark(filename):
    """Parse a benchmark file and extract version and timing data.

    Args:
        filename: Path of the benchmark output file to read (UTF-8 text).

    Returns:
        A ``(version, times)`` tuple. ``version`` is the first version-like
        string found in the file, or the file name without its extension
        when none is found. ``times`` holds one entry per timing line
        (``[a,b,c]``): a float, or ``None`` when the line contains ``null``
        or a value that cannot be parsed.

    Exits the process with status 1 (after printing to stderr) when the
    file does not exist.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found", file=sys.stderr)
        sys.exit(1)

    # Extract version - try the most specific patterns first. The generic
    # x.y.z pattern must come last: it also matches the prefix of the
    # "b"-suffixed form and would otherwise shadow it.
    version_patterns = (
        r'0\.\d+\.\d+-beta\.\d+',  # 0.25.0-beta.1444
        r'\d+\.\d+\.\d+b\d+',      # 0.25.0b1444
        r'v?\d+\.\d+\.\d+',        # v1.0.0 or 1.0.0
    )
    version = None
    for pattern in version_patterns:
        version_match = re.search(pattern, content)
        if version_match:
            version = version_match.group(0)
            break

    if not version:
        # Use filename as fallback
        version = Path(filename).stem

    # A timing line looks like "[0.5,0.2,0.1]"; leading indentation and
    # whitespace around the values are tolerated.
    timing_re = re.compile(r'\s*\[([0-9.,\snull]+)\]')

    times = []
    for line in content.splitlines():
        match = timing_re.match(line)
        if not match:
            continue
        values = [value.strip() for value in match.group(1).split(',')]
        if 'null' in values:
            # Any missing run makes the whole query unusable.
            times.append(None)
            continue
        # Use the third recorded value (index 2) when present, otherwise
        # fall back to the first one.
        chosen = values[2] if len(values) >= 3 else values[0]
        try:
            times.append(float(chosen))
        except ValueError:
            times.append(None)

    return version, times
60+ 
61+ 
def main():
    """Compare two benchmark files given on the command line.

    Parses both files with ``parse_benchmark``, prints a per-query table of
    baseline time, comparison time, absolute delta and percentage change,
    then a summary and the top regressions/improvements whose change
    exceeds ``--threshold`` percent.

    Exits with status 1 when either file yields no timing data.
    """
    # NOTE(review): the literal text inside several messages below was
    # reconstructed (separators, units, labels) because the original
    # f-strings were truncated -- confirm wording against the upstream file.
    parser = argparse.ArgumentParser(
        description='Compare two ClickBench benchmark runs',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument('baseline', help='Baseline benchmark file')
    parser.add_argument('comparison', help='Comparison benchmark file')
    parser.add_argument('--threshold', '-t', type=float, default=10.0,
                        help='Percentage threshold for flagging regressions/improvements (default: 10%%)')
    parser.add_argument('--top', '-n', type=int, default=10,
                        help='Number of top regressions/improvements to show (default: 10)')

    args = parser.parse_args()
    # A negative count would turn the slices below into "all but the last n".
    top = max(args.top, 0)

    baseline_version, baseline_times = parse_benchmark(args.baseline)
    comparison_version, comparison_times = parse_benchmark(args.comparison)

    if not baseline_times or not comparison_times:
        print("Error: Could not parse timing data from one or both files", file=sys.stderr)
        sys.exit(1)

    print(f'Comparing {baseline_version} vs {comparison_version}')
    print(f'Found {len(baseline_times)} vs {len(comparison_times)} queries')
    if len(baseline_times) != len(comparison_times):
        # zip() below stops at the shorter list; make that visible.
        print('Warning: query counts differ; only the common prefix is compared',
              file=sys.stderr)
    print()
    print('Query # | Baseline | Compare | Delta   | Change')
    print('--------|----------|---------|---------|--------')

    regressions = []
    improvements = []
    compared = 0

    for i, (t_base, t_comp) in enumerate(zip(baseline_times, comparison_times), 1):
        compared += 1
        if t_base is None or t_comp is None:
            delta_str = 'N/A'
            change_str = 'N/A'
        else:
            delta = t_comp - t_base
            # Guard against division by zero: a zero baseline is reported
            # as 0% change.
            pct_change = ((t_comp / t_base) - 1) * 100 if t_base > 0 else 0
            delta_str = f'{delta:+.2f}s'
            change_str = f'{pct_change:+.1f}%'

            if pct_change > args.threshold:
                regressions.append((i, t_base, t_comp, pct_change))
            elif pct_change < -args.threshold:
                improvements.append((i, t_base, t_comp, pct_change))

        # Compare against None explicitly so a legitimate 0.0 timing is not
        # shown as "null".
        base_cell = t_base if t_base is not None else 'null'
        comp_cell = t_comp if t_comp is not None else 'null'
        print(f'{i:7} | {base_cell:8} | {comp_cell:7} | {delta_str:7} | {change_str:>7}')

    print()
    print(f'Total queries: {compared}')
    print(f'Regressions (>{args.threshold}%): {len(regressions)}')
    print(f'Improvements (>{args.threshold}%): {len(improvements)}')
    print()

    if regressions:
        print(f'Top {min(top, len(regressions))} regressions:')
        # Largest slowdown first.
        regressions.sort(key=lambda x: x[3], reverse=True)
        for q, t_base, t_comp, pct in regressions[:top]:
            print(f'  Query {q:2}: {t_base:.2f}s -> {t_comp:.2f}s ({pct:+.1f}%)')

    if improvements:
        print()
        print(f'Top {min(top, len(improvements))} improvements:')
        # Most negative change (largest speed-up) first.
        improvements.sort(key=lambda x: x[3])
        for q, t_base, t_comp, pct in improvements[:top]:
            print(f'  Query {q:2}: {t_base:.2f}s -> {t_comp:.2f}s ({pct:+.1f}%)')
129+ 
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments