@@ -33,8 +33,8 @@ def welch_ttest(mean1: float, std1: float, n1: int, mean2: float, std2: float, n
3333 return 0.0 , 1.0
3434
3535 # Standard error of difference
36- se1 = (std1 ** 2 ) / n1 if n1 > 0 else 0
37- se2 = (std2 ** 2 ) / n2 if n2 > 0 else 0
36+ se1 = (std1 ** 2 ) / n1 if n1 > 0 else 0
37+ se2 = (std2 ** 2 ) / n2 if n2 > 0 else 0
3838 se_diff = math .sqrt (se1 + se2 )
3939
4040 if se_diff == 0 :
@@ -48,8 +48,8 @@ def welch_ttest(mean1: float, std1: float, n1: int, mean2: float, std2: float, n
4848 df = 1
4949 else :
5050 num = (se1 + se2 ) ** 2
51- denom = (se1 ** 2 ) / (n1 - 1 ) if n1 > 1 else 0
52- denom += (se2 ** 2 ) / (n2 - 1 ) if n2 > 1 else 0
51+ denom = (se1 ** 2 ) / (n1 - 1 ) if n1 > 1 else 0
52+ denom += (se2 ** 2 ) / (n2 - 1 ) if n2 > 1 else 0
5353 df = num / denom if denom > 0 else 1
5454
5555 # Approximate p-value using normal distribution for large df
@@ -63,7 +63,7 @@ def welch_ttest(mean1: float, std1: float, n1: int, mean2: float, std2: float, n
6363 else :
6464 # For smaller df, use a conservative estimate
6565 # This is less accurate but avoids scipy dependency
66- z = abs (t_stat ) * math .sqrt (df / (df + t_stat ** 2 ))
66+ z = abs (t_stat ) * math .sqrt (df / (df + t_stat ** 2 ))
6767 p_value = 2 * (1 - 0.5 * (1 + math .erf (z / math .sqrt (2 ))))
6868
6969 return t_stat , p_value
@@ -72,6 +72,7 @@ def welch_ttest(mean1: float, std1: float, n1: int, mean2: float, std2: float, n
7272@dataclass
7373class ComparisonResult :
7474 """Result of comparing two benchmark results."""
75+
7576 test_name : str
7677 params : Dict
7778 baseline_mean : float
@@ -179,25 +180,27 @@ def compare_results(
179180 # 2. AND the percentage change exceeds threshold (practically significant)
180181 is_significant = p_value < alpha and abs (diff_percent ) >= threshold_percent
181182
182- results .append (ComparisonResult (
183- test_name = base ["name" ],
184- params = base ["params" ],
185- baseline_mean = base_mean ,
186- comparison_mean = comp_mean ,
187- baseline_trimmed_mean = base_trimmed ,
188- comparison_trimmed_mean = comp_trimmed ,
189- baseline_std = base_std ,
190- comparison_std = comp_std ,
191- baseline_runs = base_runs ,
192- comparison_runs = comp_runs ,
193- diff_seconds = diff_seconds ,
194- diff_percent = diff_percent ,
195- is_faster = diff_seconds < 0 ,
196- is_significant = is_significant ,
197- p_value = p_value ,
198- baseline_cv = base_cv ,
199- comparison_cv = comp_cv ,
200- ))
183+ results .append (
184+ ComparisonResult (
185+ test_name = base ["name" ],
186+ params = base ["params" ],
187+ baseline_mean = base_mean ,
188+ comparison_mean = comp_mean ,
189+ baseline_trimmed_mean = base_trimmed ,
190+ comparison_trimmed_mean = comp_trimmed ,
191+ baseline_std = base_std ,
192+ comparison_std = comp_std ,
193+ baseline_runs = base_runs ,
194+ comparison_runs = comp_runs ,
195+ diff_seconds = diff_seconds ,
196+ diff_percent = diff_percent ,
197+ is_faster = diff_seconds < 0 ,
198+ is_significant = is_significant ,
199+ p_value = p_value ,
200+ baseline_cv = base_cv ,
201+ comparison_cv = comp_cv ,
202+ )
203+ )
201204
202205 return results
203206
@@ -296,10 +299,7 @@ def format_text(
296299
297300
298301def format_markdown (
299- results : List [ComparisonResult ],
300- baseline_info : Dict ,
301- comparison_info : Dict ,
302- threshold : float
302+ results : List [ComparisonResult ], baseline_info : Dict , comparison_info : Dict , threshold : float
303303) -> str :
304304 """Format comparison results as markdown."""
305305 lines = []
@@ -311,9 +311,15 @@ def format_markdown(
311311 lines .append ("" )
312312 lines .append ("| | Baseline | Comparison |" )
313313 lines .append ("|---|---|---|" )
314- lines .append (f"| Timestamp | { baseline_info .get ('timestamp' , 'unknown' )} | { comparison_info .get ('timestamp' , 'unknown' )} |" )
315- lines .append (f"| Git commit | `{ baseline_info .get ('git_commit' , 'unknown' )[:8 ]} ` | `{ comparison_info .get ('git_commit' , 'unknown' )[:8 ]} ` |" )
316- lines .append (f"| PyEMD version | { baseline_info .get ('pyemd_version' , 'unknown' )} | { comparison_info .get ('pyemd_version' , 'unknown' )} |" )
314+ lines .append (
315+ f"| Timestamp | { baseline_info .get ('timestamp' , 'unknown' )} | { comparison_info .get ('timestamp' , 'unknown' )} |"
316+ )
317+ lines .append (
318+ f"| Git commit | `{ baseline_info .get ('git_commit' , 'unknown' )[:8 ]} ` | `{ comparison_info .get ('git_commit' , 'unknown' )[:8 ]} ` |"
319+ )
320+ lines .append (
321+ f"| PyEMD version | { baseline_info .get ('pyemd_version' , 'unknown' )} | { comparison_info .get ('pyemd_version' , 'unknown' )} |"
322+ )
317323 lines .append ("" )
318324
319325 # Summary
@@ -351,12 +357,7 @@ def format_markdown(
351357 return "\n " .join (lines )
352358
353359
354- def format_json (
355- results : List [ComparisonResult ],
356- baseline_info : Dict ,
357- comparison_info : Dict ,
358- threshold : float
359- ) -> str :
360+ def format_json (results : List [ComparisonResult ], baseline_info : Dict , comparison_info : Dict , threshold : float ) -> str :
360361 """Format comparison results as JSON."""
361362 data = {
362363 "baseline_info" : baseline_info ,
@@ -400,35 +401,18 @@ def main():
400401 python compare_results.py baseline.json comparison.json --threshold 10
401402 python compare_results.py old/ new/ --format markdown > comparison.md
402403 python compare_results.py old/ new/ --alpha 0.01 # stricter significance
403- """
404- )
405- parser .add_argument (
406- "baseline" ,
407- type = Path ,
408- help = "Path to baseline results (directory or JSON file)"
409- )
410- parser .add_argument (
411- "comparison" ,
412- type = Path ,
413- help = "Path to comparison results (directory or JSON file)"
404+ """ ,
414405 )
406+ parser .add_argument ("baseline" , type = Path , help = "Path to baseline results (directory or JSON file)" )
407+ parser .add_argument ("comparison" , type = Path , help = "Path to comparison results (directory or JSON file)" )
415408 parser .add_argument (
416- "--threshold" ,
417- type = float ,
418- default = 5.0 ,
419- help = "Minimum percentage change to consider significant (default: 5)"
409+ "--threshold" , type = float , default = 5.0 , help = "Minimum percentage change to consider significant (default: 5)"
420410 )
421411 parser .add_argument (
422- "--alpha" ,
423- type = float ,
424- default = 0.05 ,
425- help = "Significance level for t-test (default: 0.05 = 95%% confidence)"
412+ "--alpha" , type = float , default = 0.05 , help = "Significance level for t-test (default: 0.05 = 95%% confidence)"
426413 )
427414 parser .add_argument (
428- "--format" ,
429- choices = ["text" , "json" , "markdown" ],
430- default = "text" ,
431- help = "Output format (default: text)"
415+ "--format" , choices = ["text" , "json" , "markdown" ], default = "text" , help = "Output format (default: text)"
432416 )
433417
434418 args = parser .parse_args ()
0 commit comments