@@ -56,13 +56,20 @@ def extract_dataset_key(df):
5656
5757# assert df3["unit_base"].equals(df3["unit_pr"]), (df3["unit_base"], df3["unit_pr"])
5858
59+ # Determine threshold based on benchmark name
60+ # Use 30% threshold for S3 benchmarks, 10% for others
61+ is_s3_benchmark = "s3" in benchmark_name .lower ()
62+ threshold_pct = 30 if is_s3_benchmark else 10
63+ improvement_threshold = 1.0 - (threshold_pct / 100.0 ) # e.g., 0.7 for 30%, 0.9 for 10%
64+ regression_threshold = 1.0 + (threshold_pct / 100.0 ) # e.g., 1.3 for 30%, 1.1 for 10%
65+
5966# Generate summary statistics
6067df3 ["ratio" ] = df3 ["value_pr" ] / df3 ["value_base" ]
6168df3 ["remark" ] = pd .Series (["" ] * len (df3 ))
6269df3 ["remark" ] = df3 ["remark" ].case_when (
6370 [
64- (df3 ["ratio" ] >= 1.3 , "🚨" ),
65- (df3 ["ratio" ] <= 0.7 , "🚀" ),
71+ (df3 ["ratio" ] >= regression_threshold , "🚨" ),
72+ (df3 ["ratio" ] <= improvement_threshold , "🚀" ),
6673 ]
6774)
6875
@@ -115,13 +122,6 @@ def calculate_geo_mean(df):
115122 best_improvement = "No valid vortex comparisons"
116123 worst_regression = "No valid vortex comparisons"
117124
118- # Determine threshold based on benchmark name
119- # Use 30% threshold for S3 benchmarks, 10% for others
120- is_s3_benchmark = "s3" in benchmark_name .lower ()
121- threshold_pct = 30 if is_s3_benchmark else 10
122- improvement_threshold = 1.0 - (threshold_pct / 100.0 ) # e.g., 0.7 for 30%, 0.9 for 10%
123- regression_threshold = 1.0 + (threshold_pct / 100.0 ) # e.g., 1.3 for 30%, 1.1 for 10%
124-
125125# Count significant changes for vortex-only results
126126significant_improvements = (vortex_df ["ratio" ] < improvement_threshold ).sum ()
127127significant_regressions = (vortex_df ["ratio" ] > regression_threshold ).sum ()
0 commit comments