Skip to content

Commit 8d0e5e7

Browse files
committed
chore[ci]: unify the benchmark table analysis thresholds
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 4968607 commit 8d0e5e7

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

scripts/compare-benchmark-jsons.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -56,13 +56,20 @@ def extract_dataset_key(df):
5656

5757
# assert df3["unit_base"].equals(df3["unit_pr"]), (df3["unit_base"], df3["unit_pr"])
5858

59+
# Determine threshold based on benchmark name
60+
# Use 30% threshold for S3 benchmarks, 10% for others
61+
is_s3_benchmark = "s3" in benchmark_name.lower()
62+
threshold_pct = 30 if is_s3_benchmark else 10
63+
improvement_threshold = 1.0 - (threshold_pct / 100.0) # e.g., 0.7 for 30%, 0.9 for 10%
64+
regression_threshold = 1.0 + (threshold_pct / 100.0) # e.g., 1.3 for 30%, 1.1 for 10%
65+
5966
# Generate summary statistics
6067
df3["ratio"] = df3["value_pr"] / df3["value_base"]
6168
df3["remark"] = pd.Series([""] * len(df3))
6269
df3["remark"] = df3["remark"].case_when(
6370
[
64-
(df3["ratio"] >= 1.3, "🚨"),
65-
(df3["ratio"] <= 0.7, "🚀"),
71+
(df3["ratio"] >= regression_threshold, "🚨"),
72+
(df3["ratio"] <= improvement_threshold, "🚀"),
6673
]
6774
)
6875

@@ -115,13 +122,6 @@ def calculate_geo_mean(df):
115122
best_improvement = "No valid vortex comparisons"
116123
worst_regression = "No valid vortex comparisons"
117124

118-
# Determine threshold based on benchmark name
119-
# Use 30% threshold for S3 benchmarks, 10% for others
120-
is_s3_benchmark = "s3" in benchmark_name.lower()
121-
threshold_pct = 30 if is_s3_benchmark else 10
122-
improvement_threshold = 1.0 - (threshold_pct / 100.0) # e.g., 0.7 for 30%, 0.9 for 10%
123-
regression_threshold = 1.0 + (threshold_pct / 100.0) # e.g., 1.3 for 30%, 1.1 for 10%
124-
125125
# Count significant changes for vortex-only results
126126
significant_improvements = (vortex_df["ratio"] < improvement_threshold).sum()
127127
significant_regressions = (vortex_df["ratio"] > regression_threshold).sum()

0 commit comments

Comments
 (0)