Commit faf6346

fix: pr benchmark results (#4141)
To disambiguate different TPC-H runs, include the scale factor in the dataset name. In addition, we filter out duplicate entries, which can result from nightly runs when the commit is identical to the base commit of the PR, to avoid duplicate result rows.

Signed-off-by: Alexander Droste <[email protected]>
1 parent 78e085e commit faf6346

2 files changed: +9 −5 lines

bench-vortex/src/datasets/mod.rs

Lines changed: 5 additions & 5 deletions
@@ -39,12 +39,12 @@ pub enum BenchmarkDataset {
 }
 
 impl BenchmarkDataset {
-    pub fn name(&self) -> &str {
+    pub fn name(&self) -> String {
         match self {
-            BenchmarkDataset::TpcH { .. } => "tpch",
-            BenchmarkDataset::TpcDS { .. } => "tpcds",
-            BenchmarkDataset::ClickBench { .. } => "clickbench",
-            BenchmarkDataset::PublicBi { .. } => "public-bi",
+            BenchmarkDataset::TpcH { scale_factor } => format!("tpch_sf{scale_factor}"),
+            BenchmarkDataset::TpcDS { scale_factor } => format!("tpcds_sf{scale_factor}"),
+            BenchmarkDataset::ClickBench { .. } => "clickbench".to_string(),
+            BenchmarkDataset::PublicBi { .. } => "public-bi".to_string(),
         }
     }
 }

scripts/compare-benchmark-jsons.py

Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,10 @@
 base = pd.read_json(sys.argv[1], lines=True)
 pr = pd.read_json(sys.argv[2], lines=True)
 
+# Filter duplicate entries which can result from nightly
+# runs if the commit is identical to the base commit of the PR.
+base = base.drop_duplicates(subset=["name", "storage"], keep="last")
+
 base_commit_id = set(base["commit_id"].unique())
 pr_commit_id = set(pr["commit_id"].unique())
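
To make the effect concrete, here is a minimal sketch (not part of the commit) of how the two changes combine in the comparison step. It assumes the benchmark JSON lines contain at least the name, storage, and commit_id columns used by the script above, that the name column carries the scale-factor-qualified dataset name, and that scale_factor formats as a plain integer; the storage value and timing column are made up for illustration.

import pandas as pd

# Hypothetical base rows in which each benchmark appears twice because a
# nightly run used the same commit as the PR's base commit. Dataset names
# now carry the scale factor ("tpch_sf1" vs "tpch_sf10"), so runs at
# different scale factors no longer collapse onto a single "tpch" key.
base = pd.DataFrame([
    {"name": "tpch_sf1",  "storage": "nvme", "commit_id": "78e085e", "time_s": 1.21},
    {"name": "tpch_sf10", "storage": "nvme", "commit_id": "78e085e", "time_s": 11.80},
    # duplicates produced by the nightly run on the identical commit
    {"name": "tpch_sf1",  "storage": "nvme", "commit_id": "78e085e", "time_s": 1.19},
    {"name": "tpch_sf10", "storage": "nvme", "commit_id": "78e085e", "time_s": 11.95},
])

# Same call as in scripts/compare-benchmark-jsons.py: keep only the last
# row per (name, storage) pair, dropping the earlier duplicates.
base = base.drop_duplicates(subset=["name", "storage"], keep="last")
print(base)  # two rows remain, one per dataset/storage combination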
