Skip to content

Commit 232d231

Browse files
authored
feat: verify duckdb tpch results (#3365)
Signed-off-by: Alexander Droste <[email protected]>
1 parent 76e1e2d commit 232d231

File tree

28 files changed

+20188
-7
lines changed

28 files changed

+20188
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ rustc-hash = "2"
152152
serde = "1.0.203"
153153
serde_json = "1.0.116"
154154
serde_test = "1.0.176"
155+
similar = "2.7.0"
155156
simplelog = "0.12"
156157
sketches-ddsketch = "0.3.0"
157158
static_assertions = "1.1"

bench-vortex/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ regex = { workspace = true }
5151
reqwest = { workspace = true }
5252
serde = { workspace = true }
5353
serde_json = { workspace = true }
54+
similar = { workspace = true }
5455
tabled = { workspace = true, features = ["std"] }
5556
tar = { workspace = true }
5657
tempfile = { workspace = true }

bench-vortex/src/bin/tpcds.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ async fn bench_main(
181181
.path()
182182
.join(format!("duckdb-file-{}.db", format.name()));
183183

184-
let executor = DuckDBExecutor::new(duckdb_resolved_path.to_owned(), duckdb_file);
184+
let executor = DuckDBExecutor::new(duckdb_resolved_path, duckdb_file);
185185
register_tables(&executor, &url, format, BenchmarkDataset::TpcDS)?;
186186

187187
for (query_idx, sql_query) in tpch_queries.clone() {

bench-vortex/src/bin/tpch.rs

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::path::PathBuf;
22
use std::sync::Arc;
33
use std::time::{Duration, Instant};
4+
use std::{env, fs};
45

56
use anyhow::anyhow;
67
use bench_vortex::ddb::{DuckDBExecutor, register_tables};
@@ -25,6 +26,7 @@ use datafusion::physical_plan::metrics::{Label, MetricsSet};
2526
use indicatif::ProgressBar;
2627
use itertools::Itertools;
2728
use log::{info, warn};
29+
use similar::{ChangeTag, TextDiff};
2830
use tempfile::tempdir;
2931
use url::Url;
3032
use vortex::aliases::hash_map::HashMap;
@@ -455,10 +457,78 @@ async fn bench_main(
455457
}
456458

457459
if verify_row_counts(&row_counts, expected_row_counts, &queries, &exclude_queries) {
458-
Err(anyhow!("Mismatched row counts. See logs for details."))
459-
} else {
460-
anyhow::Ok(())
460+
return Err(anyhow!("Mismatched row counts. See logs for details."));
461+
}
462+
463+
if targets.iter().any(|t| t.engine() == Engine::DuckDB) {
464+
verify_duckdb_tpch_results(scale_factor, duckdb_resolved_path)?;
465+
}
466+
467+
anyhow::Ok(())
468+
}
469+
470+
fn verify_duckdb_tpch_results(scale_factor: u8, duckdb_path: PathBuf) -> anyhow::Result<()> {
471+
let query_dir = PathBuf::from("duckdb-vortex/duckdb/extension/tpch/dbgen/queries");
472+
let tmp_dir = format!(
473+
"{}/spiral-tpch",
474+
// $RUNNER_TEMP is defined by GitHub Actions.
475+
env::var("TMPDIR").unwrap_or(env::var("RUNNER_TEMP")?)
476+
);
477+
if PathBuf::from(&tmp_dir).exists() {
478+
fs::remove_dir_all(&tmp_dir)?;
479+
}
480+
fs::create_dir(&tmp_dir)?;
481+
let db_path = format!("{tmp_dir}/tpch_results_sf.db");
482+
483+
let executor = DuckDBExecutor::new(duckdb_path, &db_path);
484+
ddb::execute_tpch_query(&[format!("CALL dbgen(sf={})", scale_factor)], &executor)?;
485+
486+
let query_files = fs::read_dir(query_dir)?
487+
.filter_map(Result::ok)
488+
.filter(|entry| entry.path().extension().is_some_and(|ext| ext == "sql"))
489+
.collect::<Vec<_>>();
490+
491+
for query_file in &query_files {
492+
let query_file_path = query_file.path();
493+
let query_name = query_file_path
494+
.file_stem()
495+
.and_then(|stem| stem.to_str())
496+
.ok_or_else(|| anyhow!("Invalid query filename"))?;
497+
498+
let create_table = format!(
499+
"CREATE OR REPLACE TABLE {query_name}_result AS {};",
500+
fs::read_to_string(&query_file_path)?
501+
);
502+
503+
let csv_actual = format!("{tmp_dir}/{query_name}.csv");
504+
let write_csv =
505+
format!("COPY {query_name}_result TO '{csv_actual}' (HEADER, DELIMITER '|');",);
506+
507+
ddb::execute_tpch_query(&[create_table, write_csv], &executor)?;
508+
509+
let csv_expected = format!("bench-vortex/tpch_results/duckdb/{query_name}.csv");
510+
let expected = fs::read_to_string(csv_expected)?;
511+
let actual = fs::read_to_string(csv_actual)?;
512+
513+
if expected != actual {
514+
let diff = TextDiff::from_lines(&expected, &actual);
515+
516+
for change in diff.iter_all_changes() {
517+
let sign = match change.tag() {
518+
ChangeTag::Delete => "-",
519+
ChangeTag::Insert => "+",
520+
ChangeTag::Equal => " ",
521+
};
522+
print!("{}{}", sign, change);
523+
}
524+
525+
return Err(anyhow!(format!(
526+
"query output does not match the reference for {query_name}"
527+
)));
528+
}
461529
}
530+
531+
Ok(())
462532
}
463533

464534
fn validate_args(engines: &[Engine], args: &Args) {

bench-vortex/src/engines/ddb/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ impl DuckDBExecutor {
3030
command
3131
}
3232

33-
pub fn new(duckdb_path: PathBuf, duckdb_file: PathBuf) -> Self {
33+
pub fn new(duckdb_path: impl AsRef<Path>, duckdb_file: impl AsRef<Path>) -> Self {
3434
Self {
35-
duckdb_path,
36-
duckdb_file,
35+
duckdb_path: duckdb_path.as_ref().to_path_buf(),
36+
duckdb_file: duckdb_file.as_ref().to_path_buf(),
3737
}
3838
}
3939
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
l_returnflag|l_linestatus|sum_qty|sum_base_price|sum_disc_price|sum_charge|avg_qty|avg_price|avg_disc|count_order
2+
A|F|37734107.00|56586554400.73|53758257134.8700|55909065222.827692|25.522005853257337|38273.129734621674|0.049985295838397614|1478493
3+
N|F|991417.00|1487504710.38|1413082168.0541|1469649223.194375|25.516471920522985|38284.4677608483|0.0500934266742163|38854
4+
N|O|74476040.00|111701729697.74|106118230307.6056|110367043872.497010|25.50222676958499|38249.11798890827|0.04999658605370408|2920374
5+
R|F|37719753.00|56568041380.90|53741292684.6040|55889619119.831932|25.50579361269077|38250.85462609966|0.05000940583012706|1478870

0 commit comments

Comments
 (0)