Skip to content

Commit 9671c53

Browse files
committed
fixup
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 9564f43 commit 9671c53

File tree

10 files changed

+34
-241
lines changed

10 files changed

+34
-241
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/datafusion-bench/src/main.rs

Lines changed: 10 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use std::time::Instant;
88
use clap::Parser;
99
use clap::value_parser;
1010
use custom_labels::asynchronous::Label;
11-
use datafusion::arrow::array::Array;
1211
use datafusion::arrow::array::RecordBatch;
1312
use datafusion::arrow::util::display::ArrayFormatter;
1413
use datafusion::arrow::util::display::FormatOptions;
@@ -420,23 +419,19 @@ impl BenchmarkQueryResult for DataFusionQueryResult {
420419
.unwrap_or_else(|e| format!("<error: {e}>"))
421420
}
422421

423-
fn normalized_result(&self) -> (Vec<String>, Vec<Vec<String>>) {
424-
normalize_record_batches(&self.0)
422+
fn result_rows(&self) -> (Vec<String>, Vec<Vec<String>>) {
423+
extract_record_batch_rows(&self.0)
425424
}
426425
}
427426

428-
/// Convert Arrow `RecordBatch`es into normalized column names and row values.
427+
/// Extract raw string values from Arrow `RecordBatch`es.
429428
///
430-
/// Uses [`vortex_bench::validation`] normalization for floats and strings to
431-
/// match the sqllogictest conventions used by DuckDB's result normalization.
432-
fn normalize_record_batches(batches: &[RecordBatch]) -> (Vec<String>, Vec<Vec<String>>) {
433-
use datafusion::arrow::datatypes::DataType;
429+
/// Uses `ArrayFormatter` to produce `to_string()` values for every cell.
430+
/// NULL cells are represented as `"NULL"`. No type-specific normalization is
431+
/// applied — each engine's per-engine `.slt.no` reference files contain the
432+
/// exact expected output.
433+
fn extract_record_batch_rows(batches: &[RecordBatch]) -> (Vec<String>, Vec<Vec<String>>) {
434434
use vortex::error::VortexExpect;
435-
use vortex_bench::validation::normalize_decimal;
436-
use vortex_bench::validation::normalize_f32;
437-
use vortex_bench::validation::normalize_f64;
438-
use vortex_bench::validation::normalize_string;
439-
use vortex_bench::validation::normalize_timestamp;
440435

441436
let column_names = batches
442437
.first()
@@ -462,48 +457,8 @@ fn normalize_record_batches(batches: &[RecordBatch]) -> (Vec<String>, Vec<Vec<St
462457

463458
for row_idx in 0..batch.num_rows() {
464459
let mut row = Vec::with_capacity(batch.num_columns());
465-
for (col_idx, formatter) in formatters.iter().enumerate() {
466-
let col = batch.column(col_idx);
467-
if col.is_null(row_idx) {
468-
row.push("NULL".to_string());
469-
} else {
470-
let dt = col.data_type();
471-
let cell = match dt {
472-
DataType::Float32 => {
473-
let arr = col
474-
.as_any()
475-
.downcast_ref::<datafusion::arrow::array::Float32Array>()
476-
.vortex_expect("Float32 downcast");
477-
normalize_f32(arr.value(row_idx))
478-
}
479-
DataType::Float64 => {
480-
let arr = col
481-
.as_any()
482-
.downcast_ref::<datafusion::arrow::array::Float64Array>()
483-
.vortex_expect("Float64 downcast");
484-
normalize_f64(arr.value(row_idx))
485-
}
486-
DataType::Decimal128(_, scale) => {
487-
let arr = col
488-
.as_any()
489-
.downcast_ref::<datafusion::arrow::array::Decimal128Array>()
490-
.vortex_expect("Decimal128 downcast");
491-
normalize_decimal(arr.value(row_idx), *scale)
492-
}
493-
DataType::Utf8
494-
| DataType::LargeUtf8
495-
| DataType::Utf8View
496-
| DataType::Dictionary(..) => {
497-
let s = formatter.value(row_idx).to_string();
498-
normalize_string(&s)
499-
}
500-
DataType::Timestamp(..) | DataType::Date32 | DataType::Date64 => {
501-
normalize_timestamp(&formatter.value(row_idx).to_string())
502-
}
503-
_ => formatter.value(row_idx).to_string(),
504-
};
505-
row.push(cell);
506-
}
460+
for formatter in &formatters {
461+
row.push(formatter.value(row_idx).to_string());
507462
}
508463
rows.push(row);
509464
}

benchmarks/duckdb-bench/src/lib.rs

Lines changed: 3 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,10 @@ use vortex_bench::Format;
1616
use vortex_bench::IdempotentPath;
1717
use vortex_bench::generate_duckdb_registration_sql;
1818
use vortex_bench::runner::BenchmarkQueryResult;
19-
use vortex_bench::validation;
2019
use vortex_duckdb::duckdb::Config;
2120
use vortex_duckdb::duckdb::Connection;
2221
use vortex_duckdb::duckdb::Database;
23-
use vortex_duckdb::duckdb::ExtractedValue;
2422
use vortex_duckdb::duckdb::QueryResult;
25-
use vortex_duckdb::duckdb::Value;
2623

2724
/// DuckDB context for benchmarks.
2825
pub struct DuckClient {
@@ -209,7 +206,7 @@ impl DuckClient {
209206
/// Eagerly materialized wrapper around DuckDB query results.
210207
///
211208
/// Materializes the result on construction so that both `row_count()`,
212-
/// `display()`, and `normalized_result()` can be called via shared reference.
209+
/// `display()`, and `result_rows()` can be called via shared reference.
213210
pub struct DuckQueryResult {
214211
row_count: usize,
215212
display_string: String,
@@ -246,7 +243,7 @@ impl DuckQueryResult {
246243
for col_idx in 0..chunk.column_count() {
247244
let vector = chunk.get_vector(col_idx);
248245
let cell = match vector.get_value(row_idx, chunk.len()) {
249-
Some(value) => normalize_duckdb_value(&value),
246+
Some(value) => value.to_string(),
250247
None => "NULL".to_string(),
251248
};
252249
row.push(cell);
@@ -273,43 +270,7 @@ impl BenchmarkQueryResult for DuckQueryResult {
273270
self.display_string
274271
}
275272

276-
fn normalized_result(&self) -> (Vec<String>, Vec<Vec<String>>) {
273+
fn result_rows(&self) -> (Vec<String>, Vec<Vec<String>>) {
277274
(self.column_names.clone(), self.normalized_rows.clone())
278275
}
279276
}
280-
281-
/// Normalize a DuckDB value to a canonical string representation.
282-
///
283-
/// Uses the same normalization as `vortex-sqllogictest`'s `ValueDisplayAdapter`
284-
/// and the shared [`vortex_bench::validation`] helpers so that results are
285-
/// comparable with DataFusion output.
286-
fn normalize_duckdb_value(value: &Value) -> String {
287-
match value.extract() {
288-
ExtractedValue::Null => "NULL".to_string(),
289-
ExtractedValue::TinyInt(v) => v.to_string(),
290-
ExtractedValue::SmallInt(v) => v.to_string(),
291-
ExtractedValue::Integer(v) => v.to_string(),
292-
ExtractedValue::BigInt(v) => v.to_string(),
293-
ExtractedValue::HugeInt(v) => v.to_string(),
294-
ExtractedValue::UTinyInt(v) => v.to_string(),
295-
ExtractedValue::USmallInt(v) => v.to_string(),
296-
ExtractedValue::UInteger(v) => v.to_string(),
297-
ExtractedValue::UBigInt(v) => v.to_string(),
298-
ExtractedValue::UHugeInt(v) => v.to_string(),
299-
ExtractedValue::Float(v) => validation::normalize_f32(v),
300-
ExtractedValue::Double(v) => validation::normalize_f64(v),
301-
ExtractedValue::Boolean(v) => v.to_string(),
302-
ExtractedValue::Varchar(s) => validation::normalize_string(s.as_str()),
303-
ExtractedValue::Decimal(_, scale, v) => validation::normalize_decimal(v, scale),
304-
// Normalize timestamps to a canonical format for cross-engine comparison.
305-
ExtractedValue::Date(_)
306-
| ExtractedValue::TimestampNs(_)
307-
| ExtractedValue::Timestamp(_)
308-
| ExtractedValue::TimestampMs(_)
309-
| ExtractedValue::TimestampS(_) => validation::normalize_timestamp(&value.to_string()),
310-
// Delegate to DuckDB's native string representation for other types.
311-
ExtractedValue::Blob(_) | ExtractedValue::Time(_) | ExtractedValue::List(_) => {
312-
value.to_string()
313-
}
314-
}
315-
}

benchmarks/lance-bench/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ impl BenchmarkQueryResult for LanceQueryResult {
168168
.unwrap_or_else(|e| format!("<error: {e}>"))
169169
}
170170

171-
fn normalized_result(&self) -> (Vec<String>, Vec<Vec<String>>) {
171+
fn result_rows(&self) -> (Vec<String>, Vec<Vec<String>>) {
172172
unimplemented!("Lance benchmarks do not support result validation")
173173
}
174174
}

vortex-bench/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ arrow-array = { workspace = true }
2222
arrow-schema = { workspace = true }
2323
arrow-select = { workspace = true }
2424
async-trait = { workspace = true }
25-
bigdecimal = { workspace = true }
2625
bytes = { workspace = true }
2726
bzip2 = { workspace = true }
2827
clap = { workspace = true, features = ["derive"] }

vortex-bench/src/benchmark.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
//! Core benchmark trait and types.
55
6+
use std::path::Path;
67
use std::path::PathBuf;
78

89
use arrow_schema::Schema;
@@ -81,7 +82,15 @@ pub trait Benchmark: Send + Sync {
8182
/// Reference files are stored in engine-specific subdirectories as
8283
/// `{dir}/{engine}/q{idx:02}.slt.no` in sqllogictest format (e.g. `results/duckdb/q01.slt.no`).
8384
/// Use `--validate` to check results against them.
85+
///
86+
/// The default implementation uses `dataset_name()` to locate the files at
87+
/// `{CARGO_MANIFEST_DIR}/{dataset_name}/slt/results/`.
8488
fn expected_results_dir(&self) -> Option<PathBuf> {
85-
None
89+
Some(
90+
Path::new(env!("CARGO_MANIFEST_DIR"))
91+
.join(self.dataset_name())
92+
.join("slt")
93+
.join("results"),
94+
)
8695
}
8796
}

vortex-bench/src/clickbench/benchmark.rs

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
use std::env;
55
use std::fs;
66
use std::path::Path;
7-
use std::path::PathBuf;
8-
97
use anyhow::Result;
108
use reqwest::Client;
119
use url::Url;
@@ -124,14 +122,6 @@ impl Benchmark for ClickBenchBenchmark {
124122
vec![TableSpec::new("hits", Some(HITS_SCHEMA.clone()))]
125123
}
126124

127-
fn expected_results_dir(&self) -> Option<PathBuf> {
128-
Some(
129-
Path::new(env!("CARGO_MANIFEST_DIR"))
130-
.join("clickbench")
131-
.join("slt")
132-
.join("results"),
133-
)
134-
}
135125
}
136126

137127
fn clickbench_flavor(flavor: Flavor) -> String {

vortex-bench/src/runner.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,11 @@ pub trait BenchmarkQueryResult {
4343
fn row_count(&self) -> usize;
4444
/// Human-readable representation of the result (used by Explain mode).
4545
fn display(self) -> String;
46-
/// Normalized result for cross-engine validation.
46+
/// Raw result rows for validation.
4747
///
48-
/// Returns column names and rows of normalized string values suitable for
49-
/// comparison across different query engines. Values are normalized using
50-
/// sqllogictest conventions (floats rounded to 12 decimal places, etc.)
51-
/// via [`crate::validation`].
52-
fn normalized_result(&self) -> (Vec<String>, Vec<Vec<String>>);
48+
/// Returns column names and rows of string values extracted from the
49+
/// query result. No cross-engine normalization is applied.
50+
fn result_rows(&self) -> (Vec<String>, Vec<Vec<String>>);
5351
}
5452
use crate::display::DisplayFormat;
5553
use crate::display::print_measurements_json;
@@ -362,7 +360,7 @@ impl SqlBenchmarkRunner {
362360

363361
// Validate the last iteration's result against the reference file.
364362
if validate && self.expected_results_dir.is_some() {
365-
let (_cols, mut rows) = result.normalized_result();
363+
let (_cols, mut rows) = result.result_rows();
366364
if !self.validate_query_result(query_idx, &mut rows) {
367365
validation_failures.push((query_idx, format));
368366
}
@@ -481,7 +479,7 @@ impl SqlBenchmarkRunner {
481479

482480
// Validate the last iteration's result against the reference file.
483481
if validate && self.expected_results_dir.is_some() {
484-
let (_cols, mut rows) = result.normalized_result();
482+
let (_cols, mut rows) = result.result_rows();
485483
if !self.validate_query_result(query_idx, &mut rows) {
486484
validation_failures.push((query_idx, format));
487485
}

vortex-bench/src/tpch/benchmark.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@
33

44
//! TPCH benchmark implementation
55
6-
use std::env;
7-
use std::path::Path;
8-
use std::path::PathBuf;
9-
106
use glob::Pattern;
117
use tracing::info;
128
use tracing::warn;
@@ -143,15 +139,6 @@ impl Benchmark for TpcHBenchmark {
143139
]
144140
}
145141

146-
fn expected_results_dir(&self) -> Option<PathBuf> {
147-
Some(
148-
Path::new(env!("CARGO_MANIFEST_DIR"))
149-
.join("tpch")
150-
.join("slt")
151-
.join("results"),
152-
)
153-
}
154-
155142
#[expect(clippy::expect_used, clippy::unwrap_in_result)]
156143
fn pattern(&self, table_name: &str, format: Format) -> Option<Pattern> {
157144
Some(

0 commit comments

Comments
 (0)