Skip to content

Commit c2142b7

Browse files
committed
Better timing?
Signed-off-by: Adam Gutglick <[email protected]>
1 parent d40f5f8 commit c2142b7

File tree

6 files changed

+59
-35
lines changed

6 files changed

+59
-35
lines changed

benchmarks/ddb-bench/src/lib.rs

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
//! DuckDB context for benchmarks.
55
66
use std::path::PathBuf;
7+
use std::time::Duration;
8+
use std::time::Instant;
79

810
use anyhow::Result;
911
use tracing::trace;
@@ -32,14 +34,14 @@ impl DuckDBObject {
3234
}
3335

3436
/// DuckDB context for benchmarks.
35-
pub struct DuckDBCtx {
37+
pub struct DuckClient {
3638
pub db: Database,
3739
pub connection: Connection,
3840
pub db_path: Option<PathBuf>,
3941
pub threads: Option<usize>,
4042
}
4143

42-
impl DuckDBCtx {
44+
impl DuckClient {
4345
/// Create a new DuckDB context with a database at `{data_url}/{format}/duckdb.db`.
4446
pub fn new(
4547
data_url: &Url,
@@ -135,17 +137,21 @@ impl DuckDBCtx {
135137
}
136138

137139
/// Execute DuckDB queries for benchmarks using the internal connection.
138-
/// Returns the row count.
139-
pub fn execute_query(&self, query: &str) -> Result<usize> {
140+
/// Returns `(row_count, optional_timing)` where `optional_timing` is the query's
141+
/// internal execution time if available.
142+
pub fn execute_query(&self, query: &str) -> Result<(usize, Option<Duration>)> {
140143
trace!("execute duckdb query: {query}");
144+
let time_instant = Instant::now();
141145
let result = self.connection.query(query)?;
142-
Ok(usize::try_from(result.row_count()).vortex_expect("row count overflow"))
146+
let query_time = time_instant.elapsed();
147+
148+
let row_count = usize::try_from(result.row_count()).vortex_expect("row count overflow");
149+
150+
// TODO: Extract DuckDB's internal timing from profiling info if available
151+
Ok((row_count, Some(query_time)))
143152
}
144153

145154
/// Register tables for benchmarks using the internal connection.
146-
///
147-
/// Uses `benchmark.table_specs()` to get table names and `benchmark.pattern()`
148-
/// to get file glob patterns.
149155
pub fn register_tables<B: Benchmark + ?Sized>(
150156
&self,
151157
benchmark: &B,
@@ -198,8 +204,8 @@ impl DuckDBCtx {
198204
}
199205

200206
trace!("Executing table registration commands: {commands}");
201-
// Ignore the row count result from registration commands
202-
let _ = self.execute_query(&commands)?;
207+
208+
self.execute_query(&commands)?;
203209

204210
Ok(())
205211
}

benchmarks/ddb-bench/src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::path::PathBuf;
77

88
use clap::Parser;
99
use clap::value_parser;
10-
use ddb_bench::DuckDBCtx;
10+
use ddb_bench::DuckClient;
1111
use vortex_bench::BenchmarkArg;
1212
use vortex_bench::CompactionStrategy;
1313
use vortex_bench::Engine;
@@ -120,7 +120,7 @@ async fn main() -> anyhow::Result<()> {
120120
&filtered_queries,
121121
args.iterations,
122122
|format| {
123-
let ctx = DuckDBCtx::new(
123+
let ctx = DuckClient::new(
124124
benchmark.data_url(),
125125
format,
126126
args.delete_duckdb_database,

benchmarks/ddb-bench/src/validation.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::fs;
88
use std::path::Path;
99
use std::path::PathBuf;
1010

11-
use ddb_bench::DuckDBCtx;
11+
use ddb_bench::DuckClient;
1212
use similar::ChangeTag;
1313
use similar::TextDiff;
1414
use vortex_bench::Format;
@@ -44,8 +44,8 @@ pub fn verify_duckdb_tpch_results(
4444
}
4545
fs::create_dir(&tmp_dir)?;
4646

47-
let duckdb_ctx = DuckDBCtx::new_in_memory()?;
48-
duckdb_ctx.register_tables(benchmark, Format::OnDiskVortex)?;
47+
let duckdb = DuckClient::new_in_memory()?;
48+
duckdb.register_tables(benchmark, Format::OnDiskVortex)?;
4949

5050
let mut query_files = fs::read_dir(query_dir)?
5151
.filter_map(Result::ok)
@@ -76,8 +76,8 @@ pub fn verify_duckdb_tpch_results(
7676
let write_csv =
7777
format!("COPY {query_name}_result TO '{csv_actual}' (HEADER, DELIMITER '|');",);
7878

79-
duckdb_ctx.execute_query(&create_table)?;
80-
duckdb_ctx.execute_query(&write_csv)?;
79+
duckdb.execute_query(&create_table)?;
80+
duckdb.execute_query(&write_csv)?;
8181

8282
let csv_expected = Path::new(env!("CARGO_MANIFEST_DIR")).join(format!(
8383
"../../vortex-bench/tpch/results/duckdb/{query_name}.csv"

benchmarks/df-bench/src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use std::path::PathBuf;
55
use std::sync::Arc;
6+
use std::time::Instant;
67

78
use clap::Parser;
89
use clap::value_parser;
@@ -155,9 +156,11 @@ async fn main() -> anyhow::Result<()> {
155156
},
156157
|session, _query_idx, query| {
157158
Box::pin(async move {
159+
let timer = Instant::now();
158160
let (batches, plan) = execute_query(session, query).await?;
161+
let time = timer.elapsed();
159162
let row_count = batches.iter().map(|batch| batch.num_rows()).sum::<usize>();
160-
anyhow::Ok((row_count, plan))
163+
anyhow::Ok((row_count, Some(time), plan))
161164
})
162165
},
163166
)

benchmarks/lance-bench/src/main.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use std::path::PathBuf;
55
use std::sync::Arc;
6+
use std::time::Instant;
67

78
use clap::Parser;
89
use clap::value_parser;
@@ -109,9 +110,11 @@ async fn main() -> anyhow::Result<()> {
109110
},
110111
|session, _query_idx, query| {
111112
Box::pin(async move {
113+
let timer = Instant::now();
112114
let (batches, plan) = execute_query(session, query).await?;
115+
let time = timer.elapsed();
113116
let row_count = batches.iter().map(|batch| batch.num_rows()).sum::<usize>();
114-
anyhow::Ok((row_count, plan))
117+
anyhow::Ok((row_count, Some(time), plan))
115118
})
116119
},
117120
)

vortex-bench/src/runner.rs

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ impl SqlBenchmarkRunner {
107107
/// Run a synchronous query benchmark.
108108
///
109109
/// Executes the query function `iterations` times, collecting timing information.
110-
/// The function should return `(row_count, result)` where `row_count` is used for validation.
110+
/// The function should return `(row_count, optional_timing, result)` where:
111+
/// - `row_count` is used for validation
112+
/// - `optional_timing` can be `Some(Duration)` if the callback wants to report its own timing
113+
/// (e.g., DuckDB's internal timing), or `None` to use external wall-clock measurement
111114
///
112115
/// This handles:
113116
/// - Memory tracking (start/end)
@@ -123,7 +126,7 @@ impl SqlBenchmarkRunner {
123126
mut f: F,
124127
) -> T
125128
where
126-
F: FnMut() -> (usize, T),
129+
F: FnMut() -> (usize, Option<Duration>, T),
127130
{
128131
self.start_query();
129132

@@ -132,8 +135,9 @@ impl SqlBenchmarkRunner {
132135

133136
for _ in 0..iterations {
134137
let start = Instant::now();
135-
let (row_count, iter_result) = f();
136-
runs.push(start.elapsed());
138+
let (row_count, timing, iter_result) = f();
139+
let elapsed = timing.unwrap_or_else(|| start.elapsed());
140+
runs.push(elapsed);
137141

138142
if result.is_none() {
139143
result = Some((row_count, iter_result));
@@ -149,7 +153,10 @@ impl SqlBenchmarkRunner {
149153
/// Run an asynchronous query benchmark.
150154
///
151155
/// Executes the async query function `iterations` times, collecting timing information.
152-
/// The function should return `(row_count, result)` where `row_count` is used for validation.
156+
/// The function should return `(row_count, optional_timing, result)` where:
157+
/// - `row_count` is used for validation
158+
/// - `optional_timing` can be `Some(Duration)` if the callback wants to report its own timing
159+
/// (e.g., DuckDB's internal timing), or `None` to use external wall-clock measurement
153160
///
154161
/// This handles:
155162
/// - Memory tracking (start/end)
@@ -166,7 +173,7 @@ impl SqlBenchmarkRunner {
166173
) -> T
167174
where
168175
F: FnMut() -> Fut,
169-
Fut: Future<Output = (usize, T)>,
176+
Fut: Future<Output = (usize, Option<Duration>, T)>,
170177
{
171178
self.start_query();
172179

@@ -175,8 +182,9 @@ impl SqlBenchmarkRunner {
175182

176183
for _ in 0..iterations {
177184
let start = Instant::now();
178-
let (row_count, iter_result) = f().await;
179-
runs.push(start.elapsed());
185+
let (row_count, timing, iter_result) = f().await;
186+
let elapsed = timing.unwrap_or_else(|| start.elapsed());
187+
runs.push(elapsed);
180188

181189
if result.is_none() {
182190
result = Some((row_count, iter_result));
@@ -306,7 +314,8 @@ impl SqlBenchmarkRunner {
306314
/// 2. Iterates over all queries, calling `execute` for each
307315
///
308316
/// The `execute` callback receives the context, query index, and query string,
309-
/// and should return the row count.
317+
/// and should return `(row_count, optional_timing)` where `optional_timing` can be
318+
/// `Some(Duration)` if the callback wants to report its own timing.
310319
pub fn run_all<Ctx, S, E>(
311320
&mut self,
312321
queries: &[(usize, String)],
@@ -316,19 +325,19 @@ impl SqlBenchmarkRunner {
316325
) -> anyhow::Result<()>
317326
where
318327
S: FnMut(Format) -> anyhow::Result<Ctx>,
319-
E: FnMut(&mut Ctx, usize, &str) -> anyhow::Result<usize>,
328+
E: FnMut(&mut Ctx, usize, &str) -> anyhow::Result<(usize, Option<Duration>)>,
320329
{
321330
for format in self.formats.clone() {
322331
let mut ctx = setup(format)?;
323332

324333
for (query_idx, query) in queries.iter() {
325334
let query_idx = *query_idx;
326335
self.run_query(query_idx, format, iterations, || {
327-
let row_count =
336+
let (row_count, timing) =
328337
execute(&mut ctx, query_idx, query.as_str()).unwrap_or_else(|err| {
329338
vortex_panic!("query {query_idx} failed: {err}");
330339
});
331-
(row_count, ())
340+
(row_count, timing, ())
332341
});
333342
}
334343
}
@@ -343,7 +352,9 @@ impl SqlBenchmarkRunner {
343352
/// 2. Iterates over all queries, calling `execute` for each
344353
///
345354
/// The `execute` callback receives the context, query index, and query string,
346-
/// and should return `(row_count, result)`. Use `Box::pin(async move { ... })` in the closure.
355+
/// and should return `(row_count, optional_timing, result)` where `optional_timing` can be
356+
/// `Some(Duration)` if the callback wants to report its own timing.
357+
/// Use `Box::pin(async move { ... })` in the closure.
347358
pub async fn run_all_async<Ctx, S, SFut, E, T>(
348359
&mut self,
349360
queries: &[(usize, String)],
@@ -358,7 +369,7 @@ impl SqlBenchmarkRunner {
358369
&'c Ctx,
359370
usize,
360371
&'c str,
361-
) -> Pin<Box<dyn Future<Output = anyhow::Result<(usize, T)>> + 'c>>,
372+
) -> Pin<Box<dyn Future<Output = anyhow::Result<(usize, Option<Duration>, T)>> + 'c>>,
362373
{
363374
for format in self.formats.clone() {
364375
let ctx = setup(format).await?;
@@ -373,12 +384,13 @@ impl SqlBenchmarkRunner {
373384

374385
for _ in 0..iterations {
375386
let start = Instant::now();
376-
let (row_count, iter_result) = execute(&ctx, query_idx, query.as_str())
387+
let (row_count, timing, iter_result) = execute(&ctx, query_idx, query.as_str())
377388
.await
378389
.unwrap_or_else(|err| {
379390
vortex_panic!("query {query_idx} failed: {err}");
380391
});
381-
runs.push(start.elapsed());
392+
let elapsed = timing.unwrap_or_else(|| start.elapsed());
393+
runs.push(elapsed);
382394

383395
if result.is_none() {
384396
result = Some((row_count, iter_result));

0 commit comments

Comments
 (0)