Skip to content

Commit 634aa11

Browse files
committed
feat(bench-vortex): Add time-based benchmarking to random access benchmarks
- Add run_timed_with_setup() to bench_run.rs for time-based benchmarking
- Add --time-limit flag to random_access benchmark CLI
- Update TimingMeasurement to store Vec<Duration> runs and calculate mean
- Ensure at least one run even if it exceeds time limit
- Fall back to iteration-based mode if --time-limit is not specified

This allows different benchmark targets (Vortex, Lance, Parquet) to run for the same time period, collecting as many samples as possible. This is fairer when targets have significantly different performance characteristics (e.g., Parquet is much slower than Vortex/Lance). The mean of all runs is reported.

Fixes #5066
1 parent 99101b1 commit 634aa11

File tree

3 files changed

+82
-6
lines changed

3 files changed

+82
-6
lines changed

bench-vortex/src/bench_run.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,41 @@ where
4141

4242
fastest_result
4343
}
44+
45+
/// Run a benchmark for a specified time limit, collecting all run durations
46+
/// At least one run is guaranteed even if it exceeds the time limit
47+
pub fn run_timed_with_setup<I, O, S, R, F>(
48+
runtime: &Runtime,
49+
time_limit_secs: u64,
50+
mut setup: S,
51+
mut routine: R,
52+
) -> Vec<Duration>
53+
where
54+
S: FnMut() -> I,
55+
R: FnMut(I) -> F,
56+
F: Future<Output = O>,
57+
{
58+
let time_limit = Duration::from_secs(time_limit_secs);
59+
let overall_start = Instant::now();
60+
let mut runs = Vec::new();
61+
62+
// Ensure at least one run
63+
loop {
64+
let state = black_box(setup());
65+
let elapsed = runtime.block_on(async {
66+
let start = Instant::now();
67+
let output = routine(state).await;
68+
let elapsed = start.elapsed();
69+
drop(black_box(output));
70+
elapsed
71+
});
72+
runs.push(elapsed);
73+
74+
// Check if we should continue
75+
if overall_start.elapsed() >= time_limit {
76+
break;
77+
}
78+
}
79+
80+
runs
81+
}

bench-vortex/src/bin/random_access.rs

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::fs::File;
55
use std::io::{Write, stdout};
66
use std::path::PathBuf;
77

8-
use bench_vortex::bench_run::run_with_setup;
8+
use bench_vortex::bench_run::{run_timed_with_setup, run_with_setup};
99
use bench_vortex::datasets::taxi_data::*;
1010
use bench_vortex::display::{DisplayFormat, print_measurements_json, render_table};
1111
use bench_vortex::measurements::TimingMeasurement;
@@ -34,8 +34,13 @@ struct Args {
3434
default_values_t = vec![Format::Parquet, Format::OnDiskVortex]
3535
)]
3636
formats: Vec<Format>,
37+
/// Number of iterations to run (ignored if time_limit is set)
3738
#[arg(short, long, default_value_t = 10)]
3839
iterations: usize,
40+
/// Time limit in seconds for each benchmark target (e.g., 10 for 10 seconds)
41+
/// If set, benchmarks will run for this duration instead of a fixed number of iterations
42+
#[arg(long)]
43+
time_limit: Option<u64>,
3944
#[arg(short, long)]
4045
threads: Option<usize>,
4146
#[arg(short, long)]
@@ -61,6 +66,7 @@ fn main() -> anyhow::Result<()> {
6166
random_access(
6267
args.formats,
6368
runtime,
69+
args.time_limit,
6470
args.iterations,
6571
args.display_format,
6672
indices,
@@ -75,26 +81,38 @@ fn create_timing_measurement<O, B, F>(
7581
storage: String,
7682
runtime: &Runtime,
7783
indices: &Buffer<u64>,
84+
time_limit: Option<u64>,
7885
iterations: usize,
7986
target: Target,
8087
) -> TimingMeasurement
8188
where
8289
B: FnMut(Buffer<u64>) -> F,
8390
F: Future<Output = O>,
8491
{
85-
let benchmark_duration = run_with_setup(runtime, iterations, || indices.clone(), benchmark);
92+
let runs = if let Some(time_limit_secs) = time_limit {
93+
run_timed_with_setup(runtime, time_limit_secs, || indices.clone(), benchmark)
94+
} else {
95+
// Iteration-based mode: `run_with_setup` yields a single duration for all `iterations`, so store it as a one-element vector
96+
vec![run_with_setup(
97+
runtime,
98+
iterations,
99+
|| indices.clone(),
100+
benchmark,
101+
)]
102+
};
86103

87104
TimingMeasurement {
88105
name,
89106
storage,
90107
target,
91-
time: benchmark_duration,
108+
runs,
92109
}
93110
}
94111

95112
fn random_access(
96113
formats: Vec<Format>,
97114
runtime: Runtime,
115+
time_limit: Option<u64>,
98116
iterations: usize,
99117
display_format: DisplayFormat,
100118
indices: Buffer<u64>,
@@ -127,6 +145,7 @@ fn random_access(
127145
STORAGE_NVME.to_owned(),
128146
&runtime,
129147
&indices,
148+
time_limit,
130149
iterations,
131150
target,
132151
)
@@ -143,6 +162,7 @@ fn random_access(
143162
STORAGE_NVME.to_owned(),
144163
&runtime,
145164
&indices,
165+
time_limit,
146166
iterations,
147167
target,
148168
)
@@ -156,6 +176,7 @@ fn random_access(
156176
STORAGE_NVME.to_owned(),
157177
&runtime,
158178
&indices,
179+
time_limit,
159180
iterations,
160181
target,
161182
)
@@ -170,6 +191,7 @@ fn random_access(
170191
STORAGE_NVME.to_owned(),
171192
&runtime,
172193
&indices,
194+
time_limit,
173195
iterations,
174196
target,
175197
)

bench-vortex/src/measurements.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,23 @@ pub struct TimingMeasurement {
161161
pub name: String,
162162
pub target: Target,
163163
pub storage: String,
164-
pub time: Duration,
164+
pub runs: Vec<Duration>,
165+
}
166+
167+
impl TimingMeasurement {
168+
pub fn mean_time(&self) -> Duration {
169+
let len = self.runs.len();
170+
if len == 0 {
171+
vortex_panic!("cannot have no runs");
172+
}
173+
174+
let total_nanos: u128 = self.runs.iter().map(|d| d.as_nanos()).sum();
175+
let mean_nanos = total_nanos / len as u128;
176+
Duration::new(
177+
u64::try_from(mean_nanos / 1_000_000_000).vortex_unwrap(),
178+
u32::try_from(mean_nanos % 1_000_000_000).vortex_unwrap(),
179+
)
180+
}
165181
}
166182

167183
impl ToTable for TimingMeasurement {
@@ -171,7 +187,7 @@ impl ToTable for TimingMeasurement {
171187
name: self.name.clone(),
172188
target: self.target,
173189
unit: Cow::from("μs"),
174-
value: MeasurementValue::Int(self.time.as_micros()),
190+
value: MeasurementValue::Int(self.mean_time().as_micros()),
175191
}
176192
}
177193
}
@@ -182,7 +198,7 @@ impl ToJson for TimingMeasurement {
182198
name: self.name.clone(),
183199
storage: Some(self.storage.clone()),
184200
unit: Some(Cow::from("ns")),
185-
value: MeasurementValue::Int(self.time.as_nanos()),
201+
value: MeasurementValue::Int(self.mean_time().as_nanos()),
186202
bytes: None,
187203
time: None,
188204
commit_id: Cow::from(GIT_COMMIT_ID.as_str()),

0 commit comments

Comments
 (0)