Skip to content

Commit aacd52b

Browse files
authored
Emulate blob storage latency in benchmarks (#2049)
The NVMe devices we use for benchmarks (both in the cloud and locally) are incredibly fast and have incredibly low latency, which makes them forgiving in exactly the way real object storage isn't. This is an attempt at allowing us to emulate the performance of real cloud systems in a more reproducible way. The constants and distributions are not set in stone and we can definitely change them around, especially for ranged reads, where we have more specific numbers from AnyBlob.
1 parent ee7abec commit aacd52b

File tree

15 files changed

+268
-16
lines changed

15 files changed

+268
-16
lines changed

Cargo.lock

Lines changed: 79 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ libfuzzer-sys = "0.4"
101101
log = "0.4.21"
102102
mimalloc = "0.1.42"
103103
moka = "0.12"
104+
governor = "0.8"
104105
num-traits = "0.2.18"
105106
num_enum = "0.7.2"
106107
object_store = "0.11.0"
@@ -116,6 +117,7 @@ pyo3 = { version = ">= 0.22", features = ["extension-module", "abi3-py310"] }
116117
pyo3-log = ">= 0.11"
117118
rancor = "0.1.0"
118119
rand = "0.8.5"
120+
rand_distr = "0.4"
119121
rayon = "1.10.0"
120122
regex = "1.11.0"
121123
reqwest = { version = "0.12.0", features = ["blocking"] }

bench-vortex/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ anyhow = { workspace = true }
2626
arrow-array = { workspace = true }
2727
arrow-schema = { workspace = true }
2828
arrow-select = { workspace = true }
29+
async-trait = { workspace = true }
2930
bytes = { workspace = true }
3031
bzip2 = { workspace = true }
3132
clap = { workspace = true, features = ["derive"] }
@@ -37,6 +38,7 @@ datafusion-common = { workspace = true }
3738
datafusion-physical-plan = { workspace = true }
3839
enum-iterator = { workspace = true }
3940
futures = { workspace = true, features = ["executor"] }
41+
governor = { workspace = true }
4042
homedir = { workspace = true }
4143
humansize = { workspace = true }
4244
indicatif = { workspace = true }
@@ -46,6 +48,7 @@ mimalloc = { workspace = true }
4648
object_store = { workspace = true, features = ["aws"] }
4749
parquet = { workspace = true, features = ["async"] }
4850
rand = { workspace = true }
51+
rand_distr = { workspace = true }
4952
rayon = { workspace = true }
5053
regex = { workspace = true }
5154
reqwest = { workspace = true }

bench-vortex/benches/clickbench.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ fn benchmark(c: &mut Criterion) {
4141
.unwrap();
4242
}
4343

44-
let session_context = get_session_with_cache();
44+
let session_context = get_session_with_cache(false);
4545
let context = session_context.clone();
4646

4747
runtime.block_on(async move {

bench-vortex/benches/tpch.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,22 @@ fn benchmark(c: &mut Criterion) {
2020
Format::InMemoryVortex {
2121
enable_pushdown: true,
2222
},
23+
false,
2324
))
2425
.unwrap();
2526
let arrow_ctx = runtime
26-
.block_on(load_datasets(&data_dir, Format::Arrow))
27+
.block_on(load_datasets(&data_dir, Format::Arrow, false))
2728
.unwrap();
2829
let parquet_ctx = runtime
29-
.block_on(load_datasets(&data_dir, Format::Parquet))
30+
.block_on(load_datasets(&data_dir, Format::Parquet, false))
3031
.unwrap();
3132
let vortex_compressed_ctx = runtime
3233
.block_on(load_datasets(
3334
&data_dir,
3435
Format::OnDiskVortex {
3536
enable_compression: true,
3637
},
38+
false,
3739
))
3840
.unwrap();
3941

bench-vortex/src/bin/clickbench.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ struct Args {
3737
queries: Option<Vec<usize>>,
3838
#[arg(long, default_value = "false")]
3939
emit_plan: bool,
40+
#[arg(long, default_value = "false")]
41+
emulate_object_store: bool,
4042
}
4143

4244
fn main() {
@@ -120,7 +122,7 @@ fn main() {
120122
let mut all_measurements = Vec::default();
121123

122124
for format in &formats {
123-
let session_context = get_session_with_cache();
125+
let session_context = get_session_with_cache(args.emulate_object_store);
124126
let context = session_context.clone();
125127
match format {
126128
Format::Parquet => runtime.block_on(async {

bench-vortex/src/bin/tpch_benchmark.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ struct Args {
3333
verbose: bool,
3434
#[arg(short, long, default_value_t, value_enum)]
3535
display_format: DisplayFormat,
36+
#[arg(long, default_value = "false")]
37+
emulate_object_store: bool,
3638
}
3739

3840
fn main() -> ExitCode {
@@ -62,6 +64,7 @@ fn main() -> ExitCode {
6264
args.warmup,
6365
args.only_vortex,
6466
args.display_format,
67+
args.emulate_object_store,
6568
))
6669
}
6770

@@ -72,6 +75,7 @@ async fn bench_main(
7275
warmup: bool,
7376
only_vortex: bool,
7477
display_format: DisplayFormat,
78+
emulate_object_store: bool,
7579
) -> ExitCode {
7680
// uncomment the below to enable trace logging of datafusion execution
7781
// setup_logger(LevelFilter::Trace);
@@ -101,7 +105,7 @@ async fn bench_main(
101105
let ctxs = try_join_all(
102106
formats
103107
.iter()
104-
.map(|format| load_datasets(&data_dir, *format)),
108+
.map(|format| load_datasets(&data_dir, *format, emulate_object_store)),
105109
)
106110
.await
107111
.unwrap();

0 commit comments

Comments
 (0)