Skip to content

Commit 1d1ba18

Browse files
authored
Touch up some benchmarking code and clean up dependencies (#5665)
While working out how to untangle the benchmarks a bit, these changes fell out, and I figured they are worth their own PR.

1. Used `cargo machete` to clear up some unused dependencies.
2. Made some lance-only code in the benchmarks more explicitly gated behind a feature.

---------

Signed-off-by: Adam Gutglick <[email protected]>
1 parent a8ad74f commit 1d1ba18

File tree

33 files changed

+54
-115
lines changed

33 files changed

+54
-115
lines changed

Cargo.lock

Lines changed: 0 additions & 46 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bench-vortex/Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ version = { workspace = true }
1717
workspace = true
1818

1919
[features]
20-
lance = ["dep:lance", "dep:lance-encoding"]
20+
lance = ["dep:lance", "dep:lance-encoding", "dep:arrow-cast"]
2121

2222
[dependencies]
23+
arrow-cast = { workspace = true, optional = true }
2324
lance = { version = "0.39.0", optional = true }
2425
lance-encoding = { version = "0.39.0", optional = true }
2526

2627
anyhow = { workspace = true }
2728
arrow-array = { workspace = true }
28-
arrow-cast = { workspace = true }
2929
arrow-schema = { workspace = true }
3030
arrow-select = { workspace = true }
3131
async-trait = { workspace = true }
@@ -57,7 +57,6 @@ opentelemetry-otlp = { workspace = true, features = ["trace"] }
5757
opentelemetry_sdk = { workspace = true }
5858
parking_lot = { workspace = true }
5959
parquet = { workspace = true, features = ["async"] }
60-
paste = { workspace = true }
6160
rand = { workspace = true }
6261
rayon = { workspace = true }
6362
regex = { workspace = true }

bench-vortex/src/benchmark_driver.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ use crate::metrics::export_plan_spans;
2828
use crate::query_bench::filter_queries;
2929
use crate::query_bench::print_memory_usage;
3030
use crate::query_bench::print_results;
31+
use crate::utils::file::url_scheme_to_storage;
3132
use crate::utils::new_tokio_runtime;
32-
use crate::utils::url_scheme_to_storage;
3333
use crate::vortex_panic;
3434

3535
/// Mode for EXPLAIN queries

bench-vortex/src/clickbench/clickbench_data.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ use crate::SESSION;
4848
use crate::conversions::parquet_to_vortex;
4949
#[cfg(feature = "lance")]
5050
use crate::utils;
51-
use crate::utils::file_utils::idempotent;
52-
use crate::utils::file_utils::idempotent_async;
51+
use crate::utils::file::idempotent;
52+
use crate::utils::file::idempotent_async;
5353

5454
pub static HITS_SCHEMA: LazyLock<Schema> = LazyLock::new(|| {
5555
use DataType::*;
@@ -248,7 +248,7 @@ pub async fn convert_parquet_to_lance(input_path: &Path) -> anyhow::Result<()> {
248248

249249
// Use the generic converter with no prefix filter (accepts all parquet files)
250250
// ClickBench also uses Utf8View columns that need conversion for Lance
251-
utils::convert_parquet_to_lance(
251+
utils::parquet::convert_parquet_to_lance(
252252
&parquet_dir,
253253
&lance_dir,
254254
"hits", // ClickBench uses "hits" as the dataset name

bench-vortex/src/compress/bench.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use vortex::utils::aliases::hash_map::HashMap;
2222
use {
2323
super::lance::*,
2424
crate::bench_run::run_with_setup,
25-
crate::utils::convert_utf8view_batch,
26-
crate::utils::convert_utf8view_schema,
25+
crate::utils::parquet::convert_utf8view_batch,
26+
crate::utils::parquet::convert_utf8view_schema,
2727
arrow_array::RecordBatch,
2828
parking_lot::Mutex,
2929
std::fs,

bench-vortex/src/compress/lance.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use lance::dataset::WriteParams;
2626
use lance_encoding::version::LanceFileVersion;
2727
use tempfile::TempDir;
2828

29-
use crate::utils::parquet_utils::convert_utf8view_batch;
30-
use crate::utils::parquet_utils::convert_utf8view_schema;
29+
use crate::utils::parquet::convert_utf8view_batch;
30+
use crate::utils::parquet::convert_utf8view_schema;
3131

3232
/// Write pre-converted [`RecordBatch`]es to Lance format.
3333
pub async fn lance_compress_write_only(

bench-vortex/src/datasets/data_downloads.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ use reqwest::Client;
1515
use tokio::fs::File as TokioFile;
1616
use tokio::io::AsyncWriteExt;
1717

18-
use crate::utils::file_utils::idempotent;
19-
use crate::utils::file_utils::idempotent_async;
18+
use crate::utils::file::idempotent;
19+
use crate::utils::file::idempotent_async;
2020

2121
pub async fn download_data(fname: PathBuf, data_url: &str) -> Result<PathBuf> {
2222
idempotent_async(&fname, async |path| {

bench-vortex/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::sync::LazyLock;
1212
use clap::ValueEnum;
1313
use itertools::Itertools;
1414
use serde::Serialize;
15-
pub use utils::file_utils::*;
15+
pub use utils::file::*;
1616
pub use utils::logging::*;
1717
use vortex::error::VortexUnwrap;
1818
use vortex::error::vortex_err;

bench-vortex/src/tpch/tpch_benchmark.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use crate::tpch::schema::SUPPLIER;
4444
use crate::tpch::tpch_queries;
4545
use crate::tpch::tpchgen;
4646
use crate::tpch::tpchgen::TpchGenOptions;
47+
4748
#[cfg(feature = "lance")]
4849
#[rustfmt::skip]
4950
use crate::{
@@ -378,7 +379,7 @@ pub async fn convert_all_tpch_to_lance(parquet_dir: &Path, lance_dir: &Path) ->
378379
for table in &tables {
379380
// Use table_ prefix to avoid matching similar names (e.g., part vs partsupp)
380381
let file_prefix = format!("{}_", table);
381-
utils::convert_parquet_to_lance(
382+
utils::parquet::convert_parquet_to_lance(
382383
parquet_dir,
383384
lance_dir,
384385
table, // Dataset name is the table name

bench-vortex/src/tpch/tpchgen.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use crate::CompactionStrategy;
4242
use crate::Format;
4343
use crate::IdempotentPath;
4444
use crate::SESSION;
45-
use crate::utils::file_utils::idempotent_async;
45+
use crate::utils::file::idempotent_async;
4646

4747
type TableFuture<'a> = Pin<Box<dyn Future<Output = Result<()>> + Send + 'a>>;
4848

0 commit comments

Comments
 (0)