Skip to content

Commit 1c72a47

Browse files
committed
Remove Lance benchmarks
Signed-off-by: Adam Gutglick <[email protected]>
1 parent fdafb28 commit 1c72a47

File tree

22 files changed

+112
-2829
lines changed

22 files changed

+112
-2829
lines changed

Cargo.lock

Lines changed: 111 additions & 2150 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bench-vortex/Cargo.toml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,7 @@ version = { workspace = true }
1616
[lints]
1717
workspace = true
1818

19-
[features]
20-
lance = ["dep:lance", "dep:lance-encoding"]
21-
2219
[dependencies]
23-
lance = { version = "0.39.0", optional = true }
24-
lance-encoding = { version = "0.39.0", optional = true }
25-
2620
anyhow = { workspace = true }
2721
arrow-array = { workspace = true }
2822
arrow-cast = { workspace = true }

bench-vortex/src/bin/compress.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,6 @@ pub fn benchmark_compress(
254254
let compress_fn: CompressFn = match format {
255255
Format::OnDiskVortex => compress::benchmark_vortex_compress,
256256
Format::Parquet => compress::benchmark_parquet_compress,
257-
#[cfg(feature = "lance")]
258-
Format::Lance => compress::benchmark_lance_compress,
259257
_ => unimplemented!("Compress bench not implemented for {format}"),
260258
};
261259

@@ -272,8 +270,6 @@ pub fn benchmark_compress(
272270
let decompress_fn: DecompressFn = match format {
273271
Format::OnDiskVortex => compress::benchmark_vortex_decompress,
274272
Format::Parquet => compress::benchmark_parquet_decompress,
275-
#[cfg(feature = "lance")]
276-
Format::Lance => compress::benchmark_lance_decompress,
277273
_ => unimplemented!("Decompress bench not implemented for {format}"),
278274
};
279275

bench-vortex/src/bin/random_access.rs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ use bench_vortex::display::DisplayFormat;
1616
use bench_vortex::display::print_measurements_json;
1717
use bench_vortex::display::render_table;
1818
use bench_vortex::measurements::TimingMeasurement;
19-
#[cfg(feature = "lance")]
20-
use bench_vortex::random_access::take::take_lance;
2119
use bench_vortex::random_access::take::take_parquet;
2220
use bench_vortex::random_access::take::take_vortex_tokio;
2321
use bench_vortex::setup_logging_and_tracing;
@@ -128,8 +126,6 @@ fn random_access(
128126
let engine = match format {
129127
Format::OnDiskVortex | Format::VortexCompact => Engine::Vortex,
130128
Format::Parquet => Engine::Arrow,
131-
#[cfg(feature = "lance")]
132-
Format::Lance => Engine::Arrow,
133129
Format::Csv | Format::Arrow | Format::OnDiskDuckDB => unimplemented!(),
134130
};
135131
let target = Target::new(engine, format);
@@ -185,22 +181,6 @@ fn random_access(
185181
},
186182
)
187183
}
188-
#[cfg(feature = "lance")]
189-
Format::Lance => {
190-
let taxi_lance = runtime.block_on(taxi_data_lance())?;
191-
192-
create_timing_measurement(
193-
|indices| async { take_lance(&taxi_lance, indices).await },
194-
TimingConfig {
195-
name: "random-access/lance-tokio-local-disk".to_string(),
196-
storage: STORAGE_NVME.to_owned(),
197-
runtime: &runtime,
198-
indices: &indices,
199-
time_limit,
200-
target,
201-
},
202-
)
203-
}
204184
Format::Csv | Format::Arrow | Format::OnDiskDuckDB => unimplemented!(),
205185
};
206186

bench-vortex/src/clickbench/clickbench_benchmark.rs

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -123,29 +123,6 @@ impl Benchmark for ClickBenchBenchmark {
123123
})?
124124
}
125125
}
126-
#[cfg(feature = "lance")]
127-
Format::Lance => {
128-
// Lance manages its own partitioning internally, so flavor doesn't matter.
129-
if self.flavor == Flavor::Single {
130-
eprintln!(
131-
"Note: Lance manages its own internal partitioning. There is no \
132-
difference between Single and Partitioned flavors for Lance format."
133-
);
134-
}
135-
136-
// Download Parquet files (either Single or Partitioned).
137-
self.flavor.download(&client, basepath.as_path())?;
138-
139-
// Then convert to Lance format (idempotent).
140-
if self.data_url.scheme() == "file" {
141-
let file_path = self.data_url.to_file_path().map_err(|_| {
142-
anyhow::anyhow!("invalid file URL: {}", self.data_url)
143-
})?;
144-
145-
let rt = Runtime::new()?;
146-
rt.block_on(async { convert_parquet_to_lance(&file_path).await })?
147-
}
148-
}
149126
f => {
150127
todo!("format {f} unsupported in clickbench")
151128
}

bench-vortex/src/clickbench/clickbench_data.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ use crate::CompactionStrategy;
4646
use crate::Format;
4747
use crate::SESSION;
4848
use crate::conversions::parquet_to_vortex;
49-
#[cfg(feature = "lance")]
50-
use crate::utils;
5149
use crate::utils::file_utils::idempotent;
5250
use crate::utils::file_utils::idempotent_async;
5351

@@ -238,26 +236,6 @@ pub async fn convert_parquet_to_vortex(
238236
Ok(())
239237
}
240238

241-
/// Convert Parquet files to Lance format for ClickBench.
242-
/// Lance manages its own internal partitioning, so we convert all Parquet files
243-
/// (whether Single or Partitioned flavor) into a single Lance dataset.
244-
#[cfg(feature = "lance")]
245-
pub async fn convert_parquet_to_lance(input_path: &Path) -> anyhow::Result<()> {
246-
let lance_dir = input_path.join(Format::Lance.name());
247-
let parquet_dir = input_path.join(Format::Parquet.name());
248-
249-
// Use the generic converter with no prefix filter (accepts all parquet files)
250-
// ClickBench also uses Utf8View columns that need conversion for Lance
251-
utils::convert_parquet_to_lance(
252-
&parquet_dir,
253-
&lance_dir,
254-
"hits", // ClickBench uses "hits" as the dataset name
255-
None, // No file prefix filter
256-
true, // Convert Utf8View to Utf8 for Lance compatibility
257-
)
258-
.await
259-
}
260-
261239
pub async fn register_vortex_files(
262240
session: SessionContext,
263241
table_name: &str,

bench-vortex/src/compress/bench.rs

Lines changed: 0 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,6 @@ use tokio::runtime::Runtime;
1717
use vortex::array::Array;
1818
use vortex::array::arrays::ChunkedVTable;
1919
use vortex::utils::aliases::hash_map::HashMap;
20-
#[cfg(feature = "lance")]
21-
#[rustfmt::skip]
22-
use {
23-
super::lance::*,
24-
crate::bench_run::run_with_setup,
25-
crate::utils::convert_utf8view_batch,
26-
crate::utils::convert_utf8view_schema,
27-
arrow_array::RecordBatch,
28-
parking_lot::Mutex,
29-
std::fs,
30-
std::path::PathBuf,
31-
std::sync::Arc,
32-
};
3320

3421
use crate::Format;
3522
use crate::bench_run::run;
@@ -229,125 +216,6 @@ pub fn benchmark_parquet_decompress(
229216
Ok((time, timing))
230217
}
231218

232-
#[cfg(feature = "lance")]
233-
pub fn benchmark_lance_compress(
234-
runtime: &Runtime,
235-
uncompressed: &dyn Array,
236-
iterations: usize,
237-
bench_name: &str,
238-
) -> Result<(
239-
Duration,
240-
u64,
241-
Vec<CustomUnitMeasurement>,
242-
CompressionTimingMeasurement,
243-
)> {
244-
// NOTE: Lance requires filesystem access unlike Parquet/Vortex which use in-memory buffers.
245-
// To make the benchmark fairer, we exclude directory creation and size calculation from timing
246-
// (which is included in timing in the other benchmarks).
247-
248-
let chunked = uncompressed.as_::<ChunkedVTable>().clone();
249-
let (batches, schema) = chunked_to_vec_record_batch(chunked);
250-
251-
// Convert Utf8View to Utf8 (Lance doesn't support Utf8View).
252-
let converted_batches: Vec<RecordBatch> = batches
253-
.into_iter()
254-
.map(convert_utf8view_batch)
255-
.collect::<Result<Vec<_>, _>>()?;
256-
let converted_schema = convert_utf8view_schema(&schema);
257-
258-
let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");
259-
let iteration_paths: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(Vec::new()));
260-
let iteration_counter = AtomicU64::new(0);
261-
262-
// Run the benchmark and measure time.
263-
let time = run_with_setup(
264-
runtime,
265-
iterations,
266-
|| {
267-
// Create a unique subdirectory for each iteration (not timed).
268-
let iteration_id = iteration_counter.fetch_add(1, Ordering::Relaxed);
269-
let iteration_dir = temp_dir.path().join(format!("iter_{}", iteration_id));
270-
fs::create_dir_all(&iteration_dir).expect("Failed to create iteration directory");
271-
272-
(
273-
iteration_dir,
274-
converted_batches.clone(),
275-
converted_schema.clone(),
276-
iteration_paths.clone(),
277-
)
278-
},
279-
|(iteration_dir, batches, schema, paths)| async move {
280-
lance_compress_write_only(batches, schema, &iteration_dir)
281-
.await
282-
.expect("Failed to compress with lance");
283-
284-
// Since there should be low contention, this won't block and will be fast.
285-
paths.lock().push(iteration_dir);
286-
},
287-
);
288-
289-
// Calculate size from the last iteration.
290-
let paths = iteration_paths.lock();
291-
let lance_compressed_size_val = if let Some(last_path) = paths.last() {
292-
calculate_lance_size(last_path).expect("Failed to calculate Lance size")
293-
} else {
294-
0
295-
};
296-
let ratios = vec![CustomUnitMeasurement {
297-
name: format!("lance size/{bench_name}"),
298-
// Unlike timings, ratios have a single column vortex.
299-
format: Format::OnDiskVortex,
300-
unit: Cow::from("bytes"),
301-
value: lance_compressed_size_val as f64,
302-
}];
303-
304-
let timing = CompressionTimingMeasurement {
305-
name: format!("compress time/{bench_name}"),
306-
time,
307-
format: Format::Lance,
308-
};
309-
310-
Ok((time, lance_compressed_size_val, ratios, timing))
311-
}
312-
313-
#[cfg(feature = "lance")]
314-
pub fn benchmark_lance_decompress(
315-
runtime: &Runtime,
316-
uncompressed: &dyn Array,
317-
iterations: usize,
318-
bench_name: &str,
319-
) -> Result<(Duration, CompressionTimingMeasurement)> {
320-
// NOTE: Lance requires filesystem access unlike Parquet/Vortex which use in-memory buffers.
321-
let chunked = uncompressed.as_::<ChunkedVTable>().clone();
322-
let (batches, schema) = chunked_to_vec_record_batch(chunked);
323-
let temp_dir = tempfile::tempdir().expect("Failed to create temp dir");
324-
325-
// Write the Lance dataset once for all iterations.
326-
let dataset_path = runtime.block_on(async {
327-
lance_compress_write(batches, schema, &temp_dir)
328-
.await
329-
.expect("Failed to compress with lance for decompression test")
330-
});
331-
332-
// Keep temp_dir alive to prevent deletion.
333-
let temp_path = (dataset_path, temp_dir);
334-
335-
// Run the benchmark and measure time.
336-
let time = run(runtime, iterations, || async {
337-
lance_decompress_read(&temp_path.0)
338-
.await
339-
.expect("Failed to decompress with lance");
340-
});
341-
342-
let timing = CompressionTimingMeasurement {
343-
name: format!("decompress time/{bench_name}"),
344-
time,
345-
format: Format::Lance,
346-
};
347-
348-
Ok((time, timing))
349-
}
350-
351219
// Helper function to calculate ratios between formats.
352220
pub fn calculate_ratios(
353221
measurements: &HashMap<(Format, CompressOp), Duration>,
@@ -356,9 +224,6 @@ pub fn calculate_ratios(
356224
ratios: &mut Vec<CustomUnitMeasurement>,
357225
) {
358226
calculate_vortex_parquet_ratios(measurements, compressed_sizes, bench_name, ratios);
359-
360-
#[cfg(feature = "lance")]
361-
calculate_vortex_lance_ratios(measurements, compressed_sizes, bench_name, ratios);
362227
}
363228

364229
fn calculate_vortex_parquet_ratios(
@@ -406,50 +271,3 @@ fn calculate_vortex_parquet_ratios(
406271
});
407272
}
408273
}
409-
410-
#[cfg(feature = "lance")]
411-
fn calculate_vortex_lance_ratios(
412-
measurements: &HashMap<(Format, CompressOp), Duration>,
413-
compressed_sizes: &HashMap<Format, u64>,
414-
bench_name: &str,
415-
ratios: &mut Vec<CustomUnitMeasurement>,
416-
) {
417-
// Size ratio: vortex vs lance.
418-
if let (Some(vortex_size), Some(lance_size)) = (
419-
compressed_sizes.get(&Format::OnDiskVortex),
420-
compressed_sizes.get(&Format::Lance),
421-
) {
422-
ratios.push(CustomUnitMeasurement {
423-
name: format!("vortex:lance size/{bench_name}"),
424-
format: Format::OnDiskVortex,
425-
unit: Cow::from("ratio"),
426-
value: *vortex_size as f64 / *lance_size as f64,
427-
});
428-
}
429-
430-
// Compress time ratio: vortex vs lance.
431-
if let (Some(vortex_time), Some(lance_time)) = (
432-
measurements.get(&(Format::OnDiskVortex, CompressOp::Compress)),
433-
measurements.get(&(Format::Lance, CompressOp::Compress)),
434-
) {
435-
ratios.push(CustomUnitMeasurement {
436-
name: format!("vortex:lance ratio compress time/{bench_name}"),
437-
format: Format::OnDiskVortex,
438-
unit: Cow::from("ratio"),
439-
value: vortex_time.as_nanos() as f64 / lance_time.as_nanos() as f64,
440-
});
441-
}
442-
443-
// Decompress time ratio: vortex vs lance.
444-
if let (Some(vortex_time), Some(lance_time)) = (
445-
measurements.get(&(Format::OnDiskVortex, CompressOp::Decompress)),
446-
measurements.get(&(Format::Lance, CompressOp::Decompress)),
447-
) {
448-
ratios.push(CustomUnitMeasurement {
449-
name: format!("vortex:lance ratio decompress time/{bench_name}"),
450-
format: Format::OnDiskVortex,
451-
unit: Cow::from("ratio"),
452-
value: vortex_time.as_nanos() as f64 / lance_time.as_nanos() as f64,
453-
});
454-
}
455-
}

0 commit comments

Comments
 (0)