@@ -17,19 +17,6 @@ use tokio::runtime::Runtime;
1717use vortex:: array:: Array ;
1818use vortex:: array:: arrays:: ChunkedVTable ;
1919use vortex:: utils:: aliases:: hash_map:: HashMap ;
20- #[ cfg( feature = "lance" ) ]
21- #[ rustfmt:: skip]
22- use {
23- super :: lance:: * ,
24- crate :: bench_run:: run_with_setup,
25- crate :: utils:: convert_utf8view_batch,
26- crate :: utils:: convert_utf8view_schema,
27- arrow_array:: RecordBatch ,
28- parking_lot:: Mutex ,
29- std:: fs,
30- std:: path:: PathBuf ,
31- std:: sync:: Arc ,
32- } ;
3320
3421use crate :: Format ;
3522use crate :: bench_run:: run;
@@ -229,125 +216,6 @@ pub fn benchmark_parquet_decompress(
229216 Ok ( ( time, timing) )
230217}
231218
232- #[ cfg( feature = "lance" ) ]
233- pub fn benchmark_lance_compress (
234- runtime : & Runtime ,
235- uncompressed : & dyn Array ,
236- iterations : usize ,
237- bench_name : & str ,
238- ) -> Result < (
239- Duration ,
240- u64 ,
241- Vec < CustomUnitMeasurement > ,
242- CompressionTimingMeasurement ,
243- ) > {
244- // NOTE: Lance requires filesystem access unlike Parquet/Vortex which use in-memory buffers.
245- // To make the benchmark fairer, we exclude directory creation and size calculation from timing
246- // (which is included in timing in the other benchmarks).
247-
248- let chunked = uncompressed. as_ :: < ChunkedVTable > ( ) . clone ( ) ;
249- let ( batches, schema) = chunked_to_vec_record_batch ( chunked) ;
250-
251- // Convert Utf8View to Utf8 (Lance doesn't support Utf8View).
252- let converted_batches: Vec < RecordBatch > = batches
253- . into_iter ( )
254- . map ( convert_utf8view_batch)
255- . collect :: < Result < Vec < _ > , _ > > ( ) ?;
256- let converted_schema = convert_utf8view_schema ( & schema) ;
257-
258- let temp_dir = tempfile:: tempdir ( ) . expect ( "Failed to create temp dir" ) ;
259- let iteration_paths: Arc < Mutex < Vec < PathBuf > > > = Arc :: new ( Mutex :: new ( Vec :: new ( ) ) ) ;
260- let iteration_counter = AtomicU64 :: new ( 0 ) ;
261-
262- // Run the benchmark and measure time.
263- let time = run_with_setup (
264- runtime,
265- iterations,
266- || {
267- // Create a unique subdirectory for each iteration (not timed).
268- let iteration_id = iteration_counter. fetch_add ( 1 , Ordering :: Relaxed ) ;
269- let iteration_dir = temp_dir. path ( ) . join ( format ! ( "iter_{}" , iteration_id) ) ;
270- fs:: create_dir_all ( & iteration_dir) . expect ( "Failed to create iteration directory" ) ;
271-
272- (
273- iteration_dir,
274- converted_batches. clone ( ) ,
275- converted_schema. clone ( ) ,
276- iteration_paths. clone ( ) ,
277- )
278- } ,
279- |( iteration_dir, batches, schema, paths) | async move {
280- lance_compress_write_only ( batches, schema, & iteration_dir)
281- . await
282- . expect ( "Failed to compress with lance" ) ;
283-
284- // Since there should be low contention, this won't block and will be fast.
285- paths. lock ( ) . push ( iteration_dir) ;
286- } ,
287- ) ;
288-
289- // Calculate size from the last iteration.
290- let paths = iteration_paths. lock ( ) ;
291- let lance_compressed_size_val = if let Some ( last_path) = paths. last ( ) {
292- calculate_lance_size ( last_path) . expect ( "Failed to calculate Lance size" )
293- } else {
294- 0
295- } ;
296- let ratios = vec ! [ CustomUnitMeasurement {
297- name: format!( "lance size/{bench_name}" ) ,
298- // Unlike timings, ratios have a single column vortex.
299- format: Format :: OnDiskVortex ,
300- unit: Cow :: from( "bytes" ) ,
301- value: lance_compressed_size_val as f64 ,
302- } ] ;
303-
304- let timing = CompressionTimingMeasurement {
305- name : format ! ( "compress time/{bench_name}" ) ,
306- time,
307- format : Format :: Lance ,
308- } ;
309-
310- Ok ( ( time, lance_compressed_size_val, ratios, timing) )
311- }
312-
313- #[ cfg( feature = "lance" ) ]
314- pub fn benchmark_lance_decompress (
315- runtime : & Runtime ,
316- uncompressed : & dyn Array ,
317- iterations : usize ,
318- bench_name : & str ,
319- ) -> Result < ( Duration , CompressionTimingMeasurement ) > {
320- // NOTE: Lance requires filesystem access unlike Parquet/Vortex which use in-memory buffers.
321- let chunked = uncompressed. as_ :: < ChunkedVTable > ( ) . clone ( ) ;
322- let ( batches, schema) = chunked_to_vec_record_batch ( chunked) ;
323- let temp_dir = tempfile:: tempdir ( ) . expect ( "Failed to create temp dir" ) ;
324-
325- // Write the Lance dataset once for all iterations.
326- let dataset_path = runtime. block_on ( async {
327- lance_compress_write ( batches, schema, & temp_dir)
328- . await
329- . expect ( "Failed to compress with lance for decompression test" )
330- } ) ;
331-
332- // Keep temp_dir alive to prevent deletion.
333- let temp_path = ( dataset_path, temp_dir) ;
334-
335- // Run the benchmark and measure time.
336- let time = run ( runtime, iterations, || async {
337- lance_decompress_read ( & temp_path. 0 )
338- . await
339- . expect ( "Failed to decompress with lance" ) ;
340- } ) ;
341-
342- let timing = CompressionTimingMeasurement {
343- name : format ! ( "decompress time/{bench_name}" ) ,
344- time,
345- format : Format :: Lance ,
346- } ;
347-
348- Ok ( ( time, timing) )
349- }
350-
351219// Helper function to calculate ratios between formats.
352220pub fn calculate_ratios (
353221 measurements : & HashMap < ( Format , CompressOp ) , Duration > ,
@@ -356,9 +224,6 @@ pub fn calculate_ratios(
356224 ratios : & mut Vec < CustomUnitMeasurement > ,
357225) {
358226 calculate_vortex_parquet_ratios ( measurements, compressed_sizes, bench_name, ratios) ;
359-
360- #[ cfg( feature = "lance" ) ]
361- calculate_vortex_lance_ratios ( measurements, compressed_sizes, bench_name, ratios) ;
362227}
363228
364229fn calculate_vortex_parquet_ratios (
@@ -406,50 +271,3 @@ fn calculate_vortex_parquet_ratios(
406271 } ) ;
407272 }
408273}
409-
410- #[ cfg( feature = "lance" ) ]
411- fn calculate_vortex_lance_ratios (
412- measurements : & HashMap < ( Format , CompressOp ) , Duration > ,
413- compressed_sizes : & HashMap < Format , u64 > ,
414- bench_name : & str ,
415- ratios : & mut Vec < CustomUnitMeasurement > ,
416- ) {
417- // Size ratio: vortex vs lance.
418- if let ( Some ( vortex_size) , Some ( lance_size) ) = (
419- compressed_sizes. get ( & Format :: OnDiskVortex ) ,
420- compressed_sizes. get ( & Format :: Lance ) ,
421- ) {
422- ratios. push ( CustomUnitMeasurement {
423- name : format ! ( "vortex:lance size/{bench_name}" ) ,
424- format : Format :: OnDiskVortex ,
425- unit : Cow :: from ( "ratio" ) ,
426- value : * vortex_size as f64 / * lance_size as f64 ,
427- } ) ;
428- }
429-
430- // Compress time ratio: vortex vs lance.
431- if let ( Some ( vortex_time) , Some ( lance_time) ) = (
432- measurements. get ( & ( Format :: OnDiskVortex , CompressOp :: Compress ) ) ,
433- measurements. get ( & ( Format :: Lance , CompressOp :: Compress ) ) ,
434- ) {
435- ratios. push ( CustomUnitMeasurement {
436- name : format ! ( "vortex:lance ratio compress time/{bench_name}" ) ,
437- format : Format :: OnDiskVortex ,
438- unit : Cow :: from ( "ratio" ) ,
439- value : vortex_time. as_nanos ( ) as f64 / lance_time. as_nanos ( ) as f64 ,
440- } ) ;
441- }
442-
443- // Decompress time ratio: vortex vs lance.
444- if let ( Some ( vortex_time) , Some ( lance_time) ) = (
445- measurements. get ( & ( Format :: OnDiskVortex , CompressOp :: Decompress ) ) ,
446- measurements. get ( & ( Format :: Lance , CompressOp :: Decompress ) ) ,
447- ) {
448- ratios. push ( CustomUnitMeasurement {
449- name : format ! ( "vortex:lance ratio decompress time/{bench_name}" ) ,
450- format : Format :: OnDiskVortex ,
451- unit : Cow :: from ( "ratio" ) ,
452- value : vortex_time. as_nanos ( ) as f64 / lance_time. as_nanos ( ) as f64 ,
453- } ) ;
454- }
455- }
0 commit comments