@@ -48,10 +48,6 @@ pub mod taxi_data;
4848pub mod tpch;
4949pub mod vortex_utils;
5050
51- // Sizes match default compressor configuration
52- const TARGET_BLOCK_BYTESIZE : usize = 16 * ( 1 << 20 ) ;
53- const TARGET_BLOCK_SIZE : usize = 64 * ( 1 << 10 ) ;
54-
5551#[ macro_export]
5652macro_rules! feature_flagged_allocator {
5753 ( ) => {
@@ -81,7 +77,7 @@ pub enum Format {
8177 Arrow ,
8278 Parquet ,
8379 InMemoryVortex ,
84- OnDiskVortex { enable_compression : bool } ,
80+ OnDiskVortex ,
8581}
8682
8783impl std:: fmt:: Display for Format {
@@ -93,8 +89,8 @@ impl std::fmt::Display for Format {
9389 Format :: InMemoryVortex => {
9490 write ! ( f, "in_memory_vortex" )
9591 }
96- Format :: OnDiskVortex { enable_compression } => {
97- write ! ( f, "on_disk_vortex(compressed={enable_compression} )" )
92+ Format :: OnDiskVortex => {
93+ write ! ( f, "on_disk_vortex(compressed=true )" )
9894 }
9995 }
10096 }
@@ -107,12 +103,7 @@ impl Format {
107103 Format :: Arrow => "arrow" . to_string ( ) ,
108104 Format :: Parquet => "parquet" . to_string ( ) ,
109105 Format :: InMemoryVortex => "vortex-in-memory" . to_string ( ) ,
110- Format :: OnDiskVortex { enable_compression } => if * enable_compression {
111- "vortex-file-compressed"
112- } else {
113- "vortex-file-uncompressed"
114- }
115- . to_string ( ) ,
106+ Format :: OnDiskVortex => "vortex-file-compressed" . to_string ( ) ,
116107 }
117108 }
118109}
@@ -404,65 +395,3 @@ pub fn generate_struct_of_list_of_ints_array(
404395 DType :: Struct ( struct_dtype. clone ( ) , Nullability :: NonNullable ) ,
405396 )
406397}
407-
408- #[ cfg( test) ]
409- mod test {
410- use std:: fs:: File ;
411- use std:: ops:: Deref ;
412- use std:: sync:: Arc ;
413-
414- use arrow_array:: { ArrayRef as ArrowArrayRef , StructArray as ArrowStructArray } ;
415- use log:: LevelFilter ;
416- use parquet:: arrow:: arrow_reader:: ParquetRecordBatchReaderBuilder ;
417- use vortex:: arrow:: { FromArrowArray , IntoArrowArray } ;
418- use vortex:: compress:: CompressionStrategy ;
419- use vortex:: sampling_compressor:: SamplingCompressor ;
420- use vortex:: Array ;
421-
422- use crate :: taxi_data:: taxi_data_parquet;
423- use crate :: { compress_taxi_data, setup_logger} ;
424-
425- #[ ignore]
426- #[ test]
427- fn compression_ratio ( ) {
428- setup_logger ( LevelFilter :: Debug ) ;
429- _ = compress_taxi_data ( ) ;
430- }
431-
432- #[ ignore]
433- #[ test]
434- fn round_trip_arrow ( ) {
435- let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
436- let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
437- let reader = builder. with_limit ( 1 ) . build ( ) . unwrap ( ) ;
438-
439- for record_batch in reader. map ( |batch_result| batch_result. unwrap ( ) ) {
440- let struct_arrow: ArrowStructArray = record_batch. into ( ) ;
441- let arrow_array: ArrowArrayRef = Arc :: new ( struct_arrow) ;
442- let vortex_array = Array :: from_arrow ( arrow_array. clone ( ) , false ) ;
443- let vortex_as_arrow = vortex_array. into_arrow_preferred ( ) . unwrap ( ) ;
444- assert_eq ! ( vortex_as_arrow. deref( ) , arrow_array. deref( ) ) ;
445- }
446- }
447-
448- // Ignoring since Struct arrays don't currently support equality.
449- // https://github.com/apache/arrow-rs/issues/5199
450- #[ ignore]
451- #[ test]
452- fn round_trip_arrow_compressed ( ) {
453- let file = File :: open ( taxi_data_parquet ( ) ) . unwrap ( ) ;
454- let builder = ParquetRecordBatchReaderBuilder :: try_new ( file) . unwrap ( ) ;
455- let reader = builder. with_limit ( 1 ) . build ( ) . unwrap ( ) ;
456- let compressor: & dyn CompressionStrategy = & SamplingCompressor :: default ( ) ;
457-
458- for record_batch in reader. map ( |batch_result| batch_result. unwrap ( ) ) {
459- let struct_arrow: ArrowStructArray = record_batch. into ( ) ;
460- let arrow_array: ArrowArrayRef = Arc :: new ( struct_arrow) ;
461- let vortex_array = Array :: from_arrow ( arrow_array. clone ( ) , false ) ;
462-
463- let compressed = compressor. compress ( & vortex_array) . unwrap ( ) ;
464- let compressed_as_arrow = compressed. into_arrow_preferred ( ) . unwrap ( ) ;
465- assert_eq ! ( compressed_as_arrow. deref( ) , arrow_array. deref( ) ) ;
466- }
467- }
468- }
0 commit comments