@@ -15,14 +15,17 @@ use num_traits::AsPrimitive;
1515use tempfile:: NamedTempFile ;
1616use vortex:: IntoArray ;
1717use vortex:: arrays:: {
18- BoolArray , ConstantArray , FixedSizeListArray , ListArray , PrimitiveArray , StructArray ,
19- VarBinArray , VarBinViewArray ,
18+ BoolArray , ConstantArray , DictArray , FixedSizeListArray , ListArray , PrimitiveArray ,
19+ StructArray , VarBinArray , VarBinViewArray ,
2020} ;
2121use vortex:: buffer:: buffer;
22+ use vortex:: dtype:: { Nullability , PType } ;
2223use vortex:: file:: WriteOptionsSessionExt ;
2324use vortex:: io:: runtime:: BlockingRuntime ;
24- use vortex:: scalar:: Scalar ;
25+ use vortex:: scalar:: { PValue , Scalar } ;
2526use vortex:: validity:: Validity ;
27+ use vortex_runend:: RunEndArray ;
28+ use vortex_sequence:: SequenceArray ;
2629
2730use crate :: cpp:: { duckdb_string_t, duckdb_timestamp} ;
2831use crate :: duckdb:: { Connection , Database } ;
@@ -703,3 +706,193 @@ fn test_vortex_scan_ultra_deep_nesting() {
703706 }
704707 assert_eq ! ( row_count, 1 , "Should have retrieved 1 row" ) ;
705708}
709+
710+ async fn write_vortex_file_with_encodings ( ) -> NamedTempFile {
711+ let temp_file_path = create_temp_file ( ) ;
712+
713+ // 0. Primitive
714+ let primitive_i32 = buffer ! [ 1i32 , 2 , 3 , 4 , 5 ] ;
715+ let primitive_f64 = buffer ! [ 1.1f64 , 2.2 , 3.3 , 4.4 , 5.5 ] ;
716+
717+ // 1. Constant
718+ let constant_str = ConstantArray :: new ( Scalar :: from ( "constant_value" ) , 5 ) ;
719+
720+ // 2. Boolean
721+ let bool_array = BoolArray :: from_bit_buffer (
722+ vec ! [ true , false , true , false , true ] . into ( ) ,
723+ Validity :: NonNullable ,
724+ ) ;
725+
726+ // 3. Dictionary
727+ let keys = buffer ! [ 0u32 , 1 , 0 , 2 , 1 ] ;
728+ let values = VarBinArray :: from ( vec ! [ "apple" , "banana" , "cherry" ] ) ;
729+ let dict_array = DictArray :: try_new ( keys. into_array ( ) , values. into_array ( ) ) . unwrap ( ) ;
730+
731+ // 4. Run-End
732+ let run_ends = buffer ! [ 3u32 , 5 ] ;
733+ let run_values = buffer ! [ 100i32 , 200 ] ;
734+ let rle_array = RunEndArray :: try_new ( run_ends. into_array ( ) , run_values. into_array ( ) ) . unwrap ( ) ;
735+
736+ // 5. Sequence array
737+ let sequence_array = SequenceArray :: new (
738+ PValue :: I64 ( 0 ) ,
739+ PValue :: I64 ( 10 ) ,
740+ PType :: I64 ,
741+ Nullability :: NonNullable ,
742+ 5 ,
743+ )
744+ . unwrap ( )
745+ . into_array ( ) ;
746+
747+ // 6. VarBin
748+ let varbin_array = VarBinArray :: from ( vec ! [ "hello" , "world" , "vortex" , "test" , "data" ] ) ;
749+
750+ // 7. List
751+ let list_values = PrimitiveArray :: from_iter ( [ 1i32 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] ) ;
752+ let list_offsets = buffer ! [ 0u32 , 2 , 5 , 6 , 10 , 10 ] ; // [1,2], [3,4,5], [6], [7,8,9,10], []
753+ let list_array = ListArray :: try_new (
754+ list_values. into_array ( ) ,
755+ list_offsets. into_array ( ) ,
756+ Validity :: NonNullable ,
757+ )
758+ . unwrap ( ) ;
759+
760+ // 8. Fixed-size list
761+ let fixed_list_values = PrimitiveArray :: from_iter ( [ 1i32 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] ) ;
762+ let fixed_list_array = FixedSizeListArray :: try_new (
763+ fixed_list_values. into_array ( ) ,
764+ 2 , // 2 elements per list
765+ Validity :: NonNullable ,
766+ 5 , // 5 lists
767+ )
768+ . unwrap ( ) ;
769+
770+ // Struct array containing the different encodings.
771+ let struct_array = StructArray :: try_from_iter ( [
772+ ( "primitive_i32" , primitive_i32. into_array ( ) ) ,
773+ ( "primitive_f64" , primitive_f64. into_array ( ) ) ,
774+ ( "constant_str" , constant_str. into_array ( ) ) ,
775+ ( "bool_col" , bool_array. into_array ( ) ) ,
776+ ( "dict_col" , dict_array. into_array ( ) ) ,
777+ ( "rle_col" , rle_array. into_array ( ) ) ,
778+ ( "sequence_col" , sequence_array) ,
779+ ( "varbin_col" , varbin_array. into_array ( ) ) ,
780+ ( "list_col" , list_array. into_array ( ) ) ,
781+ ( "fixed_list_col" , fixed_list_array. into_array ( ) ) ,
782+ ] )
783+ . unwrap ( ) ;
784+
785+ // Write to file
786+ let mut file = async_fs:: File :: create ( & temp_file_path) . await . unwrap ( ) ;
787+ SESSION
788+ . write_options ( )
789+ . write ( & mut file, struct_array. to_array_stream ( ) )
790+ . await
791+ . unwrap ( ) ;
792+
793+ temp_file_path
794+ }
795+
/// Round-trips the file produced by `write_vortex_file_with_encodings` through
/// DuckDB's `vortex_scan` and checks every column of the first result chunk
/// against the values that were written.
// One column is verified per section, which pushes the function past clippy's
// cognitive-complexity threshold; splitting it would scatter the fixture data.
#[allow(clippy::cognitive_complexity)]
#[test]
fn test_vortex_encodings_roundtrip() {
    let file = RUNTIME.block_on(write_vortex_file_with_encodings());
    let conn = database_connection();

    // Read every column back in a single scan.
    let result = conn
        .query(&format!(
            "SELECT * FROM vortex_scan('{}')",
            file.path().to_string_lossy()
        ))
        .unwrap();

    let chunk = result.into_iter().next().unwrap();
    assert_eq!(chunk.len(), 5); // 5 rows
    assert_eq!(chunk.column_count(), 10); // 10 columns

    // Verify primitive i32 (column 0)
    let primitive_i32_vec = chunk.get_vector(0);
    let primitive_i32_slice = primitive_i32_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(primitive_i32_slice, [1, 2, 3, 4, 5]);

    // Verify primitive f64 (column 1) -- all five values, not just a prefix.
    let primitive_f64_vec = chunk.get_vector(1);
    let primitive_f64_slice = primitive_f64_vec.as_slice_with_len::<f64>(chunk.len().as_());
    let expected_f64 = [1.1f64, 2.2, 3.3, 4.4, 5.5];
    assert_eq!(primitive_f64_slice.len(), expected_f64.len());
    for (row, (actual, expected)) in primitive_f64_slice
        .iter()
        .zip(expected_f64.iter())
        .enumerate()
    {
        assert!(
            (*actual - *expected).abs() < f64::EPSILON,
            "primitive_f64 row {row}: got {actual}, expected {expected}"
        );
    }

    // Verify constant string (column 2)
    let mut constant_vec = chunk.get_vector(2);
    // SAFETY: assumed sound because column 2 was written as a string column, so
    // it should hold `chunk.len()` `duckdb_string_t` entries -- TODO confirm
    // against the safety contract of `as_slice_mut`.
    let constant_slice =
        unsafe { constant_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    assert_eq!(constant_slice.len(), 5);
    for value in constant_slice.iter_mut() {
        assert_eq!(String::from_duckdb_value(value), "constant_value");
    }

    // Verify boolean (column 3)
    let bool_vec = chunk.get_vector(3);
    let bool_slice = bool_vec.as_slice_with_len::<bool>(chunk.len().as_());
    assert_eq!(bool_slice, [true, false, true, false, true]);

    // Verify dictionary (column 4)
    let mut dict_vec = chunk.get_vector(4);
    // SAFETY: same assumption as for column 2 (string column, `chunk.len()`
    // entries) -- TODO confirm.
    let dict_slice = unsafe { dict_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    // Keys were [0, 1, 0, 2, 1] and values were ["apple", "banana", "cherry"]
    let expected_dict_values = ["apple", "banana", "apple", "cherry", "banana"];
    assert_eq!(dict_slice.len(), expected_dict_values.len());
    for (value, expected) in dict_slice
        .iter_mut()
        .zip(expected_dict_values.iter().copied())
    {
        assert_eq!(String::from_duckdb_value(value), expected);
    }

    // Verify RLE (column 5)
    let rle_vec = chunk.get_vector(5);
    let rle_slice = rle_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(rle_slice, [100, 100, 100, 200, 200]);

    // Verify sequence (column 6)
    let seq_vec = chunk.get_vector(6);
    let seq_slice = seq_vec.as_slice_with_len::<i64>(chunk.len().as_());
    assert_eq!(seq_slice, [0, 10, 20, 30, 40]);

    // Verify varbin (column 7)
    let mut varbin_vec = chunk.get_vector(7);
    // SAFETY: same assumption as for column 2 (string column, `chunk.len()`
    // entries) -- TODO confirm.
    let varbin_slice = unsafe { varbin_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    let expected_strings = ["hello", "world", "vortex", "test", "data"];
    assert_eq!(varbin_slice.len(), expected_strings.len());
    for (value, expected) in varbin_slice
        .iter_mut()
        .zip(expected_strings.iter().copied())
    {
        assert_eq!(String::from_duckdb_value(value), expected);
    }

    // Verify list (column 8)
    // Expected lists: [1,2], [3,4,5], [6], [7,8,9,10], []
    let list_vec = chunk.get_vector(8);
    let list_entries = list_vec.as_slice_with_len::<cpp::duckdb_list_entry>(chunk.len().as_());

    // Each entry is compared as (offset, length) into the shared child vector.
    let offsets_and_lengths: Vec<_> = list_entries
        .iter()
        .map(|entry| (entry.offset, entry.length))
        .collect();
    assert_eq!(
        offsets_and_lengths,
        [
            (0, 2),  // [1,2]
            (2, 3),  // [3,4,5]
            (5, 1),  // [6]
            (6, 4),  // [7,8,9,10]
            (10, 0), // []
        ]
    );

    // Get child vector and verify actual values
    let list_child = list_vec.list_vector_get_child();
    let child_values = list_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

    // Verify fixed-size list column (column 9)
    // Expected fixed-size lists: [1,2], [3,4], [5,6], [7,8], [9,10]
    let fixed_list_vec = chunk.get_vector(9);
    let fixed_child = fixed_list_vec.array_vector_get_child();
    let fixed_child_values = fixed_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(fixed_child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}
0 commit comments