@@ -7,16 +7,117 @@ use vortex_array::ToCanonical;
77use vortex_array:: arrays:: PrimitiveArray ;
88use vortex_array:: builders:: { ArrayBuilder , PrimitiveBuilder , UninitRange } ;
99use vortex_array:: patches:: Patches ;
10+ use vortex_array:: validity:: Validity ;
11+ use vortex_array:: vtable:: ValidityHelper ;
12+ use vortex_buffer:: BufferMut ;
1013use vortex_dtype:: {
11- IntegerPType , NativePType , match_each_integer_ptype, match_each_unsigned_integer_ptype,
14+ IntegerPType , NativePType , UnsignedPType , match_each_integer_ptype,
15+ match_each_unsigned_integer_ptype,
1216} ;
1317use vortex_error:: { VortexExpect , vortex_panic} ;
14- use vortex_mask:: Mask ;
18+ use vortex_mask:: { Mask , MaskMut } ;
1519use vortex_scalar:: Scalar ;
20+ use vortex_vector:: primitive:: { PVectorMut , PrimitiveVectorMut } ;
1621
1722use crate :: BitPackedArray ;
1823use crate :: unpack_iter:: BitPacked ;
1924
25+ /// Unpacks a bit-packed array into a primitive vector.
26+ pub fn unpack_to_primitive_vector ( array : & BitPackedArray ) -> PrimitiveVectorMut {
27+ match_each_integer_ptype ! ( array. ptype( ) , |P | { unpack_to_pvector:: <P >( array) . into( ) } )
28+ }
29+
30+ /// Unpacks a bit-packed array into a generic [`PVectorMut`].
31+ pub fn unpack_to_pvector < P : BitPacked > ( array : & BitPackedArray ) -> PVectorMut < P > {
32+ if array. is_empty ( ) {
33+ return PVectorMut :: with_capacity ( 0 ) ;
34+ }
35+
36+ let len = array. len ( ) ;
37+ let mut elements = BufferMut :: < P > :: with_capacity ( len) ;
38+ let uninit_slice = & mut elements. spare_capacity_mut ( ) [ ..len] ;
39+
40+ // Decode into an uninitialized slice.
41+ let mut bit_packed_iter = array. unpacked_chunks ( ) ;
42+ bit_packed_iter. decode_into ( uninit_slice) ;
43+ // SAFETY: `decode_into` initialized exactly `len` elements into the spare (existing) capacity.
44+ unsafe { elements. set_len ( len) } ;
45+
46+ let mut validity = array. validity_mask ( ) . into_mut ( ) ;
47+ debug_assert_eq ! ( validity. len( ) , len) ;
48+
49+ // TODO(connor): Implement a fused version of patching instead.
50+ if let Some ( patches) = array. patches ( ) {
51+ let patch_indices = patches. indices ( ) . to_primitive ( ) ;
52+ let patch_values = patches. values ( ) . to_primitive ( ) ;
53+ let patches_validity = patch_values. validity ( ) ;
54+ let patch_offset = patches. offset ( ) ;
55+
56+ let patch_values_slice = patch_values. as_slice :: < P > ( ) ;
57+ match_each_unsigned_integer_ptype ! ( patch_indices. ptype( ) , |I | {
58+ let patch_indices_slice = patch_indices. as_slice:: <I >( ) ;
59+
60+ // SAFETY:
61+ // - `Patches` invariant guarantees indices are sorted and within array bounds.
62+ // - `patch_indices` and `patch_values` have equal length (from `Patches` invariant).
63+ // - `elements` and `validity` have equal length (both are `len` from the array).
64+ // - All patch indices are valid after offset adjustment (guaranteed by `Patches`).
65+ unsafe {
66+ apply_patches_inner(
67+ & mut elements,
68+ & mut validity,
69+ patch_indices_slice,
70+ patch_offset,
71+ patch_values_slice,
72+ patches_validity,
73+ )
74+ } ;
75+ } ) ;
76+ }
77+
78+ // SAFETY: `elements` and `validity` have the same length.
79+ unsafe { PVectorMut :: new_unchecked ( elements, validity) }
80+ }
81+
82+ /// # Safety
83+ ///
84+ /// - All indices in `patch_indices` after subtracting `patch_offset` must be valid indices
85+ /// into both `buffer` and `validity`.
86+ /// - `patch_indices` must be sorted in ascending order.
87+ /// - `patch_indices` and `patch_values` must have the same length.
88+ /// - `buffer` and `validity` must have the same length.
89+ unsafe fn apply_patches_inner < P , I > (
90+ buffer : & mut [ P ] ,
91+ validity : & mut MaskMut ,
92+ patch_indices : & [ I ] ,
93+ patch_offset : usize ,
94+ patch_values : & [ P ] ,
95+ patches_validity : & Validity ,
96+ ) where
97+ P : NativePType ,
98+ I : UnsignedPType ,
99+ {
100+ debug_assert ! ( !patch_indices. is_empty( ) ) ;
101+ debug_assert_eq ! ( patch_indices. len( ) , patch_values. len( ) ) ;
102+ debug_assert_eq ! ( buffer. len( ) , validity. len( ) ) ;
103+ debug_assert ! ( patch_indices. is_sorted( ) ) ;
104+ debug_assert ! ( patch_indices. last( ) . vortex_expect( "can't be empty" ) . as_( ) <= validity. len( ) ) ;
105+
106+ match patches_validity {
107+ Validity :: NonNullable | Validity :: AllValid => {
108+ for ( & i, & value) in patch_indices. iter ( ) . zip_eq ( patch_values) {
109+ let index = i. as_ ( ) - patch_offset;
110+
111+ // SAFETY: `index` is valid because caller guarantees all patch indices are within
112+ // bounds after offset adjustment.
113+ unsafe { validity. set_unchecked ( index) } ;
114+ buffer[ index] = value;
115+ }
116+ }
117+ _ => vortex_panic ! ( "BitPackedArray somehow had nullable patch values" ) ,
118+ }
119+ }
120+
20121pub fn unpack_array ( array : & BitPackedArray ) -> PrimitiveArray {
21122 match_each_integer_ptype ! ( array. ptype( ) , |P | { unpack_primitive_array:: <P >( array) } )
22123}
@@ -161,6 +262,7 @@ mod tests {
161262 use vortex_array:: { IntoArray , assert_arrays_eq} ;
162263 use vortex_buffer:: { Buffer , BufferMut , buffer} ;
163264 use vortex_dtype:: Nullability ;
265+ use vortex_vector:: { VectorMutOps , VectorOps } ;
164266
165267 use super :: * ;
166268 use crate :: BitPackedVTable ;
@@ -350,4 +452,268 @@ mod tests {
350452 // Verify all values were correctly unpacked including patches.
351453 assert_arrays_eq ! ( result, PrimitiveArray :: from_iter( values) ) ;
352454 }
455+
/// Unpacks several integer types and array sizes to a primitive vector and checks the lengths.
#[test]
fn test_unpack_to_primitive_vector_basic() {
    // u8: five small values, cross-checked against the length produced by `unpack_array`.
    let bytes = PrimitiveArray::from_iter([5u8, 10, 15, 20, 25]);
    let bytes_packed = bitpack_encode(&bytes, 5, None).unwrap();
    let bytes_vector = unpack_to_primitive_vector(&bytes_packed);
    let bytes_array = unpack_array(&bytes_packed);
    assert_eq!(bytes_vector.len(), bytes_array.len());
    // Freezing the mutable vector is exercised as well; its contents are not inspected.
    let _frozen_bytes = bytes_vector.freeze();

    // u32: an empty input must give an empty vector.
    let no_words: PrimitiveArray = PrimitiveArray::from_iter(Vec::<u32>::new());
    let no_words_packed = bitpack_encode(&no_words, 0, None).unwrap();
    let no_words_vector = unpack_to_primitive_vector(&no_words_packed);
    assert_eq!(no_words_vector.len(), 0);

    // u16: 1024 elements, i.e. exactly one chunk.
    let shorts = PrimitiveArray::from_iter(0u16..1024);
    let shorts_packed = bitpack_encode(&shorts, 10, None).unwrap();
    let shorts_vector = unpack_to_primitive_vector(&shorts_packed);
    assert_eq!(shorts_vector.len(), 1024);

    // i32: 1025 elements, i.e. one full chunk plus a single trailing element.
    let ints = PrimitiveArray::from_iter((0i32..1025).map(|x| x % 512));
    let ints_packed = bitpack_encode(&ints, 9, None).unwrap();
    let ints_vector = unpack_to_primitive_vector(&ints_packed);
    assert_eq!(ints_vector.len(), 1025);

    // Both unpacking paths must agree on the length.
    let ints_array = unpack_array(&ints_packed);
    assert_eq!(ints_vector.len(), ints_array.len());
}
492+
/// Unpacks arrays whose out-of-range values force patches and checks the resulting lengths.
#[test]
fn test_unpack_to_primitive_vector_with_patches() {
    // Out-of-range values sit at the first, a middle, and the last position.
    let values: Vec<u32> = vec![
        2000, // Patch at start
        5, 10, 15, 20, 25, 30, 3000, // Patch in middle
        35, 40, 45, 50, 55, 4000, // Patch at end
    ];
    let array = PrimitiveArray::from_iter(values.clone());

    // Six bits cannot hold 2000, 3000 or 4000, so the encoder has to emit patches.
    let bitpacked = bitpack_encode(&array, 6, None).unwrap();
    assert!(bitpacked.patches().is_some(), "Should have patches");

    let vector = unpack_to_primitive_vector(&bitpacked);

    // The vector must be as long as the input and as long as what `unpack_array` yields.
    assert_eq!(vector.len(), values.len());
    let expected = unpack_array(&bitpacked);
    assert_eq!(vector.len(), expected.len());

    // A 3072-element array with an out-of-range value every 500 positions.
    let large_values: Vec<u16> = (0..3072)
        .map(|i| {
            if i % 500 == 0 {
                2000 + i as u16 // Does not fit in 8 bits
            } else {
                (i % 256) as u16 // Fits in 8 bits
            }
        })
        .collect();
    let large_array = PrimitiveArray::from_iter(large_values);
    let large_bitpacked = bitpack_encode(&large_array, 8, None).unwrap();
    assert!(large_bitpacked.patches().is_some());

    let large_vector = unpack_to_primitive_vector(&large_bitpacked);
    assert_eq!(large_vector.len(), 3072);
}
534+
/// Unpacks nullable arrays (with and without patches) and checks the resulting lengths.
#[test]
fn test_unpack_to_primitive_vector_nullability() {
    // Alternating valid / null entries.
    let array = PrimitiveArray::new(
        Buffer::from_iter([100u32, 0, 200, 0, 300, 0, 400]),
        Validity::from_iter([true, false, true, false, true, false, true]),
    );

    let bitpacked = bitpack_encode(&array, 9, None).unwrap();
    let vector = unpack_to_primitive_vector(&bitpacked);

    // Only the length is asserted here; the validity contents are not inspected.
    assert_eq!(vector.len(), 7);

    // Nulls combined with values (2000, 3000) that do not fit in 5 bits.
    let patch_array = PrimitiveArray::new(
        Buffer::from_iter([10u16, 0, 2000, 0, 30, 3000, 0]),
        Validity::from_iter([true, false, true, false, true, true, false]),
    );

    let patch_bitpacked = bitpack_encode(&patch_array, 5, None).unwrap();
    assert!(patch_bitpacked.patches().is_some());

    let patch_vector = unpack_to_primitive_vector(&patch_bitpacked);
    assert_eq!(patch_vector.len(), 7);

    // Every entry null.
    let all_nulls = PrimitiveArray::new(
        Buffer::from_iter([0u32, 0, 0, 0]),
        Validity::from_iter([false, false, false, false]),
    );
    let all_nulls_bp = bitpack_encode(&all_nulls, 0, None).unwrap();
    let all_nulls_vec = unpack_to_primitive_vector(&all_nulls_bp);
    assert_eq!(all_nulls_vec.len(), 4);
}
570+
/// Checks that `execute()`, `unpack_to_primitive_vector()` and `unpack_array()` agree on the
/// length of their output for a range of inputs, including a sliced array.
#[test]
fn test_execute_method_consistency() {
    use vortex_vector::Vector;

    /// Encodes `array` with `bit_width` and compares the lengths produced by all three paths.
    fn check_consistency(array: &PrimitiveArray, bit_width: u8) {
        let bitpacked = bitpack_encode(array, bit_width, None).unwrap();

        // Path 1: the new vector-producing function.
        let vector_result = unpack_to_primitive_vector(&bitpacked);

        // Path 2: the pre-existing array-producing function.
        let unpacked_array = unpack_array(&bitpacked);

        // Path 3: executing the encoded array.
        let executed = bitpacked.into_array().execute().unwrap();

        assert_eq!(vector_result.len(), array.len(), "vector length mismatch");
        assert_eq!(
            unpacked_array.len(),
            array.len(),
            "unpacked array length mismatch"
        );

        // Executing must yield a primitive vector of the input's length.
        let Vector::Primitive(exec_pv) = &executed else {
            panic!("Expected primitive vector from execute");
        };
        assert_eq!(
            exec_pv.len(),
            array.len(),
            "executed vector length mismatch"
        );

        // Executing the already-unpacked array must give a primitive vector of the same length.
        let unpacked_executed = unpacked_array.into_array().execute().unwrap();
        let Vector::Primitive(unpack_pv) = &unpacked_executed else {
            panic!("Expected both to be primitive vectors");
        };
        assert_eq!(
            exec_pv.len(),
            unpack_pv.len(),
            "execute() and unpack_array().execute() produced different lengths"
        );
    }

    // Inputs whose values all fit in the chosen bit width.
    check_consistency(&PrimitiveArray::from_iter(0u16..100), 7);
    check_consistency(&PrimitiveArray::from_iter(0u32..1024), 10);

    // Signed input spanning two chunks; every value is below 128 and so fits in 7 bits.
    check_consistency(&PrimitiveArray::from_iter((0i16..2048).map(|x| x % 128)), 7);

    // Every 20th value is at least 1000, which cannot fit in 4 bits.
    let patch_array = PrimitiveArray::from_iter(
        (0..100)
            .map(|i| if i % 20 == 0 { 1000 + i } else { i % 16 })
            .collect::<Vec<u32>>(),
    );
    check_consistency(&patch_array, 4);

    // A slice taken from the middle of an encoded array.
    let values = PrimitiveArray::from_iter(0u32..2048);
    let bitpacked = bitpack_encode(&values, 11, None).unwrap();
    let sliced = bitpacked.slice(500..1500);

    // Run all three paths on the slice.
    let sliced_bp = sliced.as_::<BitPackedVTable>();
    let vector_result = unpack_to_primitive_vector(sliced_bp);
    let unpacked_array = unpack_array(sliced_bp);
    let executed = sliced.execute().unwrap();

    assert_eq!(
        vector_result.len(),
        1000,
        "sliced vector length should be 1000"
    );
    assert_eq!(
        unpacked_array.len(),
        1000,
        "sliced unpacked array length should be 1000"
    );

    let Vector::Primitive(pv) = executed else {
        panic!("Expected primitive vector from execute on sliced array");
    };
    assert_eq!(
        pv.len(),
        1000,
        "sliced executed vector length should be 1000"
    );
}
668+
/// Exercises boundary inputs: empty, all-zero, widest u16 packing, chunk-boundary patches, and
/// a single element. Only lengths are asserted.
#[test]
fn test_unpack_edge_cases() {
    // No elements at all.
    let empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::<u64>::new());
    let empty_bp = bitpack_encode(&empty, 0, None).unwrap();
    let empty_vec = unpack_to_primitive_vector(&empty_bp);
    assert_eq!(empty_vec.len(), 0);

    // One hundred zeros packed with a bit width of 0.
    let zeros = PrimitiveArray::from_iter([0u32; 100]);
    let zeros_bp = bitpack_encode(&zeros, 0, None).unwrap();
    let zeros_vec = unpack_to_primitive_vector(&zeros_bp);
    assert_eq!(zeros_vec.len(), 100);
    // Must match the length reported by `unpack_array`.
    let zeros_array = unpack_array(&zeros_bp);
    assert_eq!(zeros_vec.len(), zeros_array.len());

    // u16 packed at 15 bits, using the largest 15-bit value (2^15 - 1).
    let max_values = PrimitiveArray::from_iter([32767u16; 50]);
    let max_bp = bitpack_encode(&max_values, 15, None).unwrap();
    let max_vec = unpack_to_primitive_vector(&max_bp);
    assert_eq!(max_vec.len(), 50);

    // 3072 elements with out-of-range values on either side of each 1024-element boundary.
    let boundary_values: Vec<u32> = (0..3072)
        .map(|i| {
            if matches!(i, 1023 | 1024 | 2047 | 2048) {
                50000 // Too large for 7 bits, so it must become a patch
            } else {
                (i % 128) as u32
            }
        })
        .collect();
    let boundary_array = PrimitiveArray::from_iter(boundary_values);
    let boundary_bp = bitpack_encode(&boundary_array, 7, None).unwrap();
    assert!(boundary_bp.patches().is_some());

    let boundary_vec = unpack_to_primitive_vector(&boundary_bp);
    assert_eq!(boundary_vec.len(), 3072);
    // Must match the length reported by `unpack_array`.
    let boundary_unpacked = unpack_array(&boundary_bp);
    assert_eq!(boundary_vec.len(), boundary_unpacked.len());

    // Exactly one element.
    let single = PrimitiveArray::from_iter([42u8]);
    let single_bp = bitpack_encode(&single, 6, None).unwrap();
    let single_vec = unpack_to_primitive_vector(&single_bp);
    assert_eq!(single_vec.len(), 1);
}
353719}
0 commit comments