@@ -12,7 +12,7 @@ use vortex_array::patches::Patches;
1212use vortex_array:: validity:: Validity ;
1313use vortex_array:: vtable:: CanonicalVTable ;
1414use vortex_array:: { Array , Canonical , ToCanonical as _} ;
15- use vortex_buffer:: { Buffer , BufferMut , buffer } ;
15+ use vortex_buffer:: { Buffer , buffer , buffer_mut } ;
1616use vortex_dtype:: {
1717 DType , DecimalDType , NativePType , Nullability , StructFields , match_each_integer_ptype,
1818 match_each_native_ptype,
@@ -235,26 +235,18 @@ fn canonicalize_utf8<I: NativePType>(
235235 )
236236 } else {
237237 // any <=12 character value will do
238- BinaryView :: make_view ( "FILL_VALUE" . as_ref ( ) , 0 , 0 )
238+ BinaryView :: make_view ( & [ ] , 0 , 0 )
239239 } ;
240240
241- let mut view_builder = BufferMut :: < BinaryView > :: with_capacity ( len) ;
242- let mut i = 0usize ;
241+ let mut views = buffer_mut ! [ fill; len] ;
243242 for ( patch_index, & patch) in indices. into_iter ( ) . zip_eq ( values. views ( ) . iter ( ) ) {
244243 let patch_index_usize = <usize as NumCast >:: from ( patch_index)
245244 . vortex_expect ( "var bin view indices must fit in usize" ) ;
246- for _ in i..patch_index_usize {
247- view_builder. push ( fill)
248- }
249- view_builder. push ( patch) ;
250- i = patch_index_usize + 1 ;
251- }
252- for _ in i..len {
253- view_builder. push ( fill)
245+ views[ patch_index_usize] = patch;
254246 }
255247
256248 let array = VarBinViewArray :: try_new (
257- view_builder . freeze ( ) ,
249+ views . freeze ( ) ,
258250 buffers,
259251 DType :: Utf8 ( validity. nullability ( ) ) ,
260252 validity,
@@ -265,10 +257,10 @@ fn canonicalize_utf8<I: NativePType>(
265257
266258#[ cfg( test) ]
267259mod test {
268- use itertools:: Itertools as _;
269260 use rstest:: rstest;
270261 use vortex_array:: arrays:: {
271- BoolArray , BooleanBufferBuilder , DecimalArray , PrimitiveArray , StructArray , VarBinViewArray ,
262+ BoolArray , BooleanBufferBuilder , DecimalArray , PrimitiveArray , StructArray , VarBinArray ,
263+ VarBinViewArray ,
272264 } ;
273265 use vortex_array:: arrow:: IntoArrowArray as _;
274266 use vortex_array:: validity:: Validity ;
@@ -634,26 +626,6 @@ mod test {
634626 ] )
635627 . into_array ( ) ;
636628
637- println ! (
638- "strings: {}" ,
639- ( 0 ..strings. len( ) )
640- . map( |i| strings. scalar_at( i) . unwrap( ) )
641- . join( "," )
642- ) ;
643- let va = strings. validity_mask ( ) . unwrap ( ) . into_array ( ) ;
644- println ! (
645- "strings_validity: {}" ,
646- ( 0 ..va. len( ) )
647- . map( |i| va
648- . scalar_at( i)
649- . unwrap( )
650- . as_bool( )
651- . value( )
652- . map_or( "N" , |b| if b { "T" } else { "F" } )
653- . to_owned( ) )
654- . join( "," )
655- ) ;
656-
657629 let array = SparseArray :: try_new (
658630 buffer ! [ 0u16 , 3 , 4 , 5 , 7 , 9 , 10 ] . into_array ( ) ,
659631 strings,
@@ -720,4 +692,49 @@ mod test {
720692 assert_eq ! ( actual. data_type( ) , expected. data_type( ) ) ;
721693 assert_eq ! ( & actual, & expected) ;
722694 }
695+
696+ #[ test]
697+ fn test_sparse_varbin_null_fill ( ) {
698+ let strings = <VarBinArray as FromIterator < _ > >:: from_iter ( [
699+ Some ( "hello" ) ,
700+ Some ( "goodbye" ) ,
701+ Some ( "hello" ) ,
702+ None ,
703+ Some ( "bonjour" ) ,
704+ Some ( "你好" ) ,
705+ None ,
706+ ] )
707+ . into_array ( ) ;
708+
709+ let array = SparseArray :: try_new (
710+ buffer ! [ 0u16 , 3 , 4 , 5 , 7 , 9 , 10 ] . into_array ( ) ,
711+ strings,
712+ 12 ,
713+ Scalar :: null ( DType :: Utf8 ( Nullable ) ) ,
714+ )
715+ . unwrap ( ) ;
716+
717+ let actual = array. to_varbinview ( ) . unwrap ( ) . into_array ( ) ;
718+ let expected = <VarBinViewArray as FromIterator < _ > >:: from_iter ( [
719+ Some ( "hello" ) ,
720+ None ,
721+ None ,
722+ Some ( "goodbye" ) ,
723+ Some ( "hello" ) ,
724+ None ,
725+ None ,
726+ Some ( "bonjour" ) ,
727+ None ,
728+ Some ( "你好" ) ,
729+ None ,
730+ None ,
731+ ] )
732+ . into_array ( ) ;
733+
734+ let actual = actual. into_arrow_preferred ( ) . unwrap ( ) ;
735+ let expected = expected. into_arrow_preferred ( ) . unwrap ( ) ;
736+
737+ assert_eq ! ( actual. data_type( ) , expected. data_type( ) ) ;
738+ assert_eq ! ( & actual, & expected) ;
739+ }
723740}
0 commit comments