11use arrow_buffer:: { BooleanBufferBuilder , Buffer , MutableBuffer , ScalarBuffer } ;
2- use vortex_dtype:: { DType , PType , StructDType } ;
2+ use vortex_dtype:: { DType , Nullability , PType , StructDType } ;
33use vortex_error:: { vortex_bail, vortex_err, ErrString , VortexExpect , VortexResult } ;
44
55use crate :: array:: chunked:: ChunkedArray ;
66use crate :: array:: extension:: ExtensionArray ;
77use crate :: array:: null:: NullArray ;
88use crate :: array:: primitive:: PrimitiveArray ;
99use crate :: array:: struct_:: StructArray ;
10- use crate :: array:: { BinaryView , BoolArray , VarBinViewArray } ;
10+ use crate :: array:: { BinaryView , BoolArray , ListArray , VarBinViewArray } ;
11+ use crate :: compute:: { scalar_at, slice, try_cast} ;
1112use crate :: validity:: Validity ;
1213use crate :: {
13- ArrayDType , ArrayData , ArrayValidity , Canonical , IntoArrayData , IntoArrayVariant , IntoCanonical ,
14+ ArrayDType , ArrayData , ArrayLen , ArrayValidity , Canonical , IntoArrayData , IntoArrayVariant ,
15+ IntoCanonical ,
1416} ;
1517
1618impl IntoCanonical for ChunkedArray {
@@ -88,9 +90,11 @@ pub(crate) fn try_canonicalize_chunks(
8890 ) ) )
8991 }
9092
91- // TODO(aduffy): better list support
9293 DType :: List ( ..) => {
93- todo ! ( )
94+ // TODO(joe): improve performance, use a listview, once it exists
95+
96+ let list = pack_lists ( chunks. as_slice ( ) , validity, dtype) ?;
97+ Ok ( Canonical :: List ( list) )
9498 }
9599
96100 DType :: Bool ( _) => {
@@ -117,6 +121,50 @@ pub(crate) fn try_canonicalize_chunks(
117121 }
118122}
119123
124+ fn pack_lists ( chunks : & [ ArrayData ] , validity : Validity , dtype : & DType ) -> VortexResult < ListArray > {
125+ let len: usize = chunks. iter ( ) . map ( |c| c. len ( ) ) . sum ( ) ;
126+ let mut offsets = Vec :: with_capacity ( len + 1 ) ;
127+ offsets. push ( 0 ) ;
128+ let mut elements = Vec :: new ( ) ;
129+ let elem_dtype = dtype
130+ . as_list_element ( )
131+ . vortex_expect ( "ListArray must have List dtype" ) ;
132+
133+ for chunk in chunks {
134+ let chunk = chunk. clone ( ) . into_list ( ) ?;
135+ // TODO: handle i32 offsets if they fit.
136+ let offsets_arr = try_cast (
137+ chunk. offsets ( ) ,
138+ & DType :: Primitive ( PType :: I64 , Nullability :: NonNullable ) ,
139+ ) ?
140+ . into_primitive ( ) ?;
141+
142+ let first_offset_value: usize = usize:: try_from ( & scalar_at ( offsets_arr. as_ref ( ) , 0 ) ?) ?;
143+ let last_offset_value: usize =
144+ usize:: try_from ( & scalar_at ( offsets_arr. as_ref ( ) , offsets_arr. len ( ) - 1 ) ?) ?;
145+ elements. push ( slice (
146+ chunk. elements ( ) ,
147+ first_offset_value,
148+ last_offset_value,
149+ ) ?) ;
150+
151+ let adjustment_from_previous = * offsets
152+ . last ( )
153+ . ok_or_else ( || vortex_err ! ( "List offsets must have at least one element" ) ) ?;
154+ offsets. extend (
155+ offsets_arr
156+ . maybe_null_slice :: < i64 > ( )
157+ . iter ( )
158+ . skip ( 1 )
159+ . map ( |off| off + adjustment_from_previous - first_offset_value as i64 ) ,
160+ ) ;
161+ }
162+ let chunked_elements = ChunkedArray :: try_new ( elements, elem_dtype. clone ( ) ) ?. into_array ( ) ;
163+ let offsets = PrimitiveArray :: from_vec ( offsets, Validity :: NonNullable ) ;
164+
165+ ListArray :: try_new ( chunked_elements, offsets. into_array ( ) , validity)
166+ }
167+
120168/// Swizzle the pointers within a ChunkedArray of StructArrays to instead be a single
121169/// StructArray, where the Array for each Field is a ChunkedArray.
122170///
@@ -238,12 +286,17 @@ fn pack_views(
238286
239287#[ cfg( test) ]
240288mod tests {
241- use vortex_dtype:: { DType , Nullability } ;
289+ use std:: sync:: Arc ;
290+
291+ use vortex_dtype:: DType ;
292+ use vortex_dtype:: DType :: { List , Primitive } ;
293+ use vortex_dtype:: Nullability :: NonNullable ;
294+ use vortex_dtype:: PType :: I32 ;
242295
243296 use crate :: accessor:: ArrayAccessor ;
244297 use crate :: array:: chunked:: canonical:: pack_views;
245- use crate :: array:: { ChunkedArray , StructArray , VarBinViewArray } ;
246- use crate :: compute:: slice;
298+ use crate :: array:: { ChunkedArray , ListArray , StructArray , VarBinViewArray } ;
299+ use crate :: compute:: { scalar_at , slice} ;
247300 use crate :: validity:: Validity ;
248301 use crate :: variants:: StructArrayTrait ;
249302 use crate :: { ArrayDType , ArrayLen , IntoArrayData , IntoArrayVariant , ToArrayData } ;
@@ -258,7 +311,7 @@ mod tests {
258311 let array2 = slice ( stringview_array ( ) . as_ref ( ) , 2 , 4 ) . unwrap ( ) ;
259312 let packed = pack_views (
260313 & [ array1, array2] ,
261- & DType :: Utf8 ( Nullability :: NonNullable ) ,
314+ & DType :: Utf8 ( NonNullable ) ,
262315 Validity :: NonNullable ,
263316 )
264317 . unwrap ( ) ;
@@ -308,4 +361,37 @@ mod tests {
308361 . unwrap ( ) ;
309362 assert_eq ! ( orig_values, canon_values) ;
310363 }
364+
/// Canonicalizing a chunked array of lists must yield the lists of each chunk, in chunk order.
#[test]
pub fn pack_nested_lists() {
    // First chunk holds a single list covering elements 0..3; the trailing element `4` is not
    // referenced by any offset.
    let first_chunk = ListArray::try_new(
        vec![1, 2, 3, 4].into_array(),
        vec![0, 3].into_array(),
        Validity::NonNullable,
    )
    .unwrap();

    // Second chunk holds a single list covering both of its elements.
    let second_chunk = ListArray::try_new(
        vec![5, 6].into_array(),
        vec![0, 2].into_array(),
        Validity::NonNullable,
    )
    .unwrap();

    let list_dtype = List(Arc::new(Primitive(I32, NonNullable)), NonNullable);
    let chunked = ChunkedArray::try_new(
        vec![
            first_chunk.clone().into_array(),
            second_chunk.clone().into_array(),
        ],
        list_dtype,
    )
    .unwrap();

    let canonical = chunked.into_list().unwrap();

    // Row 0 of the packed array comes from the first chunk, row 1 from the second.
    let expected_first = scalar_at(first_chunk, 0).unwrap();
    let actual_first = scalar_at(canonical.clone(), 0).unwrap();
    assert_eq!(expected_first, actual_first);

    let expected_second = scalar_at(second_chunk, 0).unwrap();
    let actual_second = scalar_at(canonical, 1).unwrap();
    assert_eq!(expected_second, actual_second);
}
311397}
0 commit comments