11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4+ use std:: any:: type_name;
45use std:: sync:: Arc ;
56
67use arrow_array:: Array ;
@@ -19,6 +20,7 @@ use vortex_error::VortexError;
1920use vortex_error:: VortexExpect ;
2021use vortex_error:: VortexResult ;
2122use vortex_error:: vortex_ensure;
23+ use vortex_error:: vortex_err;
2224use vortex_session:: VortexSession ;
2325
2426use crate :: ArrayRef ;
@@ -28,9 +30,11 @@ use crate::arrays::ListArray;
2830use crate :: arrays:: ListVTable ;
2931use crate :: arrays:: ListViewArray ;
3032use crate :: arrays:: ListViewVTable ;
33+ use crate :: arrays:: PrimitiveArray ;
3134use crate :: arrow:: ArrowArrayExecutor ;
3235use crate :: arrow:: executor:: validity:: to_arrow_null_buffer;
3336use crate :: builtins:: ArrayBuiltins ;
37+ use crate :: validity:: Validity ;
3438use crate :: vtable:: ValidityHelper ;
3539
3640/// Convert a Vortex array into an Arrow GenericListArray.
@@ -49,8 +53,9 @@ pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
4953 Ok ( array) => {
5054 if array. is_zero_copy_to_list ( ) {
5155 return list_view_zctl :: < O > ( array, elements_field, session) ;
56+ } else {
57+ return list_view_to_list :: < O > ( array, elements_field, session) ;
5258 }
53- array. into_array ( )
5459 }
5560 Err ( a) => a,
5661 } ;
@@ -159,3 +164,71 @@ fn list_view_zctl<O: OffsetSizeTrait + NativePType>(
159164 null_buffer,
160165 ) ) )
161166}
167+
168+ fn list_view_to_list < O : OffsetSizeTrait + NativePType > (
169+ array : ListViewArray ,
170+ elements_field : & FieldRef ,
171+ session : & VortexSession ,
172+ ) -> VortexResult < ArrowArrayRef > {
173+ let ( elements, offsets, sizes, validity) = array. into_parts ( ) ;
174+
175+ let offsets = offsets
176+ . cast ( DType :: Primitive ( O :: PTYPE , Nullability :: NonNullable ) ) ?
177+ . execute_vector ( session) ?
178+ . into_primitive ( )
179+ . downcast :: < O > ( )
180+ . into_nonnull_buffer ( ) ;
181+ let sizes = sizes
182+ . cast ( DType :: Primitive ( O :: PTYPE , Nullability :: NonNullable ) ) ?
183+ . execute_vector ( session) ?
184+ . into_primitive ( )
185+ . downcast :: < O > ( )
186+ . into_nonnull_buffer ( ) ;
187+
188+ // We create a new offsets buffer for the final list array.
189+ // And we also create an `indices` buffer for taking the elements.
190+ let mut new_offsets = BufferMut :: < O > :: with_capacity ( offsets. len ( ) + 1 ) ;
191+ let mut take_indices = BufferMut :: < u32 > :: with_capacity ( elements. len ( ) ) ;
192+
193+ // Add the offset for the first subarray
194+ new_offsets. push ( O :: zero ( ) ) ;
195+ for ( offset, size) in offsets. iter ( ) . zip ( sizes. iter ( ) ) {
196+ let offset = offset. as_usize ( ) ;
197+ let size = size. as_usize ( ) ;
198+ let end = offset + size;
199+ for j in offset..end {
200+ take_indices. push ( u32:: try_from ( j) . map_err ( |_| {
201+ vortex_err ! ( "List array too large for {} indices" , type_name:: <O >( ) )
202+ } ) ?) ;
203+ }
204+ new_offsets. push ( O :: usize_as ( take_indices. len ( ) ) ) ;
205+ }
206+
207+ // Now we can "take" the elements using the computed indices.
208+ let elements =
209+ elements. take ( PrimitiveArray :: new ( take_indices, Validity :: NonNullable ) . into_array ( ) ) ?;
210+
211+ let elements = elements. execute_arrow ( elements_field. data_type ( ) , session) ?;
212+ vortex_ensure ! (
213+ elements_field. is_nullable( ) || elements. null_count( ) == 0 ,
214+ "Cannot convert to non-nullable Arrow array with null elements"
215+ ) ;
216+
217+ // We need to compute the final offsets from the sizes.
218+ let mut final_offsets = Vec :: with_capacity ( sizes. len ( ) + 1 ) ;
219+ final_offsets. push ( O :: usize_as ( 0 ) ) ;
220+ for i in 0 ..sizes. len ( ) {
221+ let last_offset = final_offsets[ i] . as_usize ( ) ;
222+ let size = sizes[ i] . as_usize ( ) ;
223+ final_offsets. push ( O :: usize_as ( last_offset + size) ) ;
224+ }
225+
226+ let null_buffer = to_arrow_null_buffer ( & validity, sizes. len ( ) , session) ?;
227+
228+ Ok ( Arc :: new ( GenericListArray :: < O > :: new (
229+ elements_field. clone ( ) ,
230+ offsets. into_arrow_offset_buffer ( ) ,
231+ elements,
232+ null_buffer,
233+ ) ) )
234+ }
0 commit comments