@@ -222,26 +222,44 @@ pub fn flat_vector_to_vortex(vector: &mut Vector, len: usize) -> VortexResult<Ar
222222 }
223223 DUCKDB_TYPE :: DUCKDB_TYPE_LIST => {
224224 let mut offsets = BufferMut :: with_capacity ( len) ;
225- let mut lengths = BufferMut :: with_capacity ( len) ;
226- for duckdb_list_entry { offset, length } in
227- vector. as_slice_with_len :: < duckdb_list_entry > ( len)
225+ let mut sizes = BufferMut :: with_capacity ( len) ;
226+
227+ let mut child_max_length = 0 ;
228+ for duckdb_list_entry {
229+ offset,
230+ length : size,
231+ } in vector. as_slice_with_len :: < duckdb_list_entry > ( len)
228232 {
233+ let offset = i64:: try_from ( * offset) . vortex_expect ( "offset must fit i64" ) ;
234+ assert ! ( offset >= 0 , "offset must be non-negative" ) ;
235+ let size = i64:: try_from ( * size) . vortex_expect ( "length must fit i64" ) ;
236+ assert ! ( size >= 0 , "size must be non-negative" ) ;
237+
238+ // TODO(connor): Figure out if we need to worry about validity. In Arrow, null list
239+ // views must still have a valid offset and size, but this might not be true is
240+ // DuckDB? The public docs don't say anything about this...
241+
242+ // We have to track the ends of all list views to determine the length of the child
243+ // vector since 1) they can be arbitrarily out of order and 2) there might be a null
244+ // list at the end of the vector.
245+ let end = usize:: try_from ( offset + size) . vortex_expect (
246+ "either math is broken or the child vector length did not \
247+ fit into a 32-bit `usize` type",
248+ ) ;
249+ child_max_length = child_max_length. max ( end) ;
250+
251+ // SAFETY: We just allocated enough capacity for both of these.
229252 unsafe {
230- offsets. push_unchecked (
231- i64:: try_from ( * offset) . vortex_expect ( "offset must fit i64" ) ,
232- ) ;
233- lengths. push_unchecked (
234- i64:: try_from ( * length) . vortex_expect ( "length must fit i64" ) ,
235- ) ;
253+ offsets. push_unchecked ( offset) ;
254+ sizes. push_unchecked ( size) ;
236255 }
237256 }
257+
238258 let offsets = offsets. freeze ( ) ;
239- let lengths = lengths. freeze ( ) ;
240- let child_data = flat_vector_to_vortex (
241- & mut vector. list_vector_get_child ( ) ,
242- usize:: try_from ( offsets[ len - 1 ] + lengths[ len - 1 ] )
243- . vortex_expect ( "last offset and length sum must fit in usize " ) ,
244- ) ?;
259+ let lengths = sizes. freeze ( ) ;
260+
261+ let child_data =
262+ flat_vector_to_vortex ( & mut vector. list_vector_get_child ( ) , child_max_length) ?;
245263
246264 ListViewArray :: try_new (
247265 child_data,
@@ -651,4 +669,105 @@ mod tests {
651669 & [ 5 , 6 , 7 , 8 ]
652670 ) ;
653671 }
672+
673+ #[ test]
674+ fn test_list_with_trailing_null ( ) {
675+ // Regression test: when the last list entry is null, its offset/length may be 0/0,
676+ // so we can't use the last entry to compute child vector length.
677+ let child_values = vec ! [ 1i32 , 2 , 3 , 4 ] ;
678+ let len = 2 ;
679+
680+ let logical_type =
681+ LogicalType :: list_type ( LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_INTEGER ) )
682+ . vortex_unwrap ( ) ;
683+ let mut vector = Vector :: with_capacity ( logical_type, len) ;
684+
685+ // Entry 0: offset=0, length=4 -> all elements (end=4)
686+ // Entry 1: null, offset=0, length=0 (end=0)
687+ unsafe {
688+ let entries = vector. as_slice_mut :: < duckdb_list_entry > ( len) ;
689+ entries[ 0 ] = duckdb_list_entry {
690+ offset : 0 ,
691+ length : child_values. len ( ) as u64 ,
692+ } ;
693+ entries[ 1 ] = duckdb_list_entry {
694+ offset : 0 ,
695+ length : 0 ,
696+ } ;
697+ let mut child = vector. list_vector_get_child ( ) ;
698+ let slice = child. as_slice_mut :: < i32 > ( child_values. len ( ) ) ;
699+ slice. copy_from_slice ( & child_values) ;
700+ }
701+
702+ // Set the second entry as null.
703+ let validity_slice = unsafe { vector. ensure_validity_bitslice ( len) } ;
704+ validity_slice. set ( 1 , false ) ;
705+
706+ // Test conversion - the old bug would compute child length as 0+0=0 instead of
707+ // max(4,0)=4.
708+ let result = flat_vector_to_vortex ( & mut vector, len) . unwrap ( ) ;
709+ let vortex_array = result. to_listview ( ) ;
710+
711+ assert_eq ! ( vortex_array. len( ) , len) ;
712+ assert_eq ! (
713+ vortex_array
714+ . list_elements_at( 0 )
715+ . to_primitive( )
716+ . as_slice:: <i32 >( ) ,
717+ & [ 1 , 2 , 3 , 4 ]
718+ ) ;
719+ assert_eq ! ( vortex_array. validity_mask( ) , Mask :: from_indices( 2 , vec![ 0 ] ) ) ;
720+ }
721+
722+ #[ test]
723+ fn test_list_out_of_order ( ) {
724+ // Regression test: list views can be out of order in DuckDB. The child vector length
725+ // must be computed as the maximum end offset, not just the last entry's end offset.
726+ let child_values = vec ! [ 1i32 , 2 , 3 , 4 ] ;
727+ let len = 2 ;
728+
729+ let logical_type =
730+ LogicalType :: list_type ( LogicalType :: new ( DUCKDB_TYPE :: DUCKDB_TYPE_INTEGER ) )
731+ . vortex_unwrap ( ) ;
732+ let mut vector = Vector :: with_capacity ( logical_type, len) ;
733+
734+ // Populate with out-of-order list entries:
735+ // - Entry 0: offset=2, length=2 -> elements [3, 4] (end=4)
736+ // - Entry 1: offset=0, length=2 -> elements [1, 2] (end=2)
737+ unsafe {
738+ let entries = vector. as_slice_mut :: < duckdb_list_entry > ( len) ;
739+ entries[ 0 ] = duckdb_list_entry {
740+ offset : 2 ,
741+ length : 2 ,
742+ } ;
743+ entries[ 1 ] = duckdb_list_entry {
744+ offset : 0 ,
745+ length : 2 ,
746+ } ;
747+ let mut child = vector. list_vector_get_child ( ) ;
748+ let slice = child. as_slice_mut :: < i32 > ( child_values. len ( ) ) ;
749+ slice. copy_from_slice ( & child_values) ;
750+ }
751+
752+ // Test conversion - the old bug would compute child length as 0+2=2 instead of
753+ // max(4,2)=4.
754+ let result = flat_vector_to_vortex ( & mut vector, len) . unwrap ( ) ;
755+ let vortex_array = result. to_listview ( ) ;
756+
757+ assert_eq ! ( vortex_array. len( ) , len) ;
758+ assert_eq ! (
759+ vortex_array
760+ . list_elements_at( 0 )
761+ . to_primitive( )
762+ . as_slice:: <i32 >( ) ,
763+ & [ 3 , 4 ]
764+ ) ;
765+ assert_eq ! (
766+ vortex_array
767+ . list_elements_at( 1 )
768+ . to_primitive( )
769+ . as_slice:: <i32 >( ) ,
770+ & [ 1 , 2 ]
771+ ) ;
772+ }
654773}
0 commit comments