11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4+ use std:: sync:: Arc ;
5+
46use vortex_buffer:: BitBufferMut ;
57use vortex_buffer:: BufferMut ;
68use vortex_dtype:: IntegerPType ;
@@ -9,19 +11,18 @@ use vortex_error::VortexExpect;
911use vortex_error:: VortexResult ;
1012use vortex_mask:: Mask ;
1113use vortex_mask:: MaskIter ;
14+ use vortex_mask:: MaskValues ;
1215
1316use crate :: Array ;
1417use crate :: ArrayRef ;
1518use crate :: IntoArray ;
1619use crate :: ToCanonical ;
1720use crate :: arrays:: ListArray ;
1821use crate :: arrays:: ListVTable ;
19- use crate :: arrays:: PrimitiveArray ;
2022use crate :: compute:: FilterKernel ;
2123use crate :: compute:: FilterKernelAdapter ;
2224use crate :: compute:: filter;
2325use crate :: register_kernel;
24- use crate :: validity:: Validity ;
2526use crate :: vtable:: ValidityHelper ;
2627
2728/// Density threshold for choosing between indices and slices representation when expanding masks.
@@ -44,21 +45,22 @@ impl FilterKernel for ListVTable {
4445 ) ;
4546
4647 let ( new_elements, new_offsets) = match_each_integer_ptype ! ( offsets. ptype( ) , |O | {
47- compute_filtered_elements_and_offsets:: <O >(
48+ let ( new_elements , new_offsets ) = compute_filtered_elements_and_offsets:: <O >(
4849 elements. as_ref( ) ,
4950 offsets. as_slice:: <O >( ) ,
5051 selection_mask,
51- ) ?
52+ ) ?;
53+ ( new_elements, new_offsets. into_array( ) )
5254 } ) ;
5355
5456 // SAFETY: Filter operation maintains all ListArray invariants:
5557 // - Offsets are monotonically increasing (built correctly above).
5658 // - Elements are properly filtered to match the offsets.
5759 // - Validity matches the original array's nullability.
58- Ok ( unsafe {
59- ListArray :: new_unchecked ( new_elements, new_offsets. into_array ( ) , new_validity)
60- }
61- . into_array ( ) )
60+ Ok (
61+ unsafe { ListArray :: new_unchecked ( new_elements, new_offsets, new_validity) }
62+ . into_array ( ) ,
63+ )
6264 }
6365}
6466
@@ -74,7 +76,7 @@ fn compute_filtered_elements_and_offsets<O: IntegerPType>(
7476 elements : & dyn Array ,
7577 offsets : & [ O ] ,
7678 selection_mask : & Mask ,
77- ) -> VortexResult < ( ArrayRef , PrimitiveArray ) > {
79+ ) -> VortexResult < ( ArrayRef , BufferMut < O > ) > {
7880 let values = selection_mask
7981 . values ( )
8082 . vortex_expect ( "`AllTrue` and `AllFalse` are handled by filter entry point" ) ;
@@ -132,11 +134,50 @@ fn compute_filtered_elements_and_offsets<O: IntegerPType>(
132134 // The `Mask` can determine the best representation based on the buffer's density in the future.
133135 let new_elements = filter ( elements, & Mask :: from_buffer ( new_mask_builder. freeze ( ) ) ) ?;
134136
135- let new_offsets = PrimitiveArray :: new ( new_offsets, Validity :: NonNullable ) ;
136-
137137 Ok ( ( new_elements, new_offsets) )
138138}
139139
140+ /// Construct an element mask from contiguous list offsets and a selection mask.
141+ pub fn element_mask_from_offsets < O : IntegerPType > (
142+ offsets : & [ O ] ,
143+ selection : & Arc < MaskValues > ,
144+ ) -> Mask {
145+ let first_offset = offsets. first ( ) . map_or ( 0 , |first_offset| first_offset. as_ ( ) ) ;
146+ let last_offset = offsets. last ( ) . map_or ( 0 , |last_offset| last_offset. as_ ( ) ) ;
147+ let len = last_offset - first_offset;
148+
149+ let mut mask_builder = BitBufferMut :: with_capacity ( len) ;
150+
151+ match selection. threshold_iter ( MASK_EXPANSION_DENSITY_THRESHOLD ) {
152+ MaskIter :: Slices ( slices) => {
153+ // Dense iteration: process ranges of consecutive selected lists.
154+ for & ( start, end) in slices {
155+ // Optimization: for dense ranges, we can process the elements mask more efficiently.
156+ let elems_start = offsets[ start] . as_ ( ) - first_offset;
157+ let elems_end = offsets[ end] . as_ ( ) - first_offset;
158+
159+ // Process the entire range of elements at once.
160+ process_element_range ( elems_start, elems_end, & mut mask_builder) ;
161+ }
162+ }
163+ MaskIter :: Indices ( indices) => {
164+ // Sparse iteration: process individual selected lists.
165+ for & idx in indices {
166+ let list_start = offsets[ idx] . as_ ( ) - first_offset;
167+ let list_end = offsets[ idx + 1 ] . as_ ( ) - first_offset;
168+
169+ // Process the elements for this list.
170+ process_element_range ( list_start, list_end, & mut mask_builder) ;
171+ }
172+ }
173+ }
174+
175+ // Pad to full length if necessary.
176+ mask_builder. append_n ( false , last_offset - mask_builder. len ( ) ) ;
177+
178+ Mask :: from_buffer ( mask_builder. freeze ( ) )
179+ }
180+
140181/// Process a range of elements for filtering.
141182fn process_element_range (
142183 elems_start : usize ,
0 commit comments