22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
44use std:: mem:: MaybeUninit ;
5+ use std:: sync:: Arc ;
56
67use fastlanes:: BitPacking ;
78use vortex_array:: ExecutionCtx ;
8- use vortex_array:: IntoArray ;
9- use vortex_array:: VectorExecutor ;
109use vortex_array:: arrays:: FilterArray ;
1110use vortex_array:: arrays:: FilterVTable ;
1211use vortex_array:: kernel:: ExecuteParentKernel ;
12+ use vortex_array:: kernel:: ParentKernelSet ;
1313use vortex_array:: matchers:: Exact ;
14- use vortex_array:: patches:: patch_pvector;
15- use vortex_buffer:: Buffer ;
1614use vortex_buffer:: BufferMut ;
1715use vortex_compute:: filter:: Filter ;
1816use vortex_dtype:: NativePType ;
1917use vortex_dtype:: PType ;
2018use vortex_dtype:: UnsignedPType ;
2119use vortex_dtype:: match_each_integer_ptype;
22- use vortex_error:: VortexExpect ;
2320use vortex_error:: VortexResult ;
2421use vortex_mask:: Mask ;
22+ use vortex_mask:: MaskValues ;
2523use vortex_vector:: Vector ;
26- use vortex_vector:: VectorMut ;
2724use vortex_vector:: VectorMutOps ;
2825use vortex_vector:: primitive:: PVector ;
26+ use vortex_vector:: primitive:: PVectorMut ;
2927use vortex_vector:: primitive:: PrimitiveVector ;
3028
3129use crate :: BitPackedArray ;
3230use crate :: BitPackedVTable ;
33- use crate :: bitpacking:: kernels:: UNPACK_CHUNK_THRESHOLD ;
34- use crate :: bitpacking:: kernels:: chunked_indices;
31+ use crate :: bitpacking:: vtable:: kernels:: UNPACK_CHUNK_THRESHOLD ;
32+ use crate :: bitpacking:: vtable:: kernels:: chunked_indices;
33+
34+ pub ( crate ) const PARENT_KERNELS : ParentKernelSet < BitPackedVTable > =
35+ ParentKernelSet :: new ( & [ ParentKernelSet :: lift ( & BitPackingFilterKernel ) ] ) ;
3536
3637/// The threshold over which it is faster to fully unpack the entire [`BitPackedArray`] and then
3738/// filter the result than to unpack only specific bitpacked values into the output buffer.
@@ -48,6 +49,7 @@ pub const fn unpack_then_filter_threshold<T>() -> f64 {
4849 }
4950}
5051
/// Kernel to execute filtering directly on a bit-packed array.
///
/// This is a stateless marker type: all behavior lives in its `ExecuteParentKernel`
/// implementation for `BitPackedVTable`.
#[derive(Debug)]
struct BitPackingFilterKernel;
5355
@@ -63,50 +65,47 @@ impl ExecuteParentKernel<BitPackedVTable> for BitPackingFilterKernel {
6365 array : & BitPackedArray ,
6466 parent : & FilterArray ,
6567 _child_idx : usize ,
66- ctx : & mut ExecutionCtx ,
68+ _ctx : & mut ExecutionCtx ,
6769 ) -> VortexResult < Option < Vector > > {
68- let selection = parent. filter_mask ( ) ;
69-
70- let true_count = selection. true_count ( ) ;
71- if true_count == 0 {
72- // Fast-path for an empty mask.
73- return Ok ( Some ( VectorMut :: with_capacity ( array. dtype ( ) , 0 ) . freeze ( ) ) ) ;
74- } else if true_count == selection. len ( ) {
75- // Fast-path for a full mask.
76- return Ok ( Some ( array. to_array ( ) . execute ( ctx) ?) ) ;
77- }
70+ let values = match parent. filter_mask ( ) {
71+ Mask :: AllTrue ( _) | Mask :: AllFalse ( _) => {
72+ // No optimization for full or empty mask
73+ return Ok ( None ) ;
74+ }
75+ Mask :: Values ( values) => values,
76+ } ;
7877
7978 match_each_integer_ptype ! ( array. ptype( ) , |I | {
8079 // If the density is high enough, then we would rather decompress the whole array and then apply
8180 // a filter over decompressing values one by one.
82- if selection . density( ) > unpack_then_filter_threshold:: <I >( ) {
81+ if values . density( ) > unpack_then_filter_threshold:: <I >( ) {
8382 return Ok ( None ) ;
8483 }
8584 } ) ;
8685
8786 let primitive_vector: PrimitiveVector = match array. ptype ( ) {
88- PType :: U8 => filter_primitive :: < u8 > ( array, selection ) ?. into ( ) ,
89- PType :: U16 => filter_primitive :: < u16 > ( array, selection ) ?. into ( ) ,
90- PType :: U32 => filter_primitive :: < u32 > ( array, selection ) ?. into ( ) ,
91- PType :: U64 => filter_primitive :: < u64 > ( array, selection ) ?. into ( ) ,
87+ PType :: U8 => filter_primitive :: < u8 > ( array, values ) ?. into ( ) ,
88+ PType :: U16 => filter_primitive :: < u16 > ( array, values ) ?. into ( ) ,
89+ PType :: U32 => filter_primitive :: < u32 > ( array, values ) ?. into ( ) ,
90+ PType :: U64 => filter_primitive :: < u64 > ( array, values ) ?. into ( ) ,
9291
9392 // Since the fastlanes crate only supports unsigned integers, and since we know that all
9493 // numbers are going to be non-negative, we can safely "cast" to unsigned and back.
9594 PType :: I8 => {
96- let pvector = filter_primitive :: < u8 > ( array, selection ) ?;
97- pvector. cast_into :: < i8 > ( ) . into ( )
95+ let pvector = filter_primitive :: < u8 > ( array, values ) ?;
96+ unsafe { pvector. transmute :: < i8 > ( ) } . into ( )
9897 }
9998 PType :: I16 => {
100- let pvector = filter_primitive :: < u16 > ( array, selection ) ?;
101- pvector. cast_into :: < i16 > ( ) . into ( )
99+ let pvector = filter_primitive :: < u16 > ( array, values ) ?;
100+ unsafe { pvector. transmute :: < i16 > ( ) } . into ( )
102101 }
103102 PType :: I32 => {
104- let pvector = filter_primitive :: < u32 > ( array, selection ) ?;
105- pvector. cast_into :: < i32 > ( ) . into ( )
103+ let pvector = filter_primitive :: < u32 > ( array, values ) ?;
104+ unsafe { pvector. transmute :: < i32 > ( ) } . into ( )
106105 }
107106 PType :: I64 => {
108- let pvector = filter_primitive :: < u64 > ( array, selection ) ?;
109- pvector. cast_into :: < i64 > ( ) . into ( )
107+ let pvector = filter_primitive :: < u64 > ( array, values ) ?;
108+ unsafe { pvector. transmute :: < i64 > ( ) } . into ( )
110109 }
111110 other => {
112111 unreachable ! ( "Unsupported ptype {other} for bitpacking, we also checked this above" )
@@ -128,42 +127,39 @@ impl ExecuteParentKernel<BitPackedVTable> for BitPackingFilterKernel {
128127/// elements is relatively slow.
129128fn filter_primitive < U : UnsignedPType + BitPacking > (
130129 array : & BitPackedArray ,
131- selection : & Mask ,
130+ selection : & Arc < MaskValues > ,
132131) -> VortexResult < PVector < U > > {
133- let values = filter_with_indices (
134- array,
135- selection
136- . values ( )
137- . vortex_expect ( "AllTrue and AllFalse handled by filter fn" )
138- . indices ( ) ,
139- ) ;
140- let validity = array. validity_mask ( ) . filter ( selection) ;
132+ let values = filter_with_indices ( array, selection. indices ( ) ) ;
133+ let validity = array
134+ . validity_mask ( )
135+ . filter ( & Mask :: Values ( selection. clone ( ) ) )
136+ . into_mut ( ) ;
141137
142138 debug_assert_eq ! (
143139 values. len( ) ,
144140 validity. len( ) ,
145141 "`filter_with_indices` was somehow incorrect"
146142 ) ;
147143
148- let mut pvector = unsafe { PVector :: new_unchecked ( values, validity) } ;
144+ let mut pvector = unsafe { PVectorMut :: new_unchecked ( values, validity) } ;
149145
150146 // TODO(connor): We want a `PatchesArray` or patching compute functions instead of this.
151147 let patches = array
152148 . patches ( )
153- . map ( |patches| patches. filter ( selection) )
149+ . map ( |patches| patches. filter ( & Mask :: Values ( selection. clone ( ) ) ) )
154150 . transpose ( ) ?
155151 . flatten ( ) ;
156152 if let Some ( patches) = patches {
157- pvector = patch_pvector ( pvector, & patches ) ;
153+ pvector = patches . apply_to_pvector ( pvector) ;
158154 }
159155
160- Ok ( pvector)
156+ Ok ( pvector. freeze ( ) )
161157}
162158
163159fn filter_with_indices < T : NativePType + BitPacking > (
164160 array : & BitPackedArray ,
165161 indices : & [ usize ] ,
166- ) -> Buffer < T > {
162+ ) -> BufferMut < T > {
167163 let offset = array. offset ( ) as usize ;
168164 let bit_width = array. bit_width ( ) as usize ;
169165 let mut values = BufferMut :: with_capacity ( indices. len ( ) ) ;
@@ -209,5 +205,5 @@ fn filter_with_indices<T: NativePType + BitPacking>(
209205 }
210206 } ) ;
211207
212- values. freeze ( )
208+ values
213209}
0 commit comments