1- use arrow_buffer:: { BooleanBuffer , BooleanBufferBuilder } ;
2- use vortex_error:: VortexResult ;
1+ use arrow_buffer:: { bit_util , BooleanBuffer , BooleanBufferBuilder } ;
2+ use vortex_error:: { VortexExpect , VortexResult } ;
33
44use crate :: array:: BoolArray ;
5- use crate :: compute:: { FilterFn , FilterMask } ;
5+ use crate :: compute:: { FilterFn , FilterIter , FilterMask } ;
66use crate :: { ArrayData , IntoArrayData } ;
77
88impl FilterFn for BoolArray {
9- fn filter ( & self , mask : & FilterMask ) -> VortexResult < ArrayData > {
10- filter_select_bool ( self , mask) . map ( |a| a. into_array ( ) )
9+ fn filter ( & self , mask : FilterMask ) -> VortexResult < ArrayData > {
10+ let validity = self . validity ( ) . filter ( & mask) ?;
11+
12+ let buffer = match mask. iter ( ) ? {
13+ FilterIter :: Indices ( indices) => filter_indices_slice ( & self . boolean_buffer ( ) , indices) ,
14+ FilterIter :: IndicesIter ( iter) => {
15+ filter_indices ( & self . boolean_buffer ( ) , mask. true_count ( ) , iter)
16+ }
17+ FilterIter :: Slices ( slices) => filter_slices (
18+ & self . boolean_buffer ( ) ,
19+ mask. true_count ( ) ,
20+ slices. iter ( ) . copied ( ) ,
21+ ) ,
22+ FilterIter :: SlicesIter ( iter) => {
23+ filter_slices ( & self . boolean_buffer ( ) , mask. true_count ( ) , iter)
24+ }
25+ } ;
26+
27+ Ok ( Self :: try_new ( buffer, validity) ?. into_array ( ) )
1128 }
1229}
1330
14- fn filter_select_bool ( arr : & BoolArray , mask : & FilterMask ) -> VortexResult < BoolArray > {
15- let validity = arr. validity ( ) . filter ( mask) ?;
16-
17- let selection_count = mask. true_count ( ) ;
18- let out = if selection_count * 2 > arr. len ( ) {
19- filter_select_bool_by_slice ( & arr. boolean_buffer ( ) , mask, selection_count)
20- } else {
21- filter_select_bool_by_index ( & arr. boolean_buffer ( ) , mask, selection_count)
22- } ;
23- BoolArray :: try_new ( out?, validity)
31+ /// Select indices from a boolean buffer.
32+ /// NOTE: it was benchmarked to be faster using collect_bool to index into a slice than to
33+ /// pass the indices as an iterator of usize. So we keep this alternate implementation.
34+ fn filter_indices_slice ( buffer : & BooleanBuffer , indices : & [ usize ] ) -> BooleanBuffer {
35+ let src = buffer. values ( ) . as_ptr ( ) ;
36+ let offset = buffer. offset ( ) ;
37+ BooleanBuffer :: collect_bool ( indices. len ( ) , |idx| unsafe {
38+ bit_util:: get_bit_raw ( src, * indices. get_unchecked ( idx) + offset)
39+ } )
2440}
2541
26- fn filter_select_bool_by_slice (
27- values : & BooleanBuffer ,
28- mask : & FilterMask ,
29- selection_count : usize ,
30- ) -> VortexResult < BooleanBuffer > {
31- let mut out_buf = BooleanBufferBuilder :: new ( selection_count) ;
32- mask. iter_slices ( ) ?. for_each ( |( start, end) | {
33- out_buf. append_buffer ( & values. slice ( start, end - start) ) ;
34- } ) ;
35- Ok ( out_buf. finish ( ) )
42+ pub fn filter_indices (
43+ buffer : & BooleanBuffer ,
44+ indices_len : usize ,
45+ mut indices : impl Iterator < Item = usize > ,
46+ ) -> BooleanBuffer {
47+ let src = buffer. values ( ) . as_ptr ( ) ;
48+ let offset = buffer. offset ( ) ;
49+
50+ BooleanBuffer :: collect_bool ( indices_len, |_idx| {
51+ let idx = indices
52+ . next ( )
53+ . vortex_expect ( "iterator is guaranteed to be within the length of the array." ) ;
54+ unsafe { bit_util:: get_bit_raw ( src, idx + offset) }
55+ } )
3656}
3757
38- fn filter_select_bool_by_index (
39- values : & BooleanBuffer ,
40- mask : & FilterMask ,
41- selection_count : usize ,
42- ) -> VortexResult < BooleanBuffer > {
43- let mut out_buf = BooleanBufferBuilder :: new ( selection_count) ;
44- mask. iter_indices ( ) ?
45- . for_each ( |idx| out_buf. append ( values. value ( idx) ) ) ;
46- Ok ( out_buf. finish ( ) )
58+ pub fn filter_slices (
59+ buffer : & BooleanBuffer ,
60+ indices_len : usize ,
61+ slices : impl Iterator < Item = ( usize , usize ) > ,
62+ ) -> BooleanBuffer {
63+ let src = buffer. values ( ) ;
64+ let offset = buffer. offset ( ) ;
65+
66+ let mut builder = BooleanBufferBuilder :: new ( indices_len) ;
67+ for ( start, end) in slices {
68+ builder. append_packed_range ( start + offset..end + offset, src)
69+ }
70+ builder. into ( )
4771}
4872
4973#[ cfg( test) ]
5074mod test {
5175 use itertools:: Itertools ;
5276
53- use crate :: array:: bool:: compute:: filter:: {
54- filter_select_bool, filter_select_bool_by_index, filter_select_bool_by_slice,
55- } ;
77+ use crate :: array:: bool:: compute:: filter:: { filter_indices, filter_slices} ;
5678 use crate :: array:: BoolArray ;
57- use crate :: compute:: FilterMask ;
79+ use crate :: compute:: { filter, FilterMask } ;
80+ use crate :: { IntoArrayData , IntoArrayVariant } ;
5881
5982 #[ test]
6083 fn filter_bool_test ( ) {
6184 let arr = BoolArray :: from_iter ( [ true , true , false ] ) ;
6285 let mask = FilterMask :: from_iter ( [ true , false , true ] ) ;
6386
64- let filtered = filter_select_bool ( & arr, & mask) . unwrap ( ) ;
87+ let filtered = filter ( & arr. into_array ( ) , mask)
88+ . unwrap ( )
89+ . into_bool ( )
90+ . unwrap ( ) ;
6591 assert_eq ! ( 2 , filtered. len( ) ) ;
6692
6793 assert_eq ! (
@@ -73,9 +99,8 @@ mod test {
7399 #[ test]
74100 fn filter_bool_by_slice_test ( ) {
75101 let arr = BoolArray :: from_iter ( [ true , true , false ] ) ;
76- let mask = FilterMask :: from_iter ( [ true , false , true ] ) ;
77102
78- let filtered = filter_select_bool_by_slice ( & arr. boolean_buffer ( ) , & mask , 2 ) . unwrap ( ) ;
103+ let filtered = filter_slices ( & arr. boolean_buffer ( ) , 2 , [ ( 0 , 1 ) , ( 2 , 3 ) ] . into_iter ( ) ) ;
79104 assert_eq ! ( 2 , filtered. len( ) ) ;
80105
81106 assert_eq ! ( vec![ true , false ] , filtered. iter( ) . collect_vec( ) )
@@ -84,9 +109,8 @@ mod test {
84109 #[ test]
85110 fn filter_bool_by_index_test ( ) {
86111 let arr = BoolArray :: from_iter ( [ true , true , false ] ) ;
87- let mask = FilterMask :: from_iter ( [ true , false , true ] ) ;
88112
89- let filtered = filter_select_bool_by_index ( & arr. boolean_buffer ( ) , & mask , 2 ) . unwrap ( ) ;
113+ let filtered = filter_indices ( & arr. boolean_buffer ( ) , 2 , [ 0 , 2 ] . into_iter ( ) ) ;
90114 assert_eq ! ( 2 , filtered. len( ) ) ;
91115
92116 assert_eq ! ( vec![ true , false ] , filtered. iter( ) . collect_vec( ) )
0 commit comments