22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
44use vortex_buffer:: { BitBuffer , BitBufferMut , get_bit} ;
5- use vortex_mask:: { Mask , MaskIter } ;
5+ use vortex_mask:: Mask ;
66
77use crate :: filter:: { Filter , MaskIndices } ;
88
9- /// If the filter density is above 80%, we use slices to filter the array instead of indices.
10- // TODO(ngates): we need more experimentation to determine the best threshold here.
11- const FILTER_SLICES_DENSITY_THRESHOLD : f64 = 0.8 ;
12-
139impl Filter < Mask > for & BitBuffer {
1410 type Output = BitBuffer ;
1511
@@ -23,12 +19,29 @@ impl Filter<Mask> for &BitBuffer {
2319 match selection_mask {
2420 Mask :: AllTrue ( _) => self . clone ( ) ,
2521 Mask :: AllFalse ( _) => BitBuffer :: empty ( ) ,
26- Mask :: Values ( v) => match v. threshold_iter ( FILTER_SLICES_DENSITY_THRESHOLD ) {
27- MaskIter :: Indices ( indices) => filter_indices ( self , indices) ,
28- MaskIter :: Slices ( slices) => {
29- filter_slices ( self , selection_mask. true_count ( ) , slices)
30- }
31- } ,
22+ Mask :: Values ( v) => {
23+ filter_indices ( self . inner ( ) . as_ref ( ) , self . offset ( ) , v. indices ( ) ) . freeze ( )
24+ }
25+ }
26+ }
27+ }
28+
29+ impl Filter < Mask > for & mut BitBufferMut {
30+ type Output = ( ) ;
31+
32+ fn filter ( self , selection_mask : & Mask ) {
33+ assert_eq ! (
34+ selection_mask. len( ) ,
35+ self . len( ) ,
36+ "Selection mask length must equal the mask length"
37+ ) ;
38+
39+ match selection_mask {
40+ Mask :: AllTrue ( _) => { }
41+ Mask :: AllFalse ( _) => self . clear ( ) ,
42+ Mask :: Values ( v) => {
43+ * self = filter_indices ( self . inner ( ) . as_slice ( ) , self . offset ( ) , v. indices ( ) )
44+ }
3245 }
3346 }
3447}
@@ -37,25 +50,24 @@ impl Filter<MaskIndices<'_>> for &BitBuffer {
3750 type Output = BitBuffer ;
3851
3952 fn filter ( self , indices : & MaskIndices ) -> BitBuffer {
40- filter_indices ( self , indices)
53+ filter_indices ( self . inner ( ) . as_ref ( ) , self . offset ( ) , indices) . freeze ( )
4154 }
4255}
4356
44- fn filter_indices ( bools : & BitBuffer , indices : & [ usize ] ) -> BitBuffer {
45- let buffer = bools. inner ( ) . as_ref ( ) ;
46- BitBuffer :: collect_bool ( indices. len ( ) , |idx| {
47- let idx = * unsafe { indices. get_unchecked ( idx) } ;
48- get_bit ( buffer, bools. offset ( ) + idx)
49- } )
50- }
57+ impl Filter < MaskIndices < ' _ > > for & mut BitBufferMut {
58+ type Output = ( ) ;
5159
52- fn filter_slices ( buffer : & BitBuffer , output_len : usize , slices : & [ ( usize , usize ) ] ) -> BitBuffer {
53- let mut builder = BitBufferMut :: with_capacity ( output_len) ;
54- for ( start, end) in slices {
55- // TODO(ngates): we probably want a borrowed slice for things like this.
56- builder. append_buffer ( & buffer. slice ( * start..* end) ) ;
60+ fn filter ( self , indices : & MaskIndices ) {
61+ * self = filter_indices ( self . inner ( ) . as_ref ( ) , self . offset ( ) , indices)
5762 }
58- builder. freeze ( )
63+ }
64+
65+ fn filter_indices ( bools : & [ u8 ] , bit_offset : usize , indices : & [ usize ] ) -> BitBufferMut {
66+ // FIXME(ngates): this is slower than it could be!
67+ BitBufferMut :: collect_bool ( indices. len ( ) , |idx| {
68+ let idx = * unsafe { indices. get_unchecked ( idx) } ;
69+ get_bit ( bools, bit_offset + idx)
70+ } )
5971}
6072
6173#[ cfg( test) ]
@@ -64,20 +76,10 @@ mod test {
6476
6577 use super :: * ;
6678
67- #[ test]
68- fn filter_bool_by_slice_test ( ) {
69- let bits = bitbuffer ! [ 1 1 0 ] ;
70-
71- let filtered = filter_slices ( & bits, 2 , & [ ( 0 , 1 ) , ( 2 , 3 ) ] ) ;
72- assert_eq ! ( 2 , filtered. len( ) ) ;
73-
74- assert_eq ! ( filtered, bitbuffer![ 1 0 ] )
75- }
76-
/// Selecting positions 0 and 2 of `1 1 0` must yield the two-bit buffer `1 0`.
#[test]
fn filter_bool_by_index_test() {
    let source = bitbuffer![1 1 0];
    let selected = filter_indices(source.inner().as_ref(), 0, &[0, 2]).freeze();
    assert_eq!(2, selected.len());
    assert_eq!(selected, bitbuffer![1 0])
}
0 commit comments