Skip to content

Commit dbbfd56

Browse files
authored
Filter mask (#1327)
Add a memoized filter mask. Follow-ups (FLUPs): - [ ] Swap to a single `iter()` function that returns an enum of either slices or indices, chosen based on the mask's selectivity. This forces all consumers to support both representations.
1 parent f952450 commit dbbfd56

File tree

34 files changed

+403
-484
lines changed

34 files changed

+403
-484
lines changed

encodings/alp/src/alp/compute.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use vortex_array::array::ConstantArray;
22
use vortex_array::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn};
33
use vortex_array::compute::{
4-
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn,
5-
TakeFn, TakeOptions,
4+
compare, filter, slice, take, ArrayCompute, FilterFn, FilterMask, MaybeCompareFn, Operator,
5+
SliceFn, TakeFn, TakeOptions,
66
};
77
use vortex_array::stats::{ArrayStatistics, Stat};
88
use vortex_array::variants::PrimitiveArrayTrait;
@@ -86,11 +86,11 @@ impl SliceFn for ALPArray {
8686
}
8787

8888
impl FilterFn for ALPArray {
89-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
89+
fn filter(&self, mask: &FilterMask) -> VortexResult<ArrayData> {
9090
Ok(Self::try_new(
91-
filter(self.encoded(), predicate)?,
91+
filter(&self.encoded(), mask)?,
9292
self.exponents(),
93-
self.patches().map(|p| filter(&p, predicate)).transpose()?,
93+
self.patches().map(|p| filter(&p, mask)).transpose()?,
9494
)?
9595
.into_array())
9696
}
Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
use vortex_array::compute::{filter, FilterFn};
1+
use vortex_array::compute::{filter, FilterFn, FilterMask};
22
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
33
use vortex_error::VortexResult;
44

55
use crate::ALPRDArray;
66

77
impl FilterFn for ALPRDArray {
8-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
8+
fn filter(&self, mask: &FilterMask) -> VortexResult<ArrayData> {
99
let left_parts_exceptions = self
1010
.left_parts_exceptions()
11-
.map(|array| filter(&array, predicate))
11+
.map(|array| filter(&array, mask))
1212
.transpose()?;
1313

1414
Ok(ALPRDArray::try_new(
1515
self.dtype().clone(),
16-
filter(self.left_parts(), predicate)?,
16+
filter(&self.left_parts(), mask)?,
1717
self.left_parts_dict(),
18-
filter(self.right_parts(), predicate)?,
18+
filter(&self.right_parts(), mask)?,
1919
self.right_bit_width(),
2020
left_parts_exceptions,
2121
)?
@@ -26,8 +26,8 @@ impl FilterFn for ALPRDArray {
2626
#[cfg(test)]
2727
mod test {
2828
use rstest::rstest;
29-
use vortex_array::array::{BoolArray, PrimitiveArray};
30-
use vortex_array::compute::filter;
29+
use vortex_array::array::PrimitiveArray;
30+
use vortex_array::compute::{filter, FilterMask};
3131
use vortex_array::IntoArrayVariant;
3232

3333
use crate::{ALPRDFloat, RDEncoder};
@@ -43,10 +43,13 @@ mod test {
4343
assert!(encoded.left_parts_exceptions().is_some());
4444

4545
// The first two values need no patching
46-
let filtered = filter(encoded.as_ref(), BoolArray::from_iter([true, false, true]))
47-
.unwrap()
48-
.into_primitive()
49-
.unwrap();
46+
let filtered = filter(
47+
encoded.as_ref(),
48+
&FilterMask::from_iter([true, false, true]),
49+
)
50+
.unwrap()
51+
.into_primitive()
52+
.unwrap();
5053
assert_eq!(filtered.maybe_null_slice::<T>(), &[a, outlier]);
5154
}
5255
}

encodings/bytebool/src/array.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,6 @@ impl BoolArrayTrait for ByteBoolArray {
9999
)
100100
.map(|a| a.into_array())
101101
}
102-
103-
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
104-
todo!()
105-
}
106-
107-
fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
108-
todo!()
109-
}
110102
}
111103

112104
impl From<Vec<bool>> for ByteBoolArray {

encodings/dict/src/compute.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use vortex_array::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn};
22
use vortex_array::compute::{
3-
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn,
4-
TakeFn, TakeOptions,
3+
compare, filter, slice, take, ArrayCompute, FilterFn, FilterMask, MaybeCompareFn, Operator,
4+
SliceFn, TakeFn, TakeOptions,
55
};
66
use vortex_array::stats::{ArrayStatistics, Stat};
77
use vortex_array::{ArrayData, IntoArrayData};
@@ -86,8 +86,8 @@ impl TakeFn for DictArray {
8686
}
8787

8888
impl FilterFn for DictArray {
89-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
90-
let codes = filter(self.codes(), predicate)?;
89+
fn filter(&self, mask: &FilterMask) -> VortexResult<ArrayData> {
90+
let codes = filter(&self.codes(), mask)?;
9191
Self::try_new(codes, self.values()).map(|a| a.into_array())
9292
}
9393
}

encodings/dict/src/variants.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,6 @@ impl BoolArrayTrait for DictArray {
2929
fn invert(&self) -> VortexResult<ArrayData> {
3030
todo!()
3131
}
32-
33-
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
34-
todo!()
35-
}
36-
37-
fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
38-
todo!()
39-
}
4032
}
4133

4234
impl PrimitiveArrayTrait for DictArray {}
Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
use vortex_array::compute::unary::ScalarAtFn;
2-
use vortex_array::compute::{filter, ArrayCompute, FilterFn, SearchSortedFn, SliceFn, TakeFn};
3-
use vortex_array::stats::ArrayStatistics;
4-
use vortex_array::{ArrayData, IntoCanonical};
5-
use vortex_error::{vortex_err, VortexResult};
2+
use vortex_array::compute::{ArrayCompute, SearchSortedFn, SliceFn, TakeFn};
63

74
use crate::BitPackedArray;
85

@@ -12,10 +9,6 @@ mod slice;
129
mod take;
1310

1411
impl ArrayCompute for BitPackedArray {
15-
fn filter(&self) -> Option<&dyn FilterFn> {
16-
Some(self)
17-
}
18-
1912
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
2013
Some(self)
2114
}
@@ -32,14 +25,3 @@ impl ArrayCompute for BitPackedArray {
3225
Some(self)
3326
}
3427
}
35-
36-
impl FilterFn for BitPackedArray {
37-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
38-
let _predicate_true_count = predicate
39-
.statistics()
40-
.compute_true_count()
41-
.ok_or_else(|| vortex_err!("Cannot compute true count of predicate"))?;
42-
43-
filter(self.clone().into_canonical()?.as_ref(), predicate)
44-
}
45-
}

encodings/fastlanes/src/bitpacking/compute/search_sorted.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,11 @@ impl<'a, T: BitPacking + NativePType> BitPackedSearch<'a, T> {
147147
Validity::AllInvalid => 0,
148148
Validity::Array(varray) => {
149149
// In sorted order, nulls come after all the non-null values.
150-
varray.with_dyn(|a| a.as_bool_array_unchecked().true_count())
150+
varray.with_dyn(|a| {
151+
a.statistics()
152+
.compute_true_count()
153+
.vortex_expect("Failed to compute true count")
154+
})
151155
}
152156
};
153157

encodings/fastlanes/src/for/compute.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use std::ops::{AddAssign, Shl, Shr};
33
use num_traits::{WrappingAdd, WrappingSub};
44
use vortex_array::compute::unary::{scalar_at_unchecked, ScalarAtFn};
55
use vortex_array::compute::{
6-
filter, search_sorted, slice, take, ArrayCompute, FilterFn, SearchResult, SearchSortedFn,
7-
SearchSortedSide, SliceFn, TakeFn, TakeOptions,
6+
filter, search_sorted, slice, take, ArrayCompute, FilterFn, FilterMask, SearchResult,
7+
SearchSortedFn, SearchSortedSide, SliceFn, TakeFn, TakeOptions,
88
};
99
use vortex_array::variants::PrimitiveArrayTrait;
1010
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
@@ -48,9 +48,9 @@ impl TakeFn for FoRArray {
4848
}
4949

5050
impl FilterFn for FoRArray {
51-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
51+
fn filter(&self, mask: &FilterMask) -> VortexResult<ArrayData> {
5252
Self::try_new(
53-
filter(self.encoded(), predicate)?,
53+
filter(&self.encoded(), mask)?,
5454
self.owned_reference_scalar(),
5555
self.shift(),
5656
)

encodings/fsst/src/compute.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use fsst::Symbol;
22
use vortex_array::array::{varbin_scalar, ConstantArray};
33
use vortex_array::compute::unary::{scalar_at_unchecked, ScalarAtFn};
44
use vortex_array::compute::{
5-
compare, filter, slice, take, ArrayCompute, FilterFn, MaybeCompareFn, Operator, SliceFn,
6-
TakeFn, TakeOptions,
5+
compare, filter, slice, take, ArrayCompute, FilterFn, FilterMask, MaybeCompareFn, Operator,
6+
SliceFn, TakeFn, TakeOptions,
77
};
88
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
99
use vortex_buffer::Buffer;
@@ -151,13 +151,13 @@ impl ScalarAtFn for FSSTArray {
151151

152152
impl FilterFn for FSSTArray {
153153
// Filtering an FSSTArray filters the codes array, leaving the symbols array untouched
154-
fn filter(&self, predicate: &ArrayData) -> VortexResult<ArrayData> {
154+
fn filter(&self, mask: &FilterMask) -> VortexResult<ArrayData> {
155155
Ok(Self::try_new(
156156
self.dtype().clone(),
157157
self.symbols(),
158158
self.symbol_lengths(),
159-
filter(self.codes(), predicate)?,
160-
filter(self.uncompressed_lengths(), predicate)?,
159+
filter(&self.codes(), mask)?,
160+
filter(&self.uncompressed_lengths(), mask)?,
161161
)?
162162
.into_array())
163163
}

encodings/fsst/tests/fsst_tests.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#![cfg(test)]
22

33
use vortex_array::array::builder::VarBinBuilder;
4-
use vortex_array::array::{BoolArray, PrimitiveArray};
4+
use vortex_array::array::PrimitiveArray;
55
use vortex_array::compute::unary::scalar_at;
6-
use vortex_array::compute::{filter, slice, take, TakeOptions};
6+
use vortex_array::compute::{filter, slice, take, FilterMask, TakeOptions};
77
use vortex_array::validity::Validity;
88
use vortex_array::{ArrayData, ArrayDef, IntoArrayData, IntoCanonical};
99
use vortex_dtype::{DType, Nullability};
@@ -85,9 +85,9 @@ fn test_fsst_array_ops() {
8585
);
8686

8787
// test filter
88-
let predicate = BoolArray::from_iter([false, true, false]).into_array();
88+
let mask = FilterMask::from_iter([false, true, false]);
8989

90-
let fsst_filtered = filter(&fsst_array, &predicate).unwrap();
90+
let fsst_filtered = filter(&fsst_array, &mask).unwrap();
9191
assert_eq!(fsst_filtered.encoding().id(), FSST::ENCODING.id());
9292
assert_eq!(fsst_filtered.len(), 1);
9393
assert_nth_scalar!(

0 commit comments

Comments
 (0)