Skip to content

Commit ab7815b

Browse files
committed
fix: filter
Signed-off-by: Alexander Droste <[email protected]>
1 parent 46b17e5 commit ab7815b

File tree

1 file changed

+6
-23
lines changed

1 file changed

+6
-23
lines changed

vortex-layout/src/layouts/flat/reader.rs

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,6 @@ use crate::segments::SegmentSource;
3737
// actual expression? Perhaps all expressions are given a selection mask to decide for themselves?
3838
const EXPR_EVAL_THRESHOLD: f64 = 0.2;
3939

40-
/// Below this mask density we will propagate filters one by one. In other words, we filter an
41-
/// array using a mask prior to running a filter expression, and then have to perform a more
42-
/// expensive rank intersection on the result. This threshold exists because filtering has a
43-
/// non-trivial cost, and often that cost outweighs evaluating the filter expression over a few
44-
/// more rows that are already known to be false.
45-
///
46-
/// TODO(ngates): this threshold should really be estimated based on the cost of the filter + the
47-
/// cost of the expression itself.
48-
const FILTER_OF_FILTER_THRESHOLD: f64 = 0.8;
49-
5040
pub struct FlatReader {
5141
layout: FlatLayout,
5242
name: Arc<str>,
@@ -157,20 +147,13 @@ impl LayoutReader for FlatReader {
157147
}
158148

159149
let array_mask = if *USE_VORTEX_OPERATORS {
160-
if mask.density() < FILTER_OF_FILTER_THRESHOLD {
161-
// Run only over the pre-filtered rows.
162-
let array = array.filter(mask.clone())?;
163-
let array = array.apply(&expr)?;
164-
let array_mask = array.execute_mask(&session)?;
165-
166-
mask.intersect_by_rank(&array_mask)
167-
} else {
168-
// Run over the full array, with a simpler bitand at the end.
169-
let array = array.apply(&expr)?;
170-
let array_mask = array.execute_mask(&session)?;
150+
// Always apply the expression to the full array first, avoiding the overhead of
151+
// premature filter materialization. The operators can optimize the full expression
152+
// tree more effectively than if we fragment the work into filter + apply.
153+
let array = array.apply(&expr)?;
154+
let array_mask = array.execute_mask(&session)?;
171155

172-
mask.bitand(&array_mask)
173-
}
156+
mask.bitand(&array_mask)
174157
} else {
175158
// TODO(ngates): the mask may actually be dense within a range, as is often the case when
176159
// we have approximate mask results from a zone map. In which case we could look at

0 commit comments

Comments
 (0)