@@ -37,16 +37,6 @@ use crate::segments::SegmentSource;
3737// actual expression? Perhaps all expressions are given a selection mask to decide for themselves?
3838const EXPR_EVAL_THRESHOLD : f64 = 0.2 ;
3939
40- /// Below this mask density we will propagate filters one by one. In other words, we filter an
41- /// array using a mask prior to running a filter expression, and then have to perform a more
42- /// expensive rank intersection on the result. This threshold exists because filtering has a
43- /// non-trivial cost, and often that cost outweighs evaluating the filter expression over a few
44- /// more rows that are already known to be false.
45- ///
46- /// TODO(ngates): this threshold should really be estimated based on the cost of the filter + the
47- /// cost of the expression itself.
48- const FILTER_OF_FILTER_THRESHOLD : f64 = 0.8 ;
49-
5040pub struct FlatReader {
5141 layout : FlatLayout ,
5242 name : Arc < str > ,
@@ -157,20 +147,13 @@ impl LayoutReader for FlatReader {
157147 }
158148
159149 let array_mask = if * USE_VORTEX_OPERATORS {
160- if mask. density ( ) < FILTER_OF_FILTER_THRESHOLD {
161- // Run only over the pre-filtered rows.
162- let array = array. filter ( mask. clone ( ) ) ?;
163- let array = array. apply ( & expr) ?;
164- let array_mask = array. execute_mask ( & session) ?;
165-
166- mask. intersect_by_rank ( & array_mask)
167- } else {
168- // Run over the full array, with a simpler bitand at the end.
169- let array = array. apply ( & expr) ?;
170- let array_mask = array. execute_mask ( & session) ?;
150+ // Always apply the expression to the full array first, avoiding the overhead of
151+ // premature filter materialization. The operators can optimize the full expression
152+ // tree more effectively than if we fragment the work into filter + apply.
153+ let array = array. apply ( & expr) ?;
154+ let array_mask = array. execute_mask ( & session) ?;
171155
172- mask. bitand ( & array_mask)
173- }
156+ mask. bitand ( & array_mask)
174157 } else {
175158 // TODO(ngates): the mask may actually be dense within a range, as is often the case when
176159 // we have approximate mask results from a zone map. In which case we could look at
0 commit comments