File tree Expand file tree Collapse file tree 2 files changed +11
-12
lines changed
vortex-layout/src/layouts Expand file tree Collapse file tree 2 files changed +11
-12
lines changed Original file line number Diff line number Diff line change @@ -92,6 +92,7 @@ impl DictReader {
9292 // We capture the name, so it may be wrong if we re-use the same reader within multiple
9393 // different parent readers. But that's rare...
9494 let values_len = self . values_len ;
95+ let session = self . session . clone ( ) ;
9596 self . values_array
9697 . get_or_init ( move || {
9798 self . values
@@ -102,7 +103,15 @@ impl DictReader {
102103 )
103104 . vortex_expect ( "must construct dict values array evaluation" )
104105 . map_err ( Arc :: new)
105- . map_ok ( |arr| arr. to_canonical ( ) . into_array ( ) )
106+ . map ( move |array| {
107+ if * USE_VORTEX_OPERATORS {
108+ // We execute the array to avoid re-evaluating for every split.
109+ let array = array?;
110+ Ok ( array. execute_vector ( & session) ?. into_array ( array. dtype ( ) ) )
111+ } else {
112+ Ok ( array?. to_canonical ( ) . into_array ( ) )
113+ }
114+ } )
106115 . boxed ( )
107116 . shared ( )
108117 } )
Original file line number Diff line number Diff line change @@ -37,16 +37,6 @@ use crate::segments::SegmentSource;
3737// actual expression? Perhaps all expressions are given a selection mask to decide for themselves?
3838const EXPR_EVAL_THRESHOLD : f64 = 0.2 ;
3939
40- /// Below this mask density we will propagate filters one by one. In other words, we filter an
41- /// array using a mask prior to running a filter expression, and then have to perform a more
42- /// expensive rank intersection on the result. This threshold exists because filtering has a
43- /// non-trivial cost, and often that cost outweighs evaluating the filter expression over a few
44- /// more rows that are already known to be false.
45- ///
46- /// TODO(ngates): this threshold should really be estimated based on the cost of the filter +
47- /// the cost of the expression itself.
48- const FILTER_OF_FILTER_THRESHOLD : f64 = 0.8 ;
49-
5040pub struct FlatReader {
5141 layout : FlatLayout ,
5242 name : Arc < str > ,
@@ -157,7 +147,7 @@ impl LayoutReader for FlatReader {
157147 }
158148
159149 let array_mask = if * USE_VORTEX_OPERATORS {
160- if mask. density ( ) < FILTER_OF_FILTER_THRESHOLD {
150+ if mask. density ( ) < EXPR_EVAL_THRESHOLD {
161151 // We have the choice to apply the filter or the expression first, we apply the
162152 // expression first so that it can try pushing down itself and then the filter
163153 // after this.
You can’t perform that action at this time.
0 commit comments