Skip to content

Commit aa7a891

Browse files
authored
Dictionary execution (#5791)
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 397d7f4 commit aa7a891

File tree

2 files changed

+11
-12
lines changed

2 files changed

+11
-12
lines changed

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ impl DictReader {
9292
// We capture the name, so it may be wrong if we re-use the same reader within multiple
9393
// different parent readers. But that's rare...
9494
let values_len = self.values_len;
95+
let session = self.session.clone();
9596
self.values_array
9697
.get_or_init(move || {
9798
self.values
@@ -102,7 +103,15 @@ impl DictReader {
102103
)
103104
.vortex_expect("must construct dict values array evaluation")
104105
.map_err(Arc::new)
105-
.map_ok(|arr| arr.to_canonical().into_array())
106+
.map(move |array| {
107+
if *USE_VORTEX_OPERATORS {
108+
// We execute the array to avoid re-evaluating for every split.
109+
let array = array?;
110+
Ok(array.execute_vector(&session)?.into_array(array.dtype()))
111+
} else {
112+
Ok(array?.to_canonical().into_array())
113+
}
114+
})
106115
.boxed()
107116
.shared()
108117
})

vortex-layout/src/layouts/flat/reader.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,6 @@ use crate::segments::SegmentSource;
3737
// actual expression? Perhaps all expressions are given a selection mask to decide for themselves?
3838
const EXPR_EVAL_THRESHOLD: f64 = 0.2;
3939

40-
/// Below this mask density we will propagate filters one by one. In other words, we filter an
41-
/// array using a mask prior to running a filter expression, and then have to perform a more
42-
/// expensive rank intersection on the result. This threshold exists because filtering has a
43-
/// non-trivial cost, and often that cost outweighs evaluating the filter expression over a few
44-
/// more rows that are already known to be false.
45-
///
46-
/// TODO(ngates): this threshold should really be estimated based on the cost of the filter + the
47-
/// cost of the expression itself.
48-
const FILTER_OF_FILTER_THRESHOLD: f64 = 0.8;
49-
5040
pub struct FlatReader {
5141
layout: FlatLayout,
5242
name: Arc<str>,
@@ -157,7 +147,7 @@ impl LayoutReader for FlatReader {
157147
}
158148

159149
let array_mask = if *USE_VORTEX_OPERATORS {
160-
if mask.density() < FILTER_OF_FILTER_THRESHOLD {
150+
if mask.density() < EXPR_EVAL_THRESHOLD {
161151
// We have the choice to apply the filter or the expression first, we apply the
162152
// expression first so that it can try pushing down itself and then the filter
163153
// after this.

0 commit comments

Comments
 (0)