Skip to content

Commit 5458ab2

Browse files
authored
Expression execution to be passed input vectors (#5507)
This execution model trades off short-circuiting in favor of CSE. It also allows expressions to be executed independently of the expression tree, which may be useful depending on how we ultimately end up wrapping up scalar expressions into arrays. @joseph-isaacs the alternative would be to pass a `trait ExecutionArgs` and then calling `child(usize)` could lazily compute the child, bringing back the flexibility of short-circuiting depending on how the expression is ultimately executed. Signed-off-by: Nicholas Gates <[email protected]>
1 parent 130be94 commit 5458ab2

File tree

8 files changed

+165
-113
lines changed

8 files changed

+165
-113
lines changed

vortex-array/src/expr/expression.rs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@ use std::fmt::{Debug, Display, Formatter};
77
use std::hash::{Hash, Hasher};
88
use std::sync::Arc;
99

10+
use itertools::Itertools;
1011
use vortex_dtype::DType;
1112
use vortex_error::{VortexExpect, VortexResult};
12-
use vortex_vector::Vector;
13+
use vortex_vector::{Vector, VectorOps};
1314

1415
use crate::ArrayRef;
1516
use crate::expr::display::DisplayTreeExpr;
16-
use crate::expr::{ChildName, ExprId, ExprVTable, ExpressionView, StatsCatalog, VTable};
17+
use crate::expr::{
18+
ChildName, ExecutionArgs, ExprId, ExprVTable, ExpressionView, Root, StatsCatalog, VTable,
19+
};
1720
use crate::stats::Stat;
1821

1922
/// A node in a Vortex expression tree.
@@ -144,7 +147,31 @@ impl Expression {
144147

145148
/// Executes the expression over the given vector input scope.
146149
pub fn execute(&self, vector: &Vector, dtype: &DType) -> VortexResult<Vector> {
147-
self.vtable.as_dyn().execute(self, vector, dtype)
150+
// We special-case the "root" expression that must extract that scope vector directly.
151+
if self.is::<Root>() {
152+
return Ok(vector.clone());
153+
}
154+
155+
let return_dtype = self.return_dtype(dtype)?;
156+
let child_dtypes: Vec<_> = self
157+
.children
158+
.iter()
159+
.map(|child| child.return_dtype(dtype))
160+
.try_collect()?;
161+
let child_vectors: Vec<_> = self
162+
.children
163+
.iter()
164+
.map(|child| child.execute(vector, dtype))
165+
.try_collect()?;
166+
167+
let args = ExecutionArgs {
168+
vectors: child_vectors,
169+
dtypes: child_dtypes,
170+
row_count: vector.len(),
171+
return_dtype,
172+
};
173+
174+
self.vtable.as_dyn().execute(&self.data, args)
148175
}
149176

150177
/// An expression over zone-statistics which implies all records in the zone evaluate to false.

vortex-array/src/expr/exprs/cast/mod.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ use vortex_vector::Vector;
1313
use crate::ArrayRef;
1414
use crate::compute::cast as compute_cast;
1515
use crate::expr::expression::Expression;
16-
use crate::expr::{ChildName, ExprId, ExpressionView, StatsCatalog, VTable, VTableExt};
16+
use crate::expr::{
17+
ChildName, ExecutionArgs, ExprId, ExpressionView, StatsCatalog, VTable, VTableExt,
18+
};
1719
use crate::stats::Stat;
1820

1921
/// A cast expression that converts values to a target data type.
@@ -88,16 +90,6 @@ impl VTable for Cast {
8890
})
8991
}
9092

91-
fn execute(
92-
&self,
93-
expr: &ExpressionView<Self>,
94-
vector: &Vector,
95-
dtype: &DType,
96-
) -> VortexResult<Vector> {
97-
let input = expr.child(0).execute(vector, dtype)?;
98-
vortex_compute::cast::Cast::cast(&input, dtype)
99-
}
100-
10193
fn stat_expression(
10294
&self,
10395
expr: &ExpressionView<Self>,
@@ -129,6 +121,14 @@ impl VTable for Cast {
129121
}
130122
}
131123
}
124+
125+
fn execute(&self, target_dtype: &DType, mut args: ExecutionArgs) -> VortexResult<Vector> {
126+
let input = args
127+
.vectors
128+
.pop()
129+
.vortex_expect("missing input for Cast expression");
130+
vortex_compute::cast::Cast::cast(&input, target_dtype)
131+
}
132132
}
133133

134134
/// Creates an expression that casts values to a target data type.

vortex-array/src/expr/exprs/get_item/mod.rs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ use std::ops::Not;
99
use prost::Message;
1010
use vortex_compute::mask::MaskValidity;
1111
use vortex_dtype::{DType, FieldName, FieldPath, Nullability};
12-
use vortex_error::{VortexResult, vortex_bail, vortex_err};
12+
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
1313
use vortex_proto::expr as pb;
1414
use vortex_vector::{Vector, VectorOps};
1515

1616
use crate::compute::mask;
1717
use crate::expr::exprs::root::root;
18-
use crate::expr::{ChildName, ExprId, Expression, ExpressionView, StatsCatalog, VTable, VTableExt};
18+
use crate::expr::{
19+
ChildName, ExecutionArgs, ExprId, Expression, ExpressionView, StatsCatalog, VTable, VTableExt,
20+
};
1921
use crate::stats::Stat;
2022
use crate::{ArrayRef, ToCanonical};
2123

@@ -98,29 +100,6 @@ impl VTable for GetItem {
98100
}
99101
}
100102

101-
fn execute(
102-
&self,
103-
expr: &ExpressionView<Self>,
104-
vector: &Vector,
105-
dtype: &DType,
106-
) -> VortexResult<Vector> {
107-
let child_dtype = expr.child(0).return_dtype(dtype)?;
108-
let struct_dtype = child_dtype
109-
.as_struct_fields_opt()
110-
.ok_or_else(|| vortex_err!("Expected struct dtype for child of GetItem expression"))?;
111-
let field_idx = struct_dtype
112-
.find(expr.data())
113-
.ok_or_else(|| vortex_err!("Field {} not found in struct dtype", expr.data()))?;
114-
115-
let struct_vector = expr.child(0).execute(vector, dtype)?.into_struct();
116-
117-
// We must intersect the validity with that of the parent struct
118-
let field = struct_vector.fields()[field_idx].clone();
119-
let field = MaskValidity::mask_validity(field, struct_vector.validity());
120-
121-
Ok(field)
122-
}
123-
124103
fn stat_expression(
125104
&self,
126105
expr: &ExpressionView<Self>,
@@ -137,6 +116,27 @@ impl VTable for GetItem {
137116
// falsify expressions, but for now I'm preserving the existing buggy behavior.
138117
catalog.stats_ref(&FieldPath::from_name(expr.data().clone()), stat)
139118
}
119+
120+
fn execute(&self, field_name: &FieldName, mut args: ExecutionArgs) -> VortexResult<Vector> {
121+
let struct_dtype = args.dtypes[0]
122+
.as_struct_fields_opt()
123+
.ok_or_else(|| vortex_err!("Expected struct dtype for child of GetItem expression"))?;
124+
let field_idx = struct_dtype
125+
.find(field_name)
126+
.ok_or_else(|| vortex_err!("Field {} not found in struct dtype", field_name))?;
127+
128+
let struct_vector = args
129+
.vectors
130+
.pop()
131+
.vortex_expect("missing input")
132+
.into_struct();
133+
134+
// We must intersect the validity with that of the parent struct
135+
let field = struct_vector.fields()[field_idx].clone();
136+
let field = MaskValidity::mask_validity(field, struct_vector.validity());
137+
138+
Ok(field)
139+
}
140140
}
141141

142142
/// Creates an expression that accesses a field from the root array.

vortex-array/src/expr/exprs/is_null.rs

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,17 @@ use std::fmt::Formatter;
55
use std::ops::Not;
66

77
use vortex_dtype::{DType, Nullability};
8-
use vortex_error::{VortexResult, vortex_bail};
8+
use vortex_error::{VortexExpect, VortexResult, vortex_bail};
99
use vortex_mask::Mask;
1010
use vortex_vector::bool::BoolVector;
1111
use vortex_vector::{Vector, VectorOps};
1212

1313
use crate::arrays::{BoolArray, ConstantArray};
1414
use crate::expr::exprs::binary::eq;
1515
use crate::expr::exprs::literal::lit;
16-
use crate::expr::{ChildName, ExprId, Expression, ExpressionView, StatsCatalog, VTable, VTableExt};
16+
use crate::expr::{
17+
ChildName, ExecutionArgs, ExprId, Expression, ExpressionView, StatsCatalog, VTable, VTableExt,
18+
};
1719
use crate::stats::Stat;
1820
use crate::{Array, ArrayRef, IntoArray};
1921

@@ -71,20 +73,6 @@ impl VTable for IsNull {
7173
}
7274
}
7375

74-
fn execute(
75-
&self,
76-
expr: &ExpressionView<Self>,
77-
vector: &Vector,
78-
dtype: &DType,
79-
) -> VortexResult<Vector> {
80-
let child = expr.child(0).execute(vector, dtype)?;
81-
Ok(BoolVector::new(
82-
child.validity().to_bit_buffer().not(),
83-
Mask::new_true(child.len()),
84-
)
85-
.into())
86-
}
87-
8876
fn stat_falsification(
8977
&self,
9078
expr: &ExpressionView<Self>,
@@ -93,6 +81,15 @@ impl VTable for IsNull {
9381
let null_count_expr = expr.child(0).stat_expression(Stat::NullCount, catalog)?;
9482
Some(eq(null_count_expr, lit(0u64)))
9583
}
84+
85+
fn execute(&self, _data: &Self::Instance, mut args: ExecutionArgs) -> VortexResult<Vector> {
86+
let child = args.vectors.pop().vortex_expect("Missing input child");
87+
Ok(BoolVector::new(
88+
child.validity().to_bit_buffer().not(),
89+
Mask::new_true(child.len()),
90+
)
91+
.into())
92+
}
9693
}
9794

9895
/// Creates an expression that checks for null values.

vortex-array/src/expr/exprs/not.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ use std::fmt::Formatter;
55

66
use vortex_compute::logical::LogicalNot;
77
use vortex_dtype::DType;
8-
use vortex_error::{VortexResult, vortex_bail};
8+
use vortex_error::{VortexExpect, VortexResult, vortex_bail};
99
use vortex_vector::Vector;
1010

1111
use crate::ArrayRef;
1212
use crate::compute::invert;
13-
use crate::expr::{ChildName, ExprId, Expression, ExpressionView, VTable, VTableExt};
13+
use crate::expr::{
14+
ChildName, ExecutionArgs, ExprId, Expression, ExpressionView, VTable, VTableExt,
15+
};
1416

1517
/// Expression that logically inverts boolean values.
1618
pub struct Not;
@@ -69,13 +71,8 @@ impl VTable for Not {
6971
invert(&child_result)
7072
}
7173

72-
fn execute(
73-
&self,
74-
expr: &ExpressionView<Self>,
75-
vector: &Vector,
76-
dtype: &DType,
77-
) -> VortexResult<Vector> {
78-
let child = expr.child(0).execute(vector, dtype)?;
74+
fn execute(&self, _data: &Self::Instance, mut args: ExecutionArgs) -> VortexResult<Vector> {
75+
let child = args.vectors.pop().vortex_expect("Missing input child");
7976
Ok(child.into_bool().not().into())
8077
}
8178
}

vortex-array/src/expr/exprs/root.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use vortex_vector::Vector;
99

1010
use crate::ArrayRef;
1111
use crate::expr::expression::Expression;
12-
use crate::expr::{ChildName, ExprId, ExpressionView, StatsCatalog, VTable, VTableExt};
12+
use crate::expr::{
13+
ChildName, ExecutionArgs, ExprId, ExpressionView, StatsCatalog, VTable, VTableExt,
14+
};
1315
use crate::stats::Stat;
1416

1517
/// An expression that returns the full scope of the expression evaluation.
@@ -60,13 +62,8 @@ impl VTable for Root {
6062
Ok(scope.clone())
6163
}
6264

63-
fn execute(
64-
&self,
65-
_expr: &ExpressionView<Self>,
66-
vector: &Vector,
67-
_dtype: &DType,
68-
) -> VortexResult<Vector> {
69-
Ok(vector.clone())
65+
fn execute(&self, _data: &Self::Instance, _args: ExecutionArgs) -> VortexResult<Vector> {
66+
vortex_bail!("Root expression is not executable")
7067
}
7168

7269
fn stat_expression(

vortex-array/src/expr/exprs/select/mod.rs

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
pub mod transform;
55

66
use std::fmt::{Display, Formatter};
7+
use std::sync::Arc;
78

89
use itertools::Itertools;
910
use prost::Message;
@@ -12,10 +13,11 @@ use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
1213
use vortex_proto::expr::select_opts::Opts;
1314
use vortex_proto::expr::{FieldNames as ProtoFieldNames, SelectOpts};
1415
use vortex_vector::Vector;
16+
use vortex_vector::struct_::StructVector;
1517

1618
use crate::expr::expression::Expression;
1719
use crate::expr::field::DisplayFieldNames;
18-
use crate::expr::{ChildName, ExprId, ExpressionView, VTable, VTableExt};
20+
use crate::expr::{ChildName, ExecutionArgs, ExprId, ExpressionView, VTable, VTableExt};
1921
use crate::{ArrayRef, IntoArray, ToCanonical};
2022

2123
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -146,15 +148,45 @@ impl VTable for Select {
146148
.into_array())
147149
}
148150

149-
fn execute(
150-
&self,
151-
_expr: &ExpressionView<Self>,
152-
_vector: &Vector,
153-
_dtype: &DType,
154-
) -> VortexResult<Vector> {
155-
vortex_bail!(
156-
"Select expressions cannot be executed. They must be removed during optimization."
157-
)
151+
fn execute(&self, selection: &FieldSelection, mut args: ExecutionArgs) -> VortexResult<Vector> {
152+
let child = args
153+
.vectors
154+
.pop()
155+
.vortex_expect("Missing input child")
156+
.into_struct();
157+
let child_fields = args
158+
.dtypes
159+
.pop()
160+
.vortex_expect("Missing input dtype")
161+
.into_struct_fields();
162+
163+
let field_indices: Vec<usize> = match selection {
164+
FieldSelection::Include(f) => f
165+
.iter()
166+
.map(|name| {
167+
child_fields
168+
.find(name)
169+
.ok_or_else(|| vortex_err!("Field {} not found in struct dtype", name))
170+
})
171+
.try_collect(),
172+
FieldSelection::Exclude(names) => child_fields
173+
.names()
174+
.iter()
175+
.filter(|&f| !names.as_ref().contains(f))
176+
.map(|name| {
177+
child_fields
178+
.find(name)
179+
.ok_or_else(|| vortex_err!("Field {} not found in struct dtype", name))
180+
})
181+
.try_collect(),
182+
}?;
183+
184+
let (fields, mask) = child.into_parts();
185+
let new_fields = field_indices
186+
.iter()
187+
.map(|&idx| fields[idx].clone())
188+
.collect();
189+
Ok(unsafe { StructVector::new_unchecked(Arc::new(new_fields), mask) }.into())
158190
}
159191
}
160192

0 commit comments

Comments
 (0)