Skip to content

Commit 6437b41

Browse files
A data fusion inspired traversal API for expressions (#1828)
This could later be used for other hierarchies
1 parent 61290ad commit 6437b41

File tree

16 files changed

+516
-47
lines changed

16 files changed

+516
-47
lines changed

vortex-datafusion/src/memory/plans.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use vortex_array::compute::take;
2323
use vortex_array::{ArrayData, IntoArrayVariant, IntoCanonical};
2424
use vortex_dtype::Field;
2525
use vortex_error::{vortex_err, vortex_panic, VortexError};
26-
use vortex_expr::ExprRef;
26+
use vortex_expr::{ExprRef, VortexExprExt};
2727

2828
/// Physical plan operator that applies a set of [filters][Expr] against the input, producing a
2929
/// row mask that can be used downstream to force a take against the corresponding struct array

vortex-expr/src/binary.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@ use std::any::Any;
22
use std::fmt::Display;
33
use std::sync::Arc;
44

5-
use vortex_array::aliases::hash_set::HashSet;
65
use vortex_array::compute::{and_kleene, compare, or_kleene, Operator as ArrayOperator};
76
use vortex_array::ArrayData;
8-
use vortex_dtype::Field;
97
use vortex_error::VortexResult;
108

119
use crate::{ExprRef, Operator, VortexExpr};
@@ -62,9 +60,13 @@ impl VortexExpr for BinaryExpr {
6260
}
6361
}
6462

65-
fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) {
66-
self.lhs.collect_references(references);
67-
self.rhs.collect_references(references);
63+
fn children(&self) -> Vec<&ExprRef> {
64+
vec![&self.lhs, &self.rhs]
65+
}
66+
67+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef {
68+
assert_eq!(children.len(), 2);
69+
BinaryExpr::new_expr(children[0].clone(), self.operator, children[1].clone())
6870
}
6971
}
7072

vortex-expr/src/column.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use std::any::Any;
22
use std::fmt::Display;
33
use std::sync::Arc;
44

5-
use vortex_array::aliases::hash_set::HashSet;
65
use vortex_array::array::StructArray;
76
use vortex_array::variants::StructArrayTrait;
87
use vortex_array::ArrayData;
@@ -17,8 +16,10 @@ pub struct Column {
1716
}
1817

1918
impl Column {
20-
pub fn new_expr(field: Field) -> ExprRef {
21-
Arc::new(Self { field })
19+
pub fn new_expr(field: impl Into<Field>) -> ExprRef {
20+
Arc::new(Self {
21+
field: field.into(),
22+
})
2223
}
2324

2425
pub fn field(&self) -> &Field {
@@ -69,7 +70,12 @@ impl VortexExpr for Column {
6970
.ok_or_else(|| vortex_err!("Array doesn't contain child array {}", self.field))
7071
}
7172

72-
fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) {
73-
references.insert(self.field());
73+
fn children(&self) -> Vec<&ExprRef> {
74+
vec![]
75+
}
76+
77+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef {
78+
assert_eq!(children.len(), 0);
79+
self
7480
}
7581
}

vortex-expr/src/identity.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,15 @@ impl VortexExpr for Identity {
3030
fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData> {
3131
Ok(batch.clone())
3232
}
33+
34+
fn children(&self) -> Vec<&ExprRef> {
35+
vec![]
36+
}
37+
38+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef {
39+
assert_eq!(children.len(), 0);
40+
self
41+
}
3342
}
3443

3544
// Return a global pointer to the identity token.

vortex-expr/src/lib.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ use std::any::Any;
22
use std::fmt::{Debug, Display};
33
use std::sync::Arc;
44

5-
use vortex_array::aliases::hash_set::HashSet;
6-
75
mod binary;
86
mod column;
97
pub mod datafusion;
@@ -16,6 +14,8 @@ mod project;
1614
pub mod pruning;
1715
mod row_filter;
1816
mod select;
17+
#[allow(dead_code)]
18+
mod traversal;
1919

2020
pub use binary::*;
2121
pub use column::*;
@@ -27,9 +27,12 @@ pub use operators::*;
2727
pub use project::*;
2828
pub use row_filter::*;
2929
pub use select::*;
30+
use vortex_array::aliases::hash_set::HashSet;
3031
use vortex_array::ArrayData;
3132
use vortex_dtype::Field;
32-
use vortex_error::VortexResult;
33+
use vortex_error::{VortexResult, VortexUnwrap};
34+
35+
use crate::traversal::{Node, ReferenceCollector};
3336

3437
pub type ExprRef = Arc<dyn VortexExpr>;
3538

@@ -41,14 +44,22 @@ pub trait VortexExpr: Debug + Send + Sync + DynEq + Display {
4144
/// Compute result of expression on given batch producing a new batch
4245
fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData>;
4346

44-
/// Accumulate all field references from this expression and its children in the provided set
45-
fn collect_references<'a>(&'a self, _references: &mut HashSet<&'a Field>) {}
47+
fn children(&self) -> Vec<&ExprRef>;
48+
49+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef;
50+
}
51+
52+
pub trait VortexExprExt {
53+
/// Accumulate all field references from this expression and its children in a set
54+
fn references(&self) -> HashSet<&Field>;
55+
}
4656

47-
/// Accumulate all field references from this expression and its children in a new set
57+
impl VortexExprExt for ExprRef {
4858
fn references(&self) -> HashSet<&Field> {
49-
let mut refs = HashSet::new();
50-
self.collect_references(&mut refs);
51-
refs
59+
let mut collector = ReferenceCollector::new();
60+
// The collector is infallible, so we can unwrap the result
61+
self.accept(&mut collector).vortex_unwrap();
62+
collector.into_fields()
5263
}
5364
}
5465

vortex-expr/src/like.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@ use std::any::Any;
22
use std::fmt::Display;
33
use std::sync::Arc;
44

5-
use vortex_array::aliases::hash_set::HashSet;
65
use vortex_array::compute::{like, LikeOptions};
76
use vortex_array::ArrayData;
8-
use vortex_dtype::Field;
97
use vortex_error::VortexResult;
108

119
use crate::{ExprRef, VortexExpr};
@@ -74,9 +72,18 @@ impl VortexExpr for Like {
7472
)
7573
}
7674

77-
fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) {
78-
self.child().collect_references(references);
79-
self.pattern().collect_references(references);
75+
fn children(&self) -> Vec<&ExprRef> {
76+
vec![&self.pattern, &self.child]
77+
}
78+
79+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef {
80+
assert_eq!(children.len(), 2);
81+
Like::new_expr(
82+
children[0].clone(),
83+
children[1].clone(),
84+
self.negated,
85+
self.case_insensitive,
86+
)
8087
}
8188
}
8289

vortex-expr/src/literal.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ pub struct Literal {
1515
}
1616

1717
impl Literal {
18-
pub fn new_expr(value: Scalar) -> ExprRef {
19-
Arc::new(Self { value })
18+
pub fn new_expr(value: impl Into<Scalar>) -> ExprRef {
19+
Arc::new(Self {
20+
value: value.into(),
21+
})
2022
}
2123

2224
pub fn value(&self) -> &Scalar {
@@ -38,6 +40,15 @@ impl VortexExpr for Literal {
3840
fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData> {
3941
Ok(ConstantArray::new(self.value.clone(), batch.len()).into_array())
4042
}
43+
44+
fn children(&self) -> Vec<&ExprRef> {
45+
vec![]
46+
}
47+
48+
fn replacing_children(self: Arc<Self>, children: Vec<ExprRef>) -> ExprRef {
49+
assert_eq!(children.len(), 0);
50+
self
51+
}
4152
}
4253

4354
/// Create a new `Literal` expression from a type that coerces to `Scalar`.

vortex-expr/src/not.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@ use std::any::Any;
22
use std::fmt::Display;
33
use std::sync::Arc;
44

5-
use vortex_array::aliases::hash_set::HashSet;
65
use vortex_array::compute::invert;
76
use vortex_array::ArrayData;
8-
use vortex_dtype::Field;
97
use vortex_error::VortexResult;
108

119
use crate::{ExprRef, VortexExpr};
@@ -42,8 +40,13 @@ impl VortexExpr for Not {
4240
invert(&child_result)
4341
}
4442

45-
fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) {
46-
self.child.collect_references(references)
43+
fn children(&self) -> Vec<&ExprRef> {
44+
vec![&self.child]
45+
}
46+
47+
fn replacing_children(self: Arc<Self>, mut children: Vec<ExprRef>) -> ExprRef {
48+
assert_eq!(children.len(), 0);
49+
Self::new_expr(children.remove(0))
4750
}
4851
}
4952

vortex-expr/src/project.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_dtype::Field;
55

66
use crate::{
77
col, lit, BinaryExpr, Column, ExprRef, Identity, Like, Literal, Not, Operator, RowFilter,
8-
Select, VortexExpr,
8+
Select, VortexExpr, VortexExprExt,
99
};
1010

1111
/// Restrict expression to only the fields that appear in projection
@@ -52,7 +52,7 @@ pub fn expr_project(expr: &ExprRef, projection: &[Field]) -> Option<ExprRef> {
5252
}
5353
})
5454
} else if let Some(n) = expr.as_any().downcast_ref::<Not>() {
55-
let own_refs = n.references();
55+
let own_refs = expr.references();
5656
if own_refs.iter().all(|p| projection.contains(p)) {
5757
expr_project(n.child(), projection).map(Not::new_expr)
5858
} else {

vortex-expr/src/pruning.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use vortex_scalar::Scalar;
1515

1616
use crate::{
1717
and, col, eq, gt, gt_eq, lit, lt_eq, or, BinaryExpr, Column, ExprRef, Identity, Literal, Not,
18-
Operator, RowFilter,
18+
Operator, RowFilter, VortexExprExt,
1919
};
2020

2121
#[derive(Debug, Clone)]

0 commit comments

Comments
 (0)