Skip to content

Commit 9bdc3bf

Browse files
committed
wip
Signed-off-by: Joe Isaacs <[email protected]>
1 parent b245acb commit 9bdc3bf

File tree

23 files changed

+286
-164
lines changed

23 files changed

+286
-164
lines changed

vortex-array/src/expr/analysis/annotations.rs

Lines changed: 0 additions & 83 deletions
This file was deleted.

vortex-array/src/expr/analysis/immediate_access.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ use vortex_error::VortexExpect;
66
use vortex_utils::aliases::hash_set::HashSet;
77

88
use crate::expr::Expression;
9-
use crate::expr::analysis::annotations::{AnnotationFn, Annotations, descendent_annotations};
9+
use crate::expr::analysis::annotation_union_set::{
10+
AnnotationFn, Annotations, descendent_annotation_union_set,
11+
};
1012
use crate::expr::exprs::get_item::GetItem;
1113
use crate::expr::exprs::root::Root;
1214
use crate::expr::exprs::select::Select;
@@ -44,7 +46,7 @@ pub fn immediate_scope_accesses<'a>(
4446
expr: &'a Expression,
4547
scope: &'a StructFields,
4648
) -> FieldAccesses<'a> {
47-
descendent_annotations(expr, annotate_scope_access(scope))
49+
descendent_annotation_union_set(expr, annotate_scope_access(scope))
4850
}
4951

5052
/// Returns the immediate scope accesses (as explained in `immediate_scope_accesses`) for `expr`.
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::{VortexExpect, VortexResult};
5+
use vortex_utils::aliases::hash_map::HashMap;
6+
7+
use crate::expr::Expression;
8+
use crate::expr::traversal::{NodeExt, NodeVisitor, TraversalOrder};
9+
10+
/// Label each node in an expression tree using a bottom-up traversal.
11+
///
12+
/// This function separates tree labeling into two distinct steps:
13+
/// 1. **Label Edge**: Compute a label for each node based only on the node itself
14+
/// 2. **Merge Child**: Fold/accumulate labels from children into the node's self-label
15+
///
16+
/// The labeling process:
17+
/// - First, `label_edge` is called on the node to produce its self-label
18+
/// - Then, for each child, `merge_child` is called with `(accumulator, child_label)`
19+
/// to fold the child label into the accumulator, starting with the self-label
20+
/// - This produces the final label for the node, which is stored in the returned map keyed by that node
21+
///
22+
/// This approach avoids allocating a Vec for child labels by processing them one at a time.
23+
///
24+
/// # Parameters
25+
///
26+
/// - `expr`: The root expression to label
27+
/// - `label_edge`: Function that computes a label for a single node
28+
/// - `merge_child`: Mutable function that folds child labels into an accumulator.
29+
/// Takes `(accumulator, child_label)` and returns the updated accumulator.
30+
/// Called once per child, with the initial accumulator being the node's self-label.
31+
///
32+
/// # Examples
33+
///
34+
/// ```ignore
35+
/// // Compute the depth of each subtree
36+
/// let depths = label_tree(
37+
/// expr,
38+
/// |_node| 1, // Each node has depth 1 by itself
39+
/// |self_depth, child_depth| self_depth.max(*child_depth + 1)
40+
/// );
41+
/// ```
42+
///
43+
/// ```ignore
44+
/// // Check if any node in subtree is null-sensitive
45+
/// let sensitive = label_tree(
46+
/// expr,
47+
/// |node| node.is_null_sensitive(),
48+
/// |acc, child| acc || *child // OR all children with self
49+
/// );
50+
/// ```
51+
pub fn label_tree<L: Clone>(
52+
expr: &Expression,
53+
label_edge: impl Fn(&Expression) -> L,
54+
mut merge_child: impl FnMut(L, &L) -> L,
55+
) -> HashMap<&Expression, L> {
56+
let mut visitor = LabelingVisitor {
57+
labels: Default::default(),
58+
label_edge,
59+
merge_child: &mut merge_child,
60+
};
61+
expr.accept(&mut visitor)
62+
.vortex_expect("LabelingVisitor is infallible");
63+
visitor.labels
64+
}
65+
66+
struct LabelingVisitor<'a, 'b, L, F, G>
67+
where
68+
F: Fn(&Expression) -> L,
69+
G: FnMut(L, &L) -> L,
70+
{
71+
labels: HashMap<&'a Expression, L>,
72+
label_edge: F,
73+
merge_child: &'b mut G,
74+
}
75+
76+
impl<'a, 'b, L: Clone, F, G> NodeVisitor<'a> for LabelingVisitor<'a, 'b, L, F, G>
77+
where
78+
F: Fn(&Expression) -> L,
79+
G: FnMut(L, &L) -> L,
80+
{
81+
type NodeTy = Expression;
82+
83+
fn visit_down(&mut self, _node: &'a Self::NodeTy) -> VortexResult<TraversalOrder> {
84+
// Continue traversing down
85+
Ok(TraversalOrder::Continue)
86+
}
87+
88+
fn visit_up(&mut self, node: &'a Expression) -> VortexResult<TraversalOrder> {
89+
// Compute self-label for this node
90+
let self_label = (self.label_edge)(node);
91+
92+
// Fold all child labels into the self label
93+
let final_label = node.children().iter().fold(self_label, |acc, child| {
94+
let child_label = self
95+
.labels
96+
.get(child)
97+
.vortex_expect("child must have label");
98+
(self.merge_child)(acc, child_label)
99+
});
100+
101+
self.labels.insert(node, final_label);
102+
103+
Ok(TraversalOrder::Continue)
104+
}
105+
}
106+
107+
#[cfg(test)]
108+
mod tests {
109+
use super::*;
110+
use crate::expr::exprs::binary::eq;
111+
use crate::expr::exprs::get_item::col;
112+
use crate::expr::exprs::literal::lit;
113+
114+
#[test]
115+
fn test_tree_depth() {
116+
// Expression: $.col1 = 5
117+
// Tree: eq(get_item(root(), "col1"), lit(5))
118+
// Depth: root = 1, get_item = 2, lit = 1, eq = 3
119+
let expr = eq(col("col1"), lit(5));
120+
let depths = label_tree(
121+
&expr,
122+
|_node| 1, // Each node has depth 1 by itself
123+
|self_depth, child_depth| self_depth.max(*child_depth + 1),
124+
);
125+
126+
// The root (eq) should have depth 3
127+
assert_eq!(depths.get(&expr), Some(&3));
128+
}
129+
130+
#[test]
131+
fn test_node_count() {
132+
// Count total nodes in subtree (including self)
133+
// Tree: eq(get_item(root(), "col1"), lit(5))
134+
// Nodes: eq, get_item, root, lit = 4
135+
let expr = eq(col("col1"), lit(5));
136+
let counts = label_tree(
137+
&expr,
138+
|_node| 1, // Each node counts as 1
139+
|self_count, child_count| self_count + *child_count,
140+
);
141+
142+
// Root should have count of 4 (eq, get_item, root, lit)
143+
assert_eq!(counts.get(&expr), Some(&4));
144+
}
145+
}
Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
pub mod annotations;
4+
pub mod annotation_union_set;
55
pub mod immediate_access;
6+
mod labeling;
67
mod null_sensitive;
78

8-
pub use annotations::*;
9+
pub use annotation_union_set::*;
910
pub use immediate_access::*;
11+
pub use labeling::*;
1012
pub use null_sensitive::*;
11-
use vortex_dtype::FieldPath;
12-
13-
use crate::expr::Expression;
14-
use crate::stats::Stat;
15-
16-
/// A catalog of available stats that are associated with field paths.
17-
pub trait StatsCatalog {
18-
/// Given a field path and statistic, return an expression that when evaluated over the catalog
19-
/// will return that stat for the referenced field.
20-
///
21-
/// This is likely to be a column expression, or a literal.
22-
///
23-
/// Returns `None` if the stat is not available for the field path.
24-
fn stats_ref(&self, _field_path: &FieldPath, _stat: Stat) -> Option<Expression> {
25-
None
26-
}
27-
}

0 commit comments

Comments
 (0)