@@ -13,20 +13,17 @@ pub static OPTIMIZERS: &[(&str, Optimizer)] = &[
1313 ( "Constant folding" , fold_constants) ,
1414 ( "Filter pushdown" , push_filters) ,
1515 ( "Index lookup" , index_lookup) ,
16- ( "Join type " , join_type ) ,
16+ ( "Hash join " , hash_join ) ,
1717 ( "Short circuit" , short_circuit) ,
1818] ;
1919
20- /// Folds constant (sub)expressions by pre-evaluating them, instead of
21- /// re-evaluating then for every row during execution.
20+ /// Folds constant (sub)expressions by pre-evaluating them once , instead of
21+ /// re-evaluating them for every row during execution.
2222pub fn fold_constants ( node : Node ) -> Result < Node > {
23- use Expression :: * ;
24- use Value :: * ;
23+ fn xform ( mut expr : Expression ) -> Result < Expression > {
24+ use Expression :: * ;
25+ use Value :: * ;
2526
26- // Transform expressions. Called after descending, to perform logical
27- // short-circuiting on child expressions that have already been folded, and
28- // to reduce the quadratic cost when an expression contains a column.
29- let xform = |mut expr : Expression | {
3027 // If the expression is constant, evaluate it.
3128 //
3229 // This is a very simple approach, which doesn't handle more complex
@@ -39,7 +36,9 @@ pub fn fold_constants(node: Node) -> Result<Node> {
3936 }
4037
4138 // If the expression is a logical operator, and one of the sides is
42- // known, we may be able to short-circuit it.
39+ // constant, we may be able to evaluate it even if it has a column
40+ // reference. For example, a AND FALSE is always FALSE, regardless of
41+ // what a is.
4342 expr = match expr {
4443 And ( lhs, rhs) => match ( * lhs, * rhs) {
4544 // If either side of an AND is false, the AND is false.
@@ -62,14 +61,18 @@ pub fn fold_constants(node: Node) -> Result<Node> {
6261 expr => expr,
6362 } ;
6463 Ok ( expr)
65- } ;
64+ }
6665
66+ // Transform after descending, to perform logical short-circuiting on child
67+ // expressions that have already been folded, and to reduce the quadratic
68+ // cost when an expression contains a column.
6769 node. transform ( & |node| node. transform_expressions ( & Ok , & xform) , & Ok )
6870}
6971
7072/// Pushes filter predicates down into child nodes where possible. In
7173/// particular, this can allow filtering during storage scans (below Raft),
72- /// instead of reading and transmitting all rows then filtering.
74+ /// instead of reading and transmitting all rows then filtering, by pushing
75+ /// a predicate from a Filter node down into a Scan node.
7376pub fn push_filters ( node : Node ) -> Result < Node > {
7577 /// Pushes an expression into a node if possible. Otherwise, returns the
7578 /// unpushed expression.
@@ -92,23 +95,19 @@ pub fn push_filters(node: Node) -> Result<Node> {
9295 None => Some ( expr) ,
9396 } ;
9497 }
95- // We don't handle HashJoin here, since we assume the join_type()
96- // optimizer runs after this.
97- Node :: HashJoin { .. } => panic ! ( "filter pushdown must run before join optimizer" ) ,
9898 // Unable to push down, just return the original expression.
9999 _ => return Some ( expr) ,
100100 }
101101 None
102102 }
103103
104- /// Pushes down a filter node if possible.
105- fn push_filter ( node : Node ) -> Node {
104+ /// Pushes a filter node predicate down into its source, if possible.
105+ fn maybe_push_filter ( node : Node ) -> Node {
106106 let Node :: Filter { mut source, predicate } = node else {
107107 return node;
108108 } ;
109- // Attempt to push the filter into the source.
109+ // Attempt to push the filter into the source, or return the original .
110110 if let Some ( predicate) = push_into ( predicate, & mut source) {
111- // Push failed, return the original filter node.
112111 return Node :: Filter { source, predicate } ;
113112 }
114113 // Push succeeded, return the source that was pushed into. When we
@@ -120,7 +119,7 @@ pub fn push_filters(node: Node) -> Result<Node> {
120119
121120 // Pushes down parts of a join predicate into the left or right sources
122121 // where possible.
123- fn push_join ( node : Node ) -> Node {
122+ fn maybe_push_join ( node : Node ) -> Node {
124123 let Node :: NestedLoopJoin { mut left, mut right, predicate : Some ( predicate) , outer } = node
125124 else {
126125 return node;
@@ -213,8 +212,10 @@ pub fn push_filters(node: Node) -> Result<Node> {
213212 }
214213
215214 /// Applies pushdown transformations to a node.
216- fn xform ( node : Node ) -> Node {
217- push_join ( push_filter ( node) )
215+ fn xform ( mut node : Node ) -> Node {
216+ node = maybe_push_filter ( node) ;
217+ node = maybe_push_join ( node) ;
218+ node
218219 }
219220
220221 // Push down before descending, so we can keep recursively pushing down.
@@ -223,7 +224,7 @@ pub fn push_filters(node: Node) -> Result<Node> {
223224
224225/// Uses an index or primary key lookup for a filter when possible.
225226pub fn index_lookup ( node : Node ) -> Result < Node > {
226- let transform = | mut node| {
227+ fn xform ( mut node : Node ) -> Node {
227228 // Only handle scan filters. push_filters() must have pushed filters
228229 // into scan nodes first.
229230 let Node :: Scan { table, alias, filter : Some ( filter) } = node else {
@@ -237,7 +238,7 @@ pub fn index_lookup(node: Node) -> Result<Node> {
237238 // index lookup. We could be more clever here, but this is fine.
238239 let Some ( ( i, column) ) = cnf. iter ( ) . enumerate ( ) . find_map ( |( i, expr) | {
239240 expr. is_column_lookup ( )
240- . filter ( |c| * c == table. primary_key || table. columns [ * c] . index )
241+ . filter ( |& c| c == table. primary_key || table. columns [ c] . index )
241242 . map ( |column| ( i, column) )
242243 } ) else {
243244 return Node :: Scan { table, alias, filter : Some ( filter) } ;
@@ -259,19 +260,26 @@ pub fn index_lookup(node: Node) -> Result<Node> {
259260 }
260261
261262 node
262- } ;
263- node. transform ( & Ok , & |n| Ok ( transform ( n) ) )
263+ }
264+
265+ node. transform ( & Ok , & |n| Ok ( xform ( n) ) )
264266}
265267
266268/// Uses a hash join instead of a nested loop join for single-column equijoins.
267- pub fn join_type ( node : Node ) -> Result < Node > {
268- let xform = | node| match node {
269- Node :: NestedLoopJoin {
269+ pub fn hash_join ( node : Node ) -> Result < Node > {
270+ fn xform ( node : Node ) -> Node {
271+ let Node :: NestedLoopJoin {
270272 left,
271273 right,
272274 predicate : Some ( Expression :: Equal ( lhs, rhs) ) ,
273275 outer,
274- } => match ( * lhs, * rhs) {
276+ } = node
277+ else {
278+ return node;
279+ } ;
280+
281+ match ( * lhs, * rhs) {
282+ // If this is a single-column equijoin, use a hash join.
275283 ( Expression :: Column ( mut left_column) , Expression :: Column ( mut right_column) ) => {
276284 // The LHS column may be a column in the right table; swap them.
277285 if right_column < left_column {
@@ -283,18 +291,20 @@ pub fn join_type(node: Node) -> Result<Node> {
283291 right_column -= left. columns ( ) ;
284292 Node :: HashJoin { left, left_column, right, right_column, outer }
285293 }
294+ // Otherwise, retain the nested loop join.
286295 ( lhs, rhs) => {
287296 let predicate = Some ( Expression :: Equal ( lhs. into ( ) , rhs. into ( ) ) ) ;
288297 Node :: NestedLoopJoin { left, right, predicate, outer }
289298 }
290- } ,
291- node => node ,
292- } ;
299+ }
300+ }
301+
293302 node. transform ( & |node| Ok ( xform ( node) ) , & Ok )
294303}
295304
296- /// Short-circuits useless nodes and expressions, by removing them and/or
297- /// replacing them with Nothing nodes that yield no rows.
305+ /// Short-circuits useless nodes and expressions (for example a Filter node that
306+ /// always evaluates to false), by removing them and/or replacing them with
307+ /// Nothing nodes that yield no rows.
298308pub fn short_circuit ( node : Node ) -> Result < Node > {
299309 use Expression :: * ;
300310 use Value :: * ;
0 commit comments