Skip to content

Commit 6f6cec4

Browse files
committed
sql: clean up planner module
1 parent 0839215 commit 6f6cec4

File tree

7 files changed

+154
-98
lines changed

7 files changed

+154
-98
lines changed

src/sql/planner/optimizer.rs

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,17 @@ pub static OPTIMIZERS: &[(&str, Optimizer)] = &[
1313
("Constant folding", fold_constants),
1414
("Filter pushdown", push_filters),
1515
("Index lookup", index_lookup),
16-
("Join type", join_type),
16+
("Hash join", hash_join),
1717
("Short circuit", short_circuit),
1818
];
1919

20-
/// Folds constant (sub)expressions by pre-evaluating them, instead of
21-
/// re-evaluating then for every row during execution.
20+
/// Folds constant (sub)expressions by pre-evaluating them once, instead of
21+
/// re-evaluating them for every row during execution.
2222
pub fn fold_constants(node: Node) -> Result<Node> {
23-
use Expression::*;
24-
use Value::*;
23+
fn xform(mut expr: Expression) -> Result<Expression> {
24+
use Expression::*;
25+
use Value::*;
2526

26-
// Transform expressions. Called after descending, to perform logical
27-
// short-circuiting on child expressions that have already been folded, and
28-
// to reduce the quadratic cost when an expression contains a column.
29-
let xform = |mut expr: Expression| {
3027
// If the expression is constant, evaluate it.
3128
//
3229
// This is a very simple approach, which doesn't handle more complex
@@ -39,7 +36,9 @@ pub fn fold_constants(node: Node) -> Result<Node> {
3936
}
4037

4138
// If the expression is a logical operator, and one of the sides is
42-
// known, we may be able to short-circuit it.
39+
// constant, we may be able to evaluate it even if it has a column
40+
// reference. For example, a AND FALSE is always FALSE, regardless of
41+
// what a is.
4342
expr = match expr {
4443
And(lhs, rhs) => match (*lhs, *rhs) {
4544
// If either side of an AND is false, the AND is false.
@@ -62,14 +61,18 @@ pub fn fold_constants(node: Node) -> Result<Node> {
6261
expr => expr,
6362
};
6463
Ok(expr)
65-
};
64+
}
6665

66+
// Transform after descending, to perform logical short-circuiting on child
67+
// expressions that have already been folded, and to reduce the quadratic
68+
// cost when an expression contains a column.
6769
node.transform(&|node| node.transform_expressions(&Ok, &xform), &Ok)
6870
}
6971

7072
/// Pushes filter predicates down into child nodes where possible. In
7173
/// particular, this can allow filtering during storage scans (below Raft),
72-
/// instead of reading and transmitting all rows then filtering.
74+
/// instead of reading and transmitting all rows then filtering, by pushing
75+
/// a predicate from a Filter node down into a Scan node.
7376
pub fn push_filters(node: Node) -> Result<Node> {
7477
/// Pushes an expression into a node if possible. Otherwise, returns the
7578
/// unpushed expression.
@@ -92,23 +95,19 @@ pub fn push_filters(node: Node) -> Result<Node> {
9295
None => Some(expr),
9396
};
9497
}
95-
// We don't handle HashJoin here, since we assume the join_type()
96-
// optimizer runs after this.
97-
Node::HashJoin { .. } => panic!("filter pushdown must run before join optimizer"),
9898
// Unable to push down, just return the original expression.
9999
_ => return Some(expr),
100100
}
101101
None
102102
}
103103

104-
/// Pushes down a filter node if possible.
105-
fn push_filter(node: Node) -> Node {
104+
/// Pushes a filter node predicate down into its source, if possible.
105+
fn maybe_push_filter(node: Node) -> Node {
106106
let Node::Filter { mut source, predicate } = node else {
107107
return node;
108108
};
109-
// Attempt to push the filter into the source.
109+
// Attempt to push the filter into the source, or return the original.
110110
if let Some(predicate) = push_into(predicate, &mut source) {
111-
// Push failed, return the original filter node.
112111
return Node::Filter { source, predicate };
113112
}
114113
// Push succeeded, return the source that was pushed into. When we
@@ -120,7 +119,7 @@ pub fn push_filters(node: Node) -> Result<Node> {
120119

121120
// Pushes down parts of a join predicate into the left or right sources
122121
// where possible.
123-
fn push_join(node: Node) -> Node {
122+
fn maybe_push_join(node: Node) -> Node {
124123
let Node::NestedLoopJoin { mut left, mut right, predicate: Some(predicate), outer } = node
125124
else {
126125
return node;
@@ -213,8 +212,10 @@ pub fn push_filters(node: Node) -> Result<Node> {
213212
}
214213

215214
/// Applies pushdown transformations to a node.
216-
fn xform(node: Node) -> Node {
217-
push_join(push_filter(node))
215+
fn xform(mut node: Node) -> Node {
216+
node = maybe_push_filter(node);
217+
node = maybe_push_join(node);
218+
node
218219
}
219220

220221
// Push down before descending, so we can keep recursively pushing down.
@@ -223,7 +224,7 @@ pub fn push_filters(node: Node) -> Result<Node> {
223224

224225
/// Uses an index or primary key lookup for a filter when possible.
225226
pub fn index_lookup(node: Node) -> Result<Node> {
226-
let transform = |mut node| {
227+
fn xform(mut node: Node) -> Node {
227228
// Only handle scan filters. push_filters() must have pushed filters
228229
// into scan nodes first.
229230
let Node::Scan { table, alias, filter: Some(filter) } = node else {
@@ -237,7 +238,7 @@ pub fn index_lookup(node: Node) -> Result<Node> {
237238
// index lookup. We could be more clever here, but this is fine.
238239
let Some((i, column)) = cnf.iter().enumerate().find_map(|(i, expr)| {
239240
expr.is_column_lookup()
240-
.filter(|c| *c == table.primary_key || table.columns[*c].index)
241+
.filter(|&c| c == table.primary_key || table.columns[c].index)
241242
.map(|column| (i, column))
242243
}) else {
243244
return Node::Scan { table, alias, filter: Some(filter) };
@@ -259,19 +260,26 @@ pub fn index_lookup(node: Node) -> Result<Node> {
259260
}
260261

261262
node
262-
};
263-
node.transform(&Ok, &|n| Ok(transform(n)))
263+
}
264+
265+
node.transform(&Ok, &|n| Ok(xform(n)))
264266
}
265267

266268
/// Uses a hash join instead of a nested loop join for single-column equijoins.
267-
pub fn join_type(node: Node) -> Result<Node> {
268-
let xform = |node| match node {
269-
Node::NestedLoopJoin {
269+
pub fn hash_join(node: Node) -> Result<Node> {
270+
fn xform(node: Node) -> Node {
271+
let Node::NestedLoopJoin {
270272
left,
271273
right,
272274
predicate: Some(Expression::Equal(lhs, rhs)),
273275
outer,
274-
} => match (*lhs, *rhs) {
276+
} = node
277+
else {
278+
return node;
279+
};
280+
281+
match (*lhs, *rhs) {
282+
// If this is a single-column equijoin, use a hash join.
275283
(Expression::Column(mut left_column), Expression::Column(mut right_column)) => {
276284
// The LHS column may be a column in the right table; swap them.
277285
if right_column < left_column {
@@ -283,18 +291,20 @@ pub fn join_type(node: Node) -> Result<Node> {
283291
right_column -= left.columns();
284292
Node::HashJoin { left, left_column, right, right_column, outer }
285293
}
294+
// Otherwise, retain the nested loop join.
286295
(lhs, rhs) => {
287296
let predicate = Some(Expression::Equal(lhs.into(), rhs.into()));
288297
Node::NestedLoopJoin { left, right, predicate, outer }
289298
}
290-
},
291-
node => node,
292-
};
299+
}
300+
}
301+
293302
node.transform(&|node| Ok(xform(node)), &Ok)
294303
}
295304

296-
/// Short-circuits useless nodes and expressions, by removing them and/or
297-
/// replacing them with Nothing nodes that yield no rows.
305+
/// Short-circuits useless nodes and expressions (for example a Filter node that
306+
/// always evaluates to false), by removing them and/or replacing them with
307+
/// Nothing nodes that yield no rows.
298308
pub fn short_circuit(node: Node) -> Result<Node> {
299309
use Expression::*;
300310
use Value::*;

0 commit comments

Comments
 (0)