Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit eaf7bfb

Browse files
committed
Address comments
1 parent dcabc33 commit eaf7bfb

File tree

8 files changed

+106
-81
lines changed

8 files changed

+106
-81
lines changed

src/include/expression/expression_util.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -401,21 +401,24 @@ class ExpressionUtil {
401401
}
402402

403403
/**
404-
* TODO(boweic): this function may not be efficient, in the future we may want
405-
* to add expressions to groups so that we do not need to walk through the
406-
* expression tree when judging '==' each time
404+
* @brief TODO(boweic): this function may not be efficient, in the future we
405+
* may want to add expressions to groups so that we do not need to walk
406+
* through the expression tree when judging '==' each time
407407
*
408408
* Convert all expression in the current expression tree that is in
409409
* child_expr_map to tuple value expression with corresponding column offset
410410
* of the input child tuple. This is used for handling projection
411411
* on situations like aggregate function (e.g. SELECT sum(a)+max(b) FROM ...
412412
* GROUP BY ...) when input columns contain sum(a) and sum(b). We need to
413413
* treat them as tuple value expression in the projection plan. This function
414-
*should always be called before calling EvaluateExpression
414+
* should always be called before calling EvaluateExpression
415415
*
416416
* Please notice that this function should only apply to copied expression
417-
*since it would modify the current expression. We do not want to modify the
418-
*original expression since it may be referenced in other places
417+
* since it would modify the current expression. We do not want to modify the
418+
* original expression since it may be referenced in other places
419+
*
420+
* @param expr The expression to modify
421+
* @param child_expr_maps map from child column ids to expression
419422
*/
420423
static void ConvertToTvExpr(AbstractExpression *expr,
421424
std::vector<ExprMap> child_expr_maps) {

src/include/optimizer/query_to_operator_transformer.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,6 @@ class QueryToOperatorTransformer : public SqlNodeVisitor {
8585
expression::AbstractExpression *expr,
8686
std::vector<AnnotatedExpression> predicates = {});
8787

88-
// TODO(boweic): Since we haven't migrated all the functionalities needed to
89-
// generate mark-join and single-join to the optimizer, currently this
90-
// function has not been tested, and it may be a bit hard to understand. We
91-
// may integrate the unnesting functionality in the next PR
9288
/**
9389
* @brief Transform a sub-query in an expression to use
9490
*

src/include/optimizer/rule_impls.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ class CombineConsecutiveFilter : public Rule {
291291
};
292292

293293
/**
294-
* @brief perform predicate push-down to push a filter through aggregation, also will embed filter into aggregation operator if appropriate.
294+
* @brief perform predicate push-down to push a filter through aggregation, also
295+
* will embed filter into aggregation operator if appropriate.
295296
*/
296297
class PushFilterThroughAggregation : public Rule {
297298
public:
@@ -323,6 +324,9 @@ class EmbedFilterIntoGet : public Rule {
323324

324325
///////////////////////////////////////////////////////////////////////////////
325326
/// Unnesting rules
327+
// We use this promise to determine which rules should be applied first if
328+
// multiple rules are applicable, we need to first pull filters up through mark-join
329+
// then turn mark-join into a regular join operator
326330
enum class UnnestPromise { Low = 1, High };
327331
// TODO(boweic): MarkJoin and SingleJoin should not be transformed into inner
328332
// join. Sometimes MarkJoin could be transformed into semi-join, but for now we

src/include/optimizer/util.h

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,23 @@ class DataTable;
3333
namespace optimizer {
3434
namespace util {
3535

36+
/**
37+
* @brief Convert upper case letters into lower case in a string
38+
*
39+
* @param str The string to operate on
40+
*/
3641
inline void to_lower_string(std::string &str) {
3742
std::transform(str.begin(), str.end(), str.begin(), ::tolower);
3843
}
3944

45+
/**
46+
* @brief Check if a hashset is a subset of the other one
47+
*
48+
* @param super_set The potential super set
49+
* @param child_set The potential child set
50+
*
51+
* @return True if the second set is a subset of the first one
52+
*/
4053
template <class T>
4154
bool IsSubset(const std::unordered_set<T> &super_set,
4255
const std::unordered_set<T> &child_set) {
@@ -46,47 +59,107 @@ bool IsSubset(const std::unordered_set<T> &super_set,
4659
return true;
4760
}
4861

62+
/**
63+
* @brief Perform union operation on 2 hashsets
64+
*
65+
* @param new_set The first set, will hold the result after the union operation
66+
* @param old_set the other set
67+
*/
4968
template <class T>
5069
void SetUnion(std::unordered_set<T> &new_set, std::unordered_set<T> &old_set) {
5170
for (auto element : old_set) new_set.insert(element);
5271
}
5372

73+
/**
74+
* @breif Split conjunction expression tree into a vector of expressions with
75+
* AND
76+
*/
5477
void SplitPredicates(expression::AbstractExpression *expr,
5578
std::vector<expression::AbstractExpression *> &predicates);
5679

80+
/**
81+
* @breif Combine a vector of expressions with AND
82+
*/
5783
expression::AbstractExpression *CombinePredicates(
5884
std::vector<std::shared_ptr<expression::AbstractExpression>> predicates);
5985

86+
/**
87+
* @breif Combine a vector of expressions with AND
88+
*/
6089
expression::AbstractExpression *CombinePredicates(
6190
std::vector<AnnotatedExpression> predicates);
6291

92+
/**
93+
* @brief Extract single table precates and multi-table predicates from the expr
94+
*
95+
* @param expr The original predicate
96+
* @param annotated_predicates The extracted conjunction predicates
97+
*/
6398
std::vector<AnnotatedExpression> ExtractPredicates(
6499
expression::AbstractExpression *expr,
65100
std::vector<AnnotatedExpression> annotated_predicates = {});
66101

102+
/**
103+
* @breif Construct a qualified join predicate (contains a subset of alias in
104+
* the table_alias_set)
105+
* and remove the multitable expressions in the original join_predicates
106+
*
107+
* @return The final join predicate
108+
*/
67109
expression::AbstractExpression *ConstructJoinPredicate(
68110
std::unordered_set<std::string> &table_alias_set,
69111
MultiTablePredicates &join_predicates);
70112

113+
114+
/**
115+
* @breif Check if there are any join columns in the join expression
116+
* For example, expr = (expr_1) AND (expr_2) AND (expr_3)
117+
* we only extract expr_i that have the format (l_table.a = r_table.b)
118+
* i.e. expr that is equality check for tuple columns from both of
119+
* the underlying tables
120+
*/
71121
bool ContainsJoinColumns(const std::unordered_set<std::string> &l_group_alias,
72122
const std::unordered_set<std::string> &r_group_alias,
73123
const expression::AbstractExpression *expr);
74124

125+
/**
126+
* @brief Create a copy plan based on the copy statement
127+
*/
75128
std::unique_ptr<planner::AbstractPlan> CreateCopyPlan(
76129
parser::CopyStatement *copy_stmt);
77130

131+
/**
132+
* @brief Construct the map from subquery column name to the actual expression
133+
* at the subquery level, for example SELECT a FROM (SELECT a + b as a FROM
134+
* test), we'll build the map {"a" -> a + b}
135+
*
136+
* @param select_list The select list of a subquery
137+
*
138+
* @return The mapping mentioned above
139+
*/
78140
std::unordered_map<std::string, std::shared_ptr<expression::AbstractExpression>>
79141
ConstructSelectElementMap(
80142
std::vector<std::unique_ptr<expression::AbstractExpression>> &select_list);
81143

144+
/**
145+
* @brief Walk a predicate, transform the tuple value expression into the actual
146+
* expression in the sub-query
147+
*
148+
* @param alias_to_expr_map The column name to expression map
149+
* @param expr the predicate
150+
*
151+
* @return the transformed predicate
152+
*/
82153
expression::AbstractExpression *TransformQueryDerivedTablePredicates(
83154
const std::unordered_map<std::string,
84155
std::shared_ptr<expression::AbstractExpression>>
85156
&alias_to_expr_map,
86157
expression::AbstractExpression *expr);
87158

88-
// Extract equi-join keys from the join predicates (conjunction is already
89-
// removed in each unit)
159+
/**
160+
* @brief Walk through a set of join predicates, generate join keys base on the
161+
* left/right table aliases set
162+
*/
90163
void ExtractEquiJoinKeys(
91164
const std::vector<AnnotatedExpression> join_predicates,
92165
std::vector<std::unique_ptr<expression::AbstractExpression>> &left_keys,

src/optimizer/input_column_deriver.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ void InputColumnDeriver::AggregateHelper(const BaseOperatorNode *op) {
219219
// Check having predicate, since the predicate may use columns other than
220220
// output columns
221221
for (auto &having_expr : having_exprs) {
222+
// We perform aggregate here so the output contains aggregate exprs while
223+
// input should contain all tuple value exprs used to perform aggregation.
222224
expression::ExpressionUtil::GetTupleValueExprs(input_cols_set,
223225
having_expr.expr.get());
224226
expression::ExpressionUtil::GetTupleAndAggregateExprs(
@@ -275,7 +277,8 @@ void InputColumnDeriver::JoinHelper(const BaseOperatorNode *op) {
275277
}
276278
ExprMap output_cols_map;
277279
for (auto expr : required_cols_) {
278-
expression::ExpressionUtil::GetTupleAndAggregateExprs(output_cols_map, expr);
280+
expression::ExpressionUtil::GetTupleAndAggregateExprs(output_cols_map,
281+
expr);
279282
}
280283
for (auto &expr_idx_pair : output_cols_map) {
281284
input_cols_set.insert(expr_idx_pair.first);

src/optimizer/optimizer_task.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,7 @@ void OptimizerTask::ConstructValidRules(
2929
GroupExpression *group_expr, OptimizeContext *context,
3030
std::vector<std::unique_ptr<Rule>> &rules,
3131
std::vector<RuleWithPromise> &valid_rules) {
32-
// LOG_DEBUG("Child size %lu", group_expr->GetChildrenGroupsSize());
3332
for (auto &rule : rules) {
34-
// LOG_DEBUG("Op %d, Pattern Op %d",
35-
// static_cast<int>(group_expr->Op().type()),
36-
// static_cast<int>(rule->GetMatchPattern()->Type()));
3733
if (group_expr->Op().type() !=
3834
rule->GetMatchPattern()->Type() || // Root pattern type mismatch
3935
group_expr->HasRuleExplored(rule.get()) || // Rule has been applied
@@ -61,7 +57,7 @@ RuleSet &OptimizerTask::GetRuleSet() const {
6157
// OptimizeGroup
6258
//===--------------------------------------------------------------------===//
6359
void OptimizeGroup::execute() {
64-
// LOG_DEBUG("OptimizeGroup::Execute() group %d", group_->GetID());
60+
LOG_TRACE("OptimizeGroup::Execute() group %d", group_->GetID());
6561
if (group_->GetCostLB() > context_->cost_upper_bound || // Cost LB > Cost UB
6662
group_->GetBestExpression(context_->required_prop) !=
6763
nullptr) // Has optimized given the context
@@ -98,8 +94,8 @@ void OptimizeExpression::execute() {
9894
GetRuleSet().GetImplementationRules(), valid_rules);
9995

10096
std::sort(valid_rules.begin(), valid_rules.end());
101-
// LOG_DEBUG("OptimizeExpression::execute() op %d, valid rules : %lu",
102-
// static_cast<int>(group_expr_->Op().type()), valid_rules.size());
97+
LOG_TRACE("OptimizeExpression::execute() op %d, valid rules : %lu",
98+
static_cast<int>(group_expr_->Op().type()), valid_rules.size());
10399

104100
// Apply rule
105101
for (auto &r : valid_rules) {
@@ -125,7 +121,7 @@ void OptimizeExpression::execute() {
125121
//===--------------------------------------------------------------------===//
126122
void ExploreGroup::execute() {
127123
if (group_->HasExplored()) return;
128-
// LOG_DEBUG("ExploreGroup::execute() ");
124+
LOG_TRACE("ExploreGroup::execute() ");
129125

130126
for (auto &logical_expr : group_->GetLogicalExpressions()) {
131127
PushTask(new ExploreExpression(logical_expr.get(), context_));
@@ -140,7 +136,7 @@ void ExploreGroup::execute() {
140136
// ExploreExpression
141137
//===--------------------------------------------------------------------===//
142138
void ExploreExpression::execute() {
143-
// LOG_DEBUG("ExploreExpression::execute() ");
139+
LOG_TRACE("ExploreExpression::execute() ");
144140
std::vector<RuleWithPromise> valid_rules;
145141

146142
// Construct valid transformation rules from rule set
@@ -172,7 +168,7 @@ void ExploreExpression::execute() {
172168
// ApplyRule
173169
//===--------------------------------------------------------------------===//
174170
void ApplyRule::execute() {
175-
// LOG_DEBUG("ApplyRule::execute() ");
171+
LOG_TRACE("ApplyRule::execute() ");
176172
if (group_expr_->HasRuleExplored(rule_)) return;
177173

178174
GroupExprBindingIterator iterator(GetMemo(), group_expr_,
@@ -262,7 +258,7 @@ void DeriveStats::execute() {
262258
//===--------------------------------------------------------------------===//
263259
void OptimizeInputs::execute() {
264260
// Init logic: only run once per task
265-
// LOG_DEBUG("OptimizeInputs::execute() ");
261+
LOG_TRACE("OptimizeInputs::execute() ");
266262
if (cur_child_idx_ == -1) {
267263
// TODO(patrick):
268264
// 1. We can init input cost using non-zero value for pruning

src/optimizer/query_to_operator_transformer.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ void QueryToOperatorTransformer::Visit(parser::SelectStatement *op) {
7474
// Plain aggregation
7575
std::shared_ptr<OperatorExpression> agg_expr;
7676
if (op->group_by == nullptr) {
77-
// TODO(boweic): aggregation without groupby could still have having clause
77+
// TODO(boweic): aggregation without groupby could still have having
78+
// clause
7879
agg_expr = std::make_shared<OperatorExpression>(
7980
LogicalAggregateAndGroupBy::make());
8081
agg_expr->PushChild(output_expr_);
@@ -421,7 +422,7 @@ bool QueryToOperatorTransformer::IsSupportedConjunctivePredicate(
421422
expr_type == ExpressionType::COMPARE_GREATERTHANOREQUALTO ||
422423
expr_type == ExpressionType::COMPARE_LESSTHAN ||
423424
expr_type == ExpressionType::COMPARE_LESSTHANOREQUALTO) {
424-
// Supported is one child is subquery and the other is not
425+
// Supported if one child is subquery and the other is not
425426
if ((!expr->GetChild(0)->HasSubquery() &&
426427
expr->GetChild(1)->GetExpressionType() ==
427428
ExpressionType::ROW_SUBQUERY) ||
@@ -450,7 +451,9 @@ bool QueryToOperatorTransformer::IsSupportedSubSelect(
450451
std::vector<expression::AbstractExpression *> predicates;
451452
util::SplitPredicates(op->where_clause.get(), predicates);
452453
for (const auto &pred : predicates) {
453-
// If correlated predicate
454+
// Depth is used to detect correlated subquery, it is set in the binder,
455+
// if a TVE has depth less than the depth of the current operator, then it
456+
// is correlated predicate
454457
if (pred->GetDepth() < op->depth) {
455458
if (pred->GetExpressionType() != ExpressionType::COMPARE_EQUAL) {
456459
return false;

0 commit comments

Comments
 (0)