Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 1fc8b55

Browse files
GustavoAngulo authored and apavlo committed
Add cardinality estimate to AbstractPlan object (#1475)
* Clarified variable name * Added cardinality and test * Add optimizer testing class * Remove debugging code * Revert default estimate to fix broken test * Formatting * Removed unnecessary override in Cardinality test * More comments and clean up tests
1 parent e738acb commit 1fc8b55

14 files changed

+254
-49
lines changed

src/include/optimizer/optimizer_task.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ class OptimizeInputs : public OptimizerTask {
211211
GroupExpression *group_expr_;
212212
double cur_total_cost_;
213213
int cur_child_idx_ = -1;
214-
int pre_child_idx_ = -1;
214+
int prev_child_idx_ = -1;
215215
int cur_prop_pair_idx_ = 0;
216216
};
217217

src/include/optimizer/plan_generator.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
//
33
// Peloton
44
//
5-
// operator_to_plan_transformer.h
5+
// plan_generator.h
66
//
7-
// Identification: src/include/optimizer/operator_to_plan_transformer.h
7+
// Identification: src/include/optimizer/plan_generator.h
88
//
9-
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -46,7 +46,8 @@ class PlanGenerator : public OperatorVisitor {
4646
std::vector<expression::AbstractExpression *> required_cols,
4747
std::vector<expression::AbstractExpression *> output_cols,
4848
std::vector<std::unique_ptr<planner::AbstractPlan>> &children_plans,
49-
std::vector<ExprMap> children_expr_map);
49+
std::vector<ExprMap> children_expr_map,
50+
int estimated_cardinality);
5051

5152
void Visit(const DummyScan *) override;
5253

src/include/optimizer/stats_calculator.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
//
33
// Peloton
44
//
5-
// cost_and_stats_calculator.h
5+
// stats_calculator.h
66
//
77
// Identification: src/include/optimizer/stats_calculator.h
88
//
9-
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -56,13 +56,13 @@ class StatsCalculator : public OperatorVisitor {
5656
std::unordered_map<std::string, std::shared_ptr<ColumnStats>> &stats,
5757
bool copy);
5858
/**
59-
* @brief Update selectivity for predicate evaluation
59+
* @brief Return estimated cardinality for a filter
6060
*
6161
* @param num_rows Number of rows of base table
6262
* @param predicate_stats The stats for columns in the expression
6363
* @param predicates conjunction predicates
6464
*/
65-
void UpdateStatsForFilter(
65+
size_t EstimateCardinalityForFilter(
6666
size_t num_rows,
6767
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
6868
&predicate_stats,

src/include/planner/abstract_plan.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ class AbstractPlan : public Printable {
8585
// Get the estimated cardinality of this plan
8686
int GetCardinality() const { return estimated_cardinality_; }
8787

88-
// TODO: This is only for testing now. When the optimizer is ready, we should
89-
// delete this function and pass this information to constructor
88+
// FOR TESTING ONLY. This function should only be called during construction of plan (ConvertOpExpression) or
89+
// for tests.
9090
void SetCardinality(int cardinality) { estimated_cardinality_ = cardinality; }
9191

9292
//===--------------------------------------------------------------------===//
@@ -152,9 +152,7 @@ class AbstractPlan : public Printable {
152152
std::vector<std::unique_ptr<AbstractPlan>> children_;
153153

154154
AbstractPlan *parent_ = nullptr;
155-
156-
// TODO: This field is harded coded now. This needs to be changed when
157-
// optimizer has the cost model and cardinality estimation
155+
158156
int estimated_cardinality_ = 500000;
159157

160158
private:

src/optimizer/child_stats_deriver.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
//
33
// Peloton
44
//
5-
// cost_and_stats_calculator.h
5+
// child_stats_deriver.cpp
66
//
7-
// Identification: src/optimizer/stats_calculator.cpp
7+
// Identification: src/optimizer/child_stats_deriver.cpp
88
//
9-
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -67,7 +67,7 @@ void ChildStatsDeriver::PassDownColumn(expression::AbstractExpression *col) {
6767
auto child_group = memo_->GetGroupByID(gexpr_->GetChildGroupId(idx));
6868
if (child_group->GetTableAliases().count(tv_expr->GetTableName()) &&
6969
// If we have not derived the column stats yet
70-
child_group->HasColumnStats(tv_expr->GetColFullName())) {
70+
!child_group->HasColumnStats(tv_expr->GetColFullName())) {
7171
output_[idx].insert(col);
7272
break;
7373
}

src/optimizer/optimizer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//
77
// Identification: src/optimizer/optimizer.cpp
88
//
9-
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -366,8 +366,7 @@ unique_ptr<planner::AbstractPlan> Optimizer::ChooseBestPlan(
366366
PlanGenerator generator;
367367
auto plan = generator.ConvertOpExpression(op, required_props, required_cols,
368368
output_cols, children_plans,
369-
children_expr_map);
370-
369+
children_expr_map, group->GetNumRows());
371370
LOG_TRACE("Finish Choosing best plan for group %d", id);
372371
return plan;
373372
}

src/optimizer/optimizer_task.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// Peloton
44
//
5-
// rule.h
5+
// optimizer_task.cpp
66
//
77
// Identification: src/optimizer/optimizer_task.cpp
88
//
@@ -313,14 +313,13 @@ void OptimizeInputs::execute() {
313313
cur_total_cost_ += child_best_expr->GetCost(i_prop);
314314
// Pruning
315315
if (cur_total_cost_ > context_->cost_upper_bound) break;
316-
} else if (pre_child_idx_ !=
317-
cur_child_idx_) { // First time to optimize child group
318-
pre_child_idx_ = cur_child_idx_;
316+
} else if (prev_child_idx_ !=
317+
cur_child_idx_) { // We haven't optimized child group
318+
prev_child_idx_ = cur_child_idx_;
319319
PushTask(new OptimizeInputs(this));
320320
PushTask(new OptimizeGroup(
321321
child_group, std::make_shared<OptimizeContext>(
322-
context_->metadata, i_prop,
323-
context_->cost_upper_bound - cur_total_cost_)));
322+
context_->metadata, i_prop, context_->cost_upper_bound - cur_total_cost_)));
324323
return;
325324
} else { // If we return from OptimizeGroup, then there is no expr for
326325
// the context
@@ -401,7 +400,7 @@ void OptimizeInputs::execute() {
401400
}
402401

403402
// Reset child idx and total cost
404-
pre_child_idx_ = -1;
403+
prev_child_idx_ = -1;
405404
cur_child_idx_ = 0;
406405
cur_total_cost_ = 0;
407406
}

src/optimizer/plan_generator.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
//
33
// Peloton
44
//
5-
// operator_to_plan_transformer.cpp
5+
// plan_generator.cpp
66
//
7-
// Identification: src/optimizer/operator_to_plan_transformer.cpp
7+
// Identification: src/optimizer/plan_generator.cpp
88
//
99
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
@@ -59,14 +59,16 @@ unique_ptr<planner::AbstractPlan> PlanGenerator::ConvertOpExpression(
5959
vector<expression::AbstractExpression *> required_cols,
6060
vector<expression::AbstractExpression *> output_cols,
6161
vector<unique_ptr<planner::AbstractPlan>> &children_plans,
62-
vector<ExprMap> children_expr_map) {
62+
vector<ExprMap> children_expr_map,
63+
int estimated_cardinality) {
6364
required_props_ = move(required_props);
6465
required_cols_ = move(required_cols);
6566
output_cols_ = move(output_cols);
6667
children_plans_ = move(children_plans);
6768
children_expr_map_ = move(children_expr_map);
6869
op->Op().Accept(this);
6970
BuildProjectionPlan();
71+
output_plan_->SetCardinality(estimated_cardinality);
7072
return move(output_plan_);
7173
}
7274

src/optimizer/stats/table_stats.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,10 @@ double TableStats::GetCardinality(const std::string column_name) {
120120
return column_stats->cardinality;
121121
}
122122

123+
// Returns true if we have column stats for a specific column
123124
bool TableStats::HasColumnStats(const std::string col_name) {
124125
auto it = col_name_to_stats_map_.find(col_name);
125-
if (it == col_name_to_stats_map_.end()) {
126-
return false;
127-
}
128-
return true;
126+
return it != col_name_to_stats_map_.end();
129127
}
130128

131129
std::shared_ptr<ColumnStats> TableStats::GetColumnStats(

src/optimizer/stats_calculator.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
//
33
// Peloton
44
//
5-
// cost_and_stats_calculator.h
5+
// stats_calculator.cpp
66
//
77
// Identification: src/optimizer/stats_calculator.cpp
88
//
9-
// Copyright (c) 2015-16, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -64,18 +64,19 @@ void StatsCalculator::Visit(const LogicalGet *op) {
6464
AddBaseTableStats(col, table_stats, predicate_stats, false);
6565
}
6666
}
67-
// Use predicates to update the stats accordingly
68-
UpdateStatsForFilter(
69-
table_stats->GetColumnCount() == 0 ? 0 : table_stats->num_rows,
70-
predicate_stats, op->predicates);
67+
// Use predicates to estimate cardinality. If we were unable to find any column stats from the catalog, default to 0
68+
if (table_stats->GetColumnCount() == 0) {
69+
root_group->SetNumRows(0);
70+
} else {
71+
root_group->SetNumRows(EstimateCardinalityForFilter(table_stats->num_rows, predicate_stats, op->predicates));
72+
}
7173
}
7274
// Add the stats to the group
7375
for (auto &column_name_stats_pair : required_stats) {
7476
auto &column_name = column_name_stats_pair.first;
7577
auto &column_stats = column_name_stats_pair.second;
7678
column_stats->num_rows = root_group->GetNumRows();
77-
memo_->GetGroupByID(gexpr_->GetGroupID())
78-
->AddStats(column_name, column_stats);
79+
root_group->AddStats(column_name, column_stats);
7980
}
8081
}
8182

@@ -233,7 +234,7 @@ void StatsCalculator::AddBaseTableStats(
233234
}
234235
}
235236

236-
void StatsCalculator::UpdateStatsForFilter(
237+
size_t StatsCalculator::EstimateCardinalityForFilter(
237238
size_t num_rows,
238239
std::unordered_map<std::string, std::shared_ptr<ColumnStats>>
239240
&predicate_stats,
@@ -255,7 +256,7 @@ void StatsCalculator::UpdateStatsForFilter(
255256
annotated_expr.expr.get());
256257
}
257258
// Update selectivity
258-
memo_->GetGroupByID(gexpr_->GetGroupID())->SetNumRows(num_rows * selectivity);
259+
return num_rows * selectivity;
259260
}
260261

261262
// Calculate the selectivity given the predicate and the stats of columns in the

0 commit comments

Comments
 (0)