Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit f83d538

Browse files
chenboytli2
authored and committed
Apply limit + order optimization in the optimizer (#1385)
* Apply limit + order optimization in the optimizer
1 parent a045cfc commit f83d538

File tree

9 files changed

+151
-42
lines changed

9 files changed

+151
-42
lines changed

src/include/optimizer/operators.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,18 @@ class LogicalDistinct : public OperatorNode<LogicalDistinct> {
299299
//===--------------------------------------------------------------------===//
300300
class LogicalLimit : public OperatorNode<LogicalLimit> {
301301
public:
302-
static Operator make(int64_t offset, int64_t limit);
302+
static Operator make(
303+
int64_t offset, int64_t limit,
304+
std::vector<expression::AbstractExpression *> &&sort_exprs,
305+
std::vector<bool> &&sort_ascending);
303306
int64_t offset;
304307
int64_t limit;
308+
// When we get a query like "SELECT * FROM tab ORDER BY a LIMIT 5"
309+
// We'll let the limit operator keep the order by clause's content as an
310+
// internal order, then the limit operator will generate a sort plan with
311+
// limit as an optimization.
312+
std::vector<expression::AbstractExpression *> sort_exprs;
313+
std::vector<bool> sort_ascending;
305314
};
306315

307316
//===--------------------------------------------------------------------===//
@@ -470,9 +479,18 @@ class PhysicalOrderBy : public OperatorNode<PhysicalOrderBy> {
470479
//===--------------------------------------------------------------------===//
471480
class PhysicalLimit : public OperatorNode<PhysicalLimit> {
472481
public:
473-
static Operator make(int64_t offset, int64_t limit);
482+
static Operator make(
483+
int64_t offset, int64_t limit,
484+
std::vector<expression::AbstractExpression *> sort_columns,
485+
std::vector<bool> sort_ascending);
474486
int64_t offset;
475487
int64_t limit;
488+
// When we get a query like "SELECT * FROM tab ORDER BY a LIMIT 5"
489+
// We'll let the limit operator keep the order by clause's content as an
490+
// internal order, then the limit operator will generate a sort plan with
491+
// limit as an optimization.
492+
std::vector<expression::AbstractExpression *> sort_exprs;
493+
std::vector<bool> sort_acsending;
476494
};
477495

478496
//===--------------------------------------------------------------------===//

src/include/planner/order_by_plan.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ class OrderByPlan : public AbstractPlan {
2828
const std::vector<bool> &descend_flags,
2929
const std::vector<oid_t> &output_column_ids);
3030

31+
OrderByPlan(const std::vector<oid_t> &sort_keys,
32+
const std::vector<bool> &descend_flags,
33+
const std::vector<oid_t> &output_column_ids, const uint64_t limit,
34+
const uint64_t offset);
35+
3136
void PerformBinding(BindingContext &binding_context) override;
3237

3338
//===--------------------------------------------------------------------===//
@@ -51,13 +56,21 @@ class OrderByPlan : public AbstractPlan {
5156
return output_ais_;
5257
}
5358

54-
inline PlanNodeType GetPlanNodeType() const override { return PlanNodeType::ORDERBY; }
59+
inline PlanNodeType GetPlanNodeType() const override {
60+
return PlanNodeType::ORDERBY;
61+
}
5562

5663
void GetOutputColumns(std::vector<oid_t> &columns) const override {
5764
columns = GetOutputColumnIds();
5865
}
5966

60-
const std::string GetInfo() const override { return "OrderBy"; }
67+
const std::string GetInfo() const override {
68+
return std::string("OrderBy") +
69+
(limit_
70+
? "(Limit : " + std::to_string(limit_number_) + ", Offset : " +
71+
std::to_string(limit_offset_) + ")"
72+
: "");
73+
}
6174

6275
void SetUnderlyingOrder(bool same_order) { underling_ordered_ = same_order; }
6376

src/optimizer/child_property_deriver.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,17 @@ void ChildPropertyDeriver::Visit(const PhysicalAggregate *) {
136136
vector<shared_ptr<PropertySet>>{make_shared<PropertySet>()}));
137137
}
138138

139-
void ChildPropertyDeriver::Visit(const PhysicalLimit *) {
140-
// Let child fulfil all the required properties
141-
vector<shared_ptr<PropertySet>> child_input_properties{requirements_};
139+
void ChildPropertyDeriver::Visit(const PhysicalLimit *op) {
140+
// Limit fulfills the internal sort property itself; the child gets an empty property set
141+
vector<shared_ptr<PropertySet>> child_input_properties{
142+
std::make_shared<PropertySet>()};
143+
std::shared_ptr<PropertySet> provided_prop(new PropertySet);
144+
if (!op->sort_exprs.empty()) {
145+
provided_prop->AddProperty(
146+
std::make_shared<PropertySort>(op->sort_exprs, op->sort_acsending));
147+
}
142148

143-
output_.push_back(make_pair(requirements_, move(child_input_properties)));
149+
output_.push_back(make_pair(provided_prop, move(child_input_properties)));
144150
}
145151

146152
void ChildPropertyDeriver::Visit(const PhysicalDistinct *) {

src/optimizer/input_column_deriver.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,18 +84,40 @@ void InputColumnDeriver::Visit(const QueryDerivedScan *op) {
8484
output_cols, {input_cols}};
8585
}
8686

87-
void InputColumnDeriver::Visit(const PhysicalLimit *) { Passdown(); }
87+
void InputColumnDeriver::Visit(const PhysicalLimit *op) {
88+
// All aggregate expressions and TVEs in the required columns and internal
89+
// sort columns are needed by the child node
90+
ExprSet input_cols_set;
91+
for (auto expr : required_cols_) {
92+
if (expression::ExpressionUtil::IsAggregateExpression(expr)) {
93+
input_cols_set.insert(expr);
94+
} else {
95+
expression::ExpressionUtil::GetTupleValueExprs(input_cols_set, expr);
96+
}
97+
}
98+
for (const auto& sort_column : op->sort_exprs) {
99+
input_cols_set.insert(sort_column);
100+
}
101+
vector<AbstractExpression *> cols;
102+
for (const auto &expr : input_cols_set) {
103+
cols.push_back(expr);
104+
}
105+
output_input_cols_ =
106+
pair<vector<AbstractExpression *>, vector<vector<AbstractExpression *>>>{
107+
cols, {cols}};
108+
}
88109

89110
void InputColumnDeriver::Visit(const PhysicalOrderBy *) {
90111
// we need to pass down both required columns and sort columns
91112
auto prop = properties_->GetPropertyOfType(PropertyType::SORT);
92113
PELOTON_ASSERT(prop.get() != nullptr);
93114
ExprSet input_cols_set;
94115
for (auto expr : required_cols_) {
95-
if (expression::ExpressionUtil::IsAggregateExpression(expr))
116+
if (expression::ExpressionUtil::IsAggregateExpression(expr)) {
96117
input_cols_set.insert(expr);
97-
else
118+
} else {
98119
expression::ExpressionUtil::GetTupleValueExprs(input_cols_set, expr);
120+
}
99121
}
100122
auto sort_prop = prop->As<PropertySort>();
101123
size_t sort_col_size = sort_prop->GetSortColumnSize();

src/optimizer/operators.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,15 @@ Operator LogicalDistinct::make() {
443443
//===--------------------------------------------------------------------===//
444444
// Limit
445445
//===--------------------------------------------------------------------===//
446-
Operator LogicalLimit::make(int64_t offset, int64_t limit) {
446+
Operator LogicalLimit::make(
447+
int64_t offset, int64_t limit,
448+
std::vector<expression::AbstractExpression *> &&sort_exprs,
449+
std::vector<bool> &&sort_ascending) {
447450
LogicalLimit *limit_op = new LogicalLimit;
448451
limit_op->offset = offset;
449452
limit_op->limit = limit;
453+
limit_op->sort_exprs = std::move(sort_exprs);
454+
limit_op->sort_ascending = std::move(sort_ascending);
450455
return Operator(limit_op);
451456
}
452457

@@ -656,10 +661,15 @@ Operator PhysicalOrderBy::make() {
656661
//===--------------------------------------------------------------------===//
657662
// PhysicalLimit
658663
//===--------------------------------------------------------------------===//
659-
Operator PhysicalLimit::make(int64_t offset, int64_t limit) {
664+
Operator PhysicalLimit::make(
665+
int64_t offset, int64_t limit,
666+
std::vector<expression::AbstractExpression *> sort_exprs,
667+
std::vector<bool> sort_ascending) {
660668
PhysicalLimit *limit_op = new PhysicalLimit;
661669
limit_op->offset = offset;
662670
limit_op->limit = limit;
671+
limit_op->sort_exprs = sort_exprs;
672+
limit_op->sort_acsending = sort_ascending;
663673
return Operator(limit_op);
664674
}
665675

src/optimizer/plan_generator.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,35 @@ void PlanGenerator::Visit(const QueryDerivedScan *) {
156156
}
157157

158158
void PlanGenerator::Visit(const PhysicalLimit *op) {
159+
// Generate order by + limit plan when there's internal sort order
160+
output_plan_ = std::move(children_plans_[0]);
161+
if (!op->sort_exprs.empty()) {
162+
vector<oid_t> column_ids;
163+
PELOTON_ASSERT(children_expr_map_.size() == 1);
164+
auto &child_cols_map = children_expr_map_[0];
165+
for (size_t i = 0; i < output_cols_.size(); ++i) {
166+
column_ids.push_back(child_cols_map[output_cols_[i]]);
167+
}
168+
169+
PELOTON_ASSERT(op->sort_exprs.size() == op->sort_acsending.size());
170+
auto sort_columns_size = op->sort_exprs.size();
171+
vector<oid_t> sort_col_ids;
172+
vector<bool> sort_flags;
173+
for (size_t i = 0; i < sort_columns_size; ++i) {
174+
sort_col_ids.push_back(child_cols_map[op->sort_exprs[i]]);
175+
// The planner uses descending flags, so invert the ascending flag
176+
sort_flags.push_back(!op->sort_acsending[i]);
177+
}
178+
unique_ptr<planner::AbstractPlan> order_by_plan(new planner::OrderByPlan(
179+
sort_col_ids, sort_flags, column_ids, op->limit, op->offset));
180+
order_by_plan->AddChild(std::move(output_plan_));
181+
output_plan_ = std::move(order_by_plan);
182+
}
183+
159184
unique_ptr<planner::AbstractPlan> limit_plan(
160185
new planner::LimitPlan(op->limit, op->offset));
161-
limit_plan->AddChild(move(children_plans_[0]));
162-
output_plan_ = move(limit_plan);
186+
limit_plan->AddChild(move(output_plan_));
187+
output_plan_ = std::move(limit_plan);
163188
}
164189

165190
void PlanGenerator::Visit(const PhysicalOrderBy *) {
@@ -508,8 +533,8 @@ void PlanGenerator::BuildProjectionPlan() {
508533

509534
void PlanGenerator::BuildAggregatePlan(
510535
AggregateType aggr_type,
511-
const std::vector<std::shared_ptr<expression::AbstractExpression>> *
512-
groupby_cols,
536+
const std::vector<std::shared_ptr<expression::AbstractExpression>>
537+
*groupby_cols,
513538
std::unique_ptr<expression::AbstractExpression> having_predicate) {
514539
vector<planner::AggregatePlan::AggTerm> aggr_terms;
515540
vector<catalog::Column> output_schema_columns;

src/optimizer/query_to_operator_transformer.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,18 @@ void QueryToOperatorTransformer::Visit(parser::SelectStatement *op) {
111111
}
112112

113113
if (op->limit != nullptr) {
114+
const auto &order_info = op->order;
115+
std::vector<expression::AbstractExpression *> sort_exprs;
116+
std::vector<bool> sort_ascending;
117+
for (auto &expr : order_info->exprs) {
118+
sort_exprs.push_back(expr.get());
119+
}
120+
for (auto &type : order_info->types) {
121+
sort_ascending.push_back(type == parser::kOrderAsc);
122+
}
114123
auto limit_expr = std::make_shared<OperatorExpression>(
115-
LogicalLimit::make(op->limit->offset, op->limit->limit));
124+
LogicalLimit::make(op->limit->offset, op->limit->limit,
125+
std::move(sort_exprs), std::move(sort_ascending)));
116126
limit_expr->PushChild(output_expr_);
117127
output_expr_ = limit_expr;
118128
}

src/optimizer/rule_impls.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,8 @@ void ImplementLimit::Transform(
811811
const LogicalLimit *limit_op = input->Op().As<LogicalLimit>();
812812

813813
auto result_plan = std::make_shared<OperatorExpression>(
814-
PhysicalLimit::make(limit_op->offset, limit_op->limit));
814+
PhysicalLimit::make(limit_op->offset, limit_op->limit,
815+
limit_op->sort_exprs, limit_op->sort_ascending));
815816
std::vector<std::shared_ptr<OperatorExpression>> children = input->Children();
816817
PELOTON_ASSERT(children.size() == 1);
817818

src/planner/order_by_plan.cpp

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// Copyright (c) 2015-17, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
12-
12+
1313
#include <memory>
1414
#include <vector>
1515

@@ -21,24 +21,35 @@ namespace peloton {
2121
namespace planner {
2222

2323
OrderByPlan::OrderByPlan(const std::vector<oid_t> &sort_keys,
24-
const std::vector<bool> &descend_flags,
25-
const std::vector<oid_t> &output_column_ids)
26-
: sort_keys_(sort_keys),
27-
descend_flags_(descend_flags),
28-
output_column_ids_(output_column_ids) {}
24+
const std::vector<bool> &descend_flags,
25+
const std::vector<oid_t> &output_column_ids)
26+
: sort_keys_(sort_keys),
27+
descend_flags_(descend_flags),
28+
output_column_ids_(output_column_ids) {}
29+
30+
OrderByPlan::OrderByPlan(const std::vector<oid_t> &sort_keys,
31+
const std::vector<bool> &descend_flags,
32+
const std::vector<oid_t> &output_column_ids,
33+
const uint64_t limit, const uint64_t offset)
34+
: sort_keys_(sort_keys),
35+
descend_flags_(descend_flags),
36+
output_column_ids_(output_column_ids),
37+
limit_(true),
38+
limit_number_(limit),
39+
limit_offset_(offset) {}
2940

3041
void OrderByPlan::PerformBinding(BindingContext &binding_context) {
3142
// Let the child do its binding first
3243
AbstractPlan::PerformBinding(binding_context);
3344

3445
for (const oid_t col_id : GetOutputColumnIds()) {
35-
auto* ai = binding_context.Find(col_id);
46+
auto *ai = binding_context.Find(col_id);
3647
PELOTON_ASSERT(ai != nullptr);
3748
output_ais_.push_back(ai);
3849
}
3950

4051
for (const oid_t sort_key_col_id : GetSortKeys()) {
41-
auto* ai = binding_context.Find(sort_key_col_id);
52+
auto *ai = binding_context.Find(sort_key_col_id);
4253
PELOTON_ASSERT(ai != nullptr);
4354
sort_key_ais_.push_back(ai);
4455
}
@@ -64,39 +75,32 @@ hash_t OrderByPlan::Hash() const {
6475
}
6576

6677
bool OrderByPlan::operator==(const AbstractPlan &rhs) const {
67-
if (GetPlanNodeType() != rhs.GetPlanNodeType())
68-
return false;
78+
if (GetPlanNodeType() != rhs.GetPlanNodeType()) return false;
6979

7080
auto &other = static_cast<const planner::OrderByPlan &>(rhs);
7181

72-
// Sort Keys
82+
// Sort Keys
7383
size_t sort_keys_count = GetSortKeys().size();
74-
if (sort_keys_count != other.GetSortKeys().size())
75-
return false;
84+
if (sort_keys_count != other.GetSortKeys().size()) return false;
7685

7786
for (size_t i = 0; i < sort_keys_count; i++) {
78-
if (GetSortKeys()[i] != other.GetSortKeys()[i])
79-
return false;
87+
if (GetSortKeys()[i] != other.GetSortKeys()[i]) return false;
8088
}
8189

8290
// Descend Flags
8391
size_t descend_flags_count = GetDescendFlags().size();
84-
if (descend_flags_count != other.GetDescendFlags().size())
85-
return false;
92+
if (descend_flags_count != other.GetDescendFlags().size()) return false;
8693

8794
for (size_t i = 0; i < descend_flags_count; i++) {
88-
if (GetDescendFlags()[i] != other.GetDescendFlags()[i])
89-
return false;
95+
if (GetDescendFlags()[i] != other.GetDescendFlags()[i]) return false;
9096
}
9197

9298
// Output Column Ids
9399
size_t column_id_count = GetOutputColumnIds().size();
94-
if (column_id_count != other.GetOutputColumnIds().size())
95-
return false;
100+
if (column_id_count != other.GetOutputColumnIds().size()) return false;
96101

97102
for (size_t i = 0; i < column_id_count; i++) {
98-
if (GetOutputColumnIds()[i] != other.GetOutputColumnIds()[i])
99-
return false;
103+
if (GetOutputColumnIds()[i] != other.GetOutputColumnIds()[i]) return false;
100104
}
101105

102106
return AbstractPlan::operator==(rhs);

0 commit comments

Comments
 (0)