Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 721323c

Browse files
committed
Merge branch 'templatize' into abstract.
Still need to make fixes to Pattern to support both OpType and ExpType without templatizing. Will also need to clean up the code afterwards before the build will work.
2 parents 3266f29 + f4d4e8f commit 721323c

38 files changed

+1133
-234
lines changed

src/include/common/internal_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,10 @@ enum class RuleType : uint32_t {
13831383
PULL_FILTER_THROUGH_MARK_JOIN,
13841384
PULL_FILTER_THROUGH_AGGREGATION,
13851385

1386+
// AST rewrite rules (logical -> logical)
1387+
// Removes ConstantValueExpression = ConstantValueExpression
1388+
COMP_EQUALITY_ELIMINATION,
1389+
13861390
// Place holder to generate number of rules compile time
13871391
NUM_RULES
13881392

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Peloton
4+
//
5+
// absexpr_expression.h
6+
//
7+
// Identification: src/include/optimizer/absexpr_expression.h
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#pragma once
12+
13+
// AbstractExpression Definition
14+
#include "expression/abstract_expression.h"
15+
#include "expression/conjunction_expression.h"
16+
#include "expression/comparison_expression.h"
17+
#include "expression/constant_value_expression.h"
18+
19+
#include <memory>
20+
#include <vector>
21+
22+
namespace peloton {
23+
namespace optimizer {
24+
25+
// (TODO): rethink the AbsExpr_Container/Expression approach in comparison to abstract
26+
// Most of the core rule/optimizer code relies on the concept of an Operator /
27+
// OperatorExpression and the interface that the two functions respectively expose.
28+
//
29+
// The annoying part is that an AbstractExpression blends together an Operator
30+
// and OperatorExpression. Second part, the AbstractExpression does not export the
31+
// correct interface that the rest of the system depends on.
32+
//
33+
// As an extreme level of simplification (sort of hacky), an AbsExpr_Container is
34+
// analogous to Operator and wraps a single AbstractExpression node. AbsExpr_Expression
35+
// is analogous to OperatorExpression.
36+
//
37+
// AbsExpr_Container does *not* handle memory correctly w.r.t internal instantiations
38+
// from Rule transformation. This is since Peloton itself mixes unique_ptrs and
39+
// hands out raw pointers which makes adding a shared_ptr here extremely problematic.
40+
// terrier uses only shared_ptr when dealing with AbstractExpression trees.
41+
42+
class AbsExpr_Container {
43+
public:
44+
AbsExpr_Container();
45+
46+
AbsExpr_Container(const expression::AbstractExpression *expr) {
47+
node = expr;
48+
}
49+
50+
// Return operator type
51+
ExpressionType GetType() const {
52+
if (IsDefined()) {
53+
return node->GetExpressionType();
54+
}
55+
return ExpressionType::INVALID;
56+
}
57+
58+
const expression::AbstractExpression *GetExpr() const {
59+
return node;
60+
}
61+
62+
// Operator contains Logical node
63+
bool IsLogical() const {
64+
return true;
65+
}
66+
67+
// Operator contains Physical node
68+
bool IsPhysical() const {
69+
return false;
70+
}
71+
72+
std::string GetName() const {
73+
if (IsDefined()) {
74+
return node->GetExpressionName();
75+
}
76+
77+
return "Undefined";
78+
}
79+
80+
hash_t Hash() const {
81+
if (IsDefined()) {
82+
return node->Hash();
83+
}
84+
return 0;
85+
}
86+
87+
bool operator==(const AbsExpr_Container &r) {
88+
if (IsDefined() && r.IsDefined()) {
89+
// (TODO): need a better way to determine deep equality
90+
91+
// NOTE:
92+
// Without proper equality determinations, the groups will
93+
// not be assigned correctly. Arguably, terrier does this
94+
// better because a blind ExactlyEquals on different types
95+
// of ConstantValueExpression under Peloton will crash!
96+
97+
// For now, just return (false).
98+
// I don't anticipate this will affect correctness, just
99+
// performance, since duplicate trees will have to be evaluated
100+
// over and over again, rather than being able to "borrow"
101+
// a previous tree's rewrite.
102+
//
103+
// Probably not worth creating a "validator" since porting
104+
// this to terrier anyways (?). == does not check Value
105+
// so it's broken. ExactlyEqual requires precondition checking.
106+
return false;
107+
} else if (!IsDefined() && !r.IsDefined()) {
108+
return true;
109+
}
110+
return false;
111+
}
112+
113+
// Operator contains physical or logical operator node
114+
bool IsDefined() const {
115+
return node != nullptr;
116+
}
117+
118+
//(TODO): fix memory management once we move to terrier
119+
expression::AbstractExpression *Rebuild(std::vector<expression::AbstractExpression*> children) {
120+
switch (GetType()) {
121+
case ExpressionType::COMPARE_EQUAL:
122+
case ExpressionType::COMPARE_NOTEQUAL:
123+
case ExpressionType::COMPARE_LESSTHAN:
124+
case ExpressionType::COMPARE_GREATERTHAN:
125+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
126+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
127+
case ExpressionType::COMPARE_LIKE:
128+
case ExpressionType::COMPARE_NOTLIKE:
129+
case ExpressionType::COMPARE_IN:
130+
case ExpressionType::COMPARE_DISTINCT_FROM: {
131+
PELOTON_ASSERT(children.size() == 2);
132+
return new expression::ComparisonExpression(GetType(), children[0], children[1]);
133+
}
134+
case ExpressionType::CONJUNCTION_AND:
135+
case ExpressionType::CONJUNCTION_OR: {
136+
PELOTON_ASSERT(children.size() == 2);
137+
return new expression::ConjunctionExpression(GetType(), children[0], children[1]);
138+
}
139+
case ExpressionType::VALUE_CONSTANT: {
140+
PELOTON_ASSERT(children.size() == 0);
141+
auto cve = static_cast<const expression::ConstantValueExpression*>(node);
142+
return new expression::ConstantValueExpression(cve->GetValue());
143+
}
144+
default: {
145+
int type = static_cast<int>(GetType());
146+
LOG_ERROR("Unimplemented Rebuild() for %d found", type);
147+
return nullptr;
148+
}
149+
}
150+
}
151+
152+
private:
153+
const expression::AbstractExpression *node;
154+
};
155+
156+
class AbsExpr_Expression {
157+
public:
158+
AbsExpr_Expression(AbsExpr_Container op): op(op) {};
159+
160+
void PushChild(std::shared_ptr<AbsExpr_Expression> op) {
161+
children.push_back(op);
162+
}
163+
164+
void PopChild() {
165+
children.pop_back();
166+
}
167+
168+
const std::vector<std::shared_ptr<AbsExpr_Expression>> &Children() const {
169+
return children;
170+
}
171+
172+
const AbsExpr_Container &Op() const {
173+
return op;
174+
}
175+
176+
private:
177+
AbsExpr_Container op;
178+
std::vector<std::shared_ptr<AbsExpr_Expression>> children;
179+
};
180+
181+
} // namespace optimizer
182+
} // namespace peloton
183+

src/include/optimizer/binding.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class BindingIterator {
4646

4747
class GroupBindingIterator : public BindingIterator {
4848
public:
49+
// TODO(ncx): pattern
4950
GroupBindingIterator(Memo& memo, GroupID id,
5051
std::shared_ptr<Pattern> pattern);
5152

@@ -59,21 +60,25 @@ class GroupBindingIterator : public BindingIterator {
5960
Group *target_group_;
6061
size_t num_group_items_;
6162

63+
// Internal function for HasNext()
64+
bool HasNextBinding();
65+
6266
size_t current_item_index_;
6367
std::unique_ptr<BindingIterator> current_iterator_;
6468
};
6569

6670
class GroupExprBindingIterator : public BindingIterator {
6771
public:
68-
GroupExprBindingIterator(Memo& memo,
69-
GroupExpression *gexpr,
70-
std::shared_ptr<Pattern> pattern);
72+
// TODO(ncx): pattern
73+
GroupExprBindingIterator(Memo& memo, GroupExpression *gexpr,
74+
std::shared_ptr<Pattern> pattern);
7175

7276
bool HasNext() override;
7377

7478
std::shared_ptr<AbstractNodeExpression> Next() override;
7579

7680
private:
81+
// TODO(ncx): pattern
7782
GroupExpression* gexpr_;
7883
std::shared_ptr<Pattern> pattern_;
7984

src/include/optimizer/child_property_deriver.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
#pragma once
1414
#include <memory>
1515
#include "optimizer/operator_visitor.h"
16+
#include "optimizer/operator_expression.h"
1617

1718
namespace peloton {
1819

1920
namespace optimizer {
21+
template <class Node, class OpType, class OperatorExpr>
2022
class Memo;
2123
}
2224

@@ -33,8 +35,10 @@ class ChildPropertyDeriver : public OperatorVisitor {
3335
public:
3436
std::vector<std::pair<std::shared_ptr<PropertySet>,
3537
std::vector<std::shared_ptr<PropertySet>>>>
36-
GetProperties(GroupExpression *gexpr,
37-
std::shared_ptr<PropertySet> requirements, Memo *memo);
38+
39+
GetProperties(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
40+
std::shared_ptr<PropertySet> requirements,
41+
Memo<Operator,OpType,OperatorExpression> *memo);
3842

3943
void Visit(const DummyScan *) override;
4044
void Visit(const PhysicalSeqScan *) override;
@@ -74,8 +78,8 @@ class ChildPropertyDeriver : public OperatorVisitor {
7478
* @brief We need the memo and gexpr because some property may depend on
7579
* child's schema
7680
*/
77-
Memo *memo_;
78-
GroupExpression *gexpr_;
81+
Memo<Operator,OpType,OperatorExpression> *memo_;
82+
GroupExpression<Operator,OpType,OperatorExpression> *gexpr_;
7983
};
8084

8185
} // namespace optimizer

src/include/optimizer/cost_model/abstract_cost_model.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
#pragma once
1414

1515
#include "optimizer/operator_visitor.h"
16+
#include "optimizer/operator_expression.h"
1617

1718
namespace peloton {
1819
namespace optimizer {
1920

21+
template <class Node, class OperatorType, class OperatorExpr>
2022
class Memo;
2123

2224
// Default cost when cost model cannot compute correct cost.
@@ -34,7 +36,8 @@ static constexpr double DEFAULT_OPERATOR_COST = 0.0025;
3436

3537
class AbstractCostModel : public OperatorVisitor {
3638
public:
37-
virtual double CalculateCost(GroupExpression *gexpr, Memo *memo,
39+
virtual double CalculateCost(GroupExpression<Operator,OpType,OperatorExpression> *gexpr,
40+
Memo<Operator,OpType,OperatorExpression> *memo,
3841
concurrency::TransactionContext *txn) = 0;
3942
};
4043

src/include/optimizer/cost_model/default_cost_model.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,14 @@ namespace peloton {
2424
namespace optimizer {
2525

2626
class Memo;
27+
2728
// Derive cost for a physical group expression
2829
class DefaultCostModel : public AbstractCostModel {
2930
public:
3031
DefaultCostModel(){};
3132

3233
double CalculateCost(GroupExpression *gexpr, Memo *memo,
33-
concurrency::TransactionContext *txn) {
34+
concurrency::TransactionContext *txn) {
3435
gexpr_ = gexpr;
3536
memo_ = memo;
3637
txn_ = txn;

src/include/optimizer/cost_model/postgres_cost_model.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace peloton {
2929
namespace optimizer {
3030

3131
class Memo;
32+
3233
// Derive cost for a physical group expression
3334
class PostgresCostModel : public AbstractCostModel {
3435
public:

src/include/optimizer/cost_model/trivial_cost_model.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ namespace peloton {
3232
namespace optimizer {
3333

3434
class Memo;
35+
3536
class TrivialCostModel : public AbstractCostModel {
3637
public:
3738
TrivialCostModel(){};

src/include/optimizer/group.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,36 +32,38 @@ class ColumnStats;
3232
//===--------------------------------------------------------------------===//
3333
// Group
3434
//===--------------------------------------------------------------------===//
35+
template <class Node, class OperatorType, class OperatorExpr>
3536
class Group : public Printable {
3637
public:
3738
Group(GroupID id, std::unordered_set<std::string> table_alias);
3839

3940
// If the GroupExpression is generated by applying a
4041
// property enforcer, we add them to enforced_exprs_
4142
// which will not be enumerated during OptimizeExpression
42-
void AddExpression(std::shared_ptr<GroupExpression> expr, bool enforced);
43+
void AddExpression(std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>> expr,
44+
bool enforced);
4345

4446
void RemoveLogicalExpression(size_t idx) {
4547
logical_expressions_.erase(logical_expressions_.begin() + idx);
4648
}
4749

48-
bool SetExpressionCost(GroupExpression *expr, double cost,
50+
bool SetExpressionCost(GroupExpression<Node,OperatorType,OperatorExpr> *expr, double cost,
4951
std::shared_ptr<PropertySet> &properties);
5052

51-
GroupExpression *GetBestExpression(std::shared_ptr<PropertySet> &properties);
53+
GroupExpression<Node,OperatorType,OperatorExpr> *GetBestExpression(std::shared_ptr<PropertySet> &properties);
5254

5355
inline const std::unordered_set<std::string> &GetTableAliases() const {
5456
return table_aliases_;
5557
}
5658

5759
// TODO: thread safety?
58-
const std::vector<std::shared_ptr<GroupExpression>> GetLogicalExpressions()
60+
const std::vector<std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>>> GetLogicalExpressions()
5961
const {
6062
return logical_expressions_;
6163
}
6264

6365
// TODO: thread safety?
64-
const std::vector<std::shared_ptr<GroupExpression>> GetPhysicalExpressions()
66+
const std::vector<std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>>> GetPhysicalExpressions()
6567
const {
6668
return physical_expressions_;
6769
}
@@ -105,7 +107,7 @@ class Group : public Printable {
105107

106108
// This should only be called in rewrite phase to retrieve the only logical
107109
// expr in the group
108-
inline GroupExpression *GetLogicalExpression() {
110+
inline GroupExpression<Node,OperatorType,OperatorExpr> *GetLogicalExpression() {
109111
PELOTON_ASSERT(logical_expressions_.size() == 1);
110112
PELOTON_ASSERT(physical_expressions_.size() == 0);
111113
return logical_expressions_[0].get();
@@ -117,15 +119,15 @@ class Group : public Printable {
117119
// TODO(boweic) Do not use string, store table alias id
118120
std::unordered_set<std::string> table_aliases_;
119121
std::unordered_map<std::shared_ptr<PropertySet>,
120-
std::tuple<double, GroupExpression *>, PropSetPtrHash,
122+
std::tuple<double, GroupExpression<Node,OperatorType,OperatorExpr> *>, PropSetPtrHash,
121123
PropSetPtrEq> lowest_cost_expressions_;
122124

123125
// Whether equivalent logical expressions have been explored for this group
124126
bool has_explored_;
125127

126-
std::vector<std::shared_ptr<GroupExpression>> logical_expressions_;
127-
std::vector<std::shared_ptr<GroupExpression>> physical_expressions_;
128-
std::vector<std::shared_ptr<GroupExpression>> enforced_exprs_;
128+
std::vector<std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>>> logical_expressions_;
129+
std::vector<std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>>> physical_expressions_;
130+
std::vector<std::shared_ptr<GroupExpression<Node,OperatorType,OperatorExpr>>> enforced_exprs_;
129131

130132
// We'll add stats lazily
131133
// TODO(boweic):

0 commit comments

Comments
 (0)