Skip to content

Commit b325fa1

Browse files
authored
feat(executor): support left joins, fix tuple bug (#435)
Support left joins
1 parent 88d123e commit b325fa1

File tree

9 files changed: 58 additions (+58) and 20 deletions (-20).

src/include/binder/table_ref/bound_join_ref.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,8 @@ class BoundJoinRef : public BoundTableRef {
3535
condition_(std::move(condition)) {}
3636

3737
auto ToString() const -> std::string override {
38-
return fmt::format("BoundJoin {{ type={}, left={}, right={}, condition={} }}", type_, left_, right_, condition_);
38+
return fmt::format("BoundJoin {{ type={}, left={}, right={}, condition={} }}", join_type_, left_, right_,
39+
condition_);
3940
}
4041

4142
/** Type of join. */

src/include/execution/plans/hash_join_plan.h

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <utility>
1717
#include <vector>
1818

19+
#include "binder/table_ref/bound_join_ref.h"
1920
#include "execution/expressions/abstract_expression.h"
2021
#include "execution/plans/abstract_plan.h"
2122

@@ -34,10 +35,12 @@ class HashJoinPlanNode : public AbstractPlanNode {
3435
* @param right_key_expression The expression for the right JOIN key
3536
*/
3637
HashJoinPlanNode(SchemaRef output_schema, AbstractPlanNodeRef left, AbstractPlanNodeRef right,
37-
AbstractExpressionRef left_key_expression, AbstractExpressionRef right_key_expression)
38+
AbstractExpressionRef left_key_expression, AbstractExpressionRef right_key_expression,
39+
JoinType join_type)
3840
: AbstractPlanNode(std::move(output_schema), {std::move(left), std::move(right)}),
3941
left_key_expression_{std::move(left_key_expression)},
40-
right_key_expression_{std::move(right_key_expression)} {}
42+
right_key_expression_{std::move(right_key_expression)},
43+
join_type_(join_type) {}
4144

4245
/** @return The type of the plan node */
4346
auto GetType() const -> PlanType override { return PlanType::HashJoin; }
@@ -60,18 +63,25 @@ class HashJoinPlanNode : public AbstractPlanNode {
6063
return GetChildAt(1);
6164
}
6265

66+
/** @return The join type used in the hash join */
67+
auto GetJoinType() const -> JoinType { return join_type_; };
68+
6369
BUSTUB_PLAN_NODE_CLONE_WITH_CHILDREN(HashJoinPlanNode);
6470

6571
protected:
6672
auto PlanNodeToString() const -> std::string override {
67-
return fmt::format("HashJoin {{ left_key={}, right_key={} }}", left_key_expression_, right_key_expression_);
73+
return fmt::format("HashJoin {{ type={}, left_key={}, right_key={} }}", join_type_, left_key_expression_,
74+
right_key_expression_);
6875
}
6976

7077
private:
7178
/** The expression to compute the left JOIN key */
7279
AbstractExpressionRef left_key_expression_;
7380
/** The expression to compute the right JOIN key */
7481
AbstractExpressionRef right_key_expression_;
82+
83+
/** The join type */
84+
JoinType join_type_;
7585
};
7686

7787
} // namespace bustub

src/include/execution/plans/nested_index_join_plan.h

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <utility>
1717
#include <vector>
1818

19+
#include "binder/table_ref/bound_join_ref.h"
1920
#include "catalog/catalog.h"
2021
#include "catalog/schema.h"
2122
#include "concurrency/transaction.h"
@@ -34,20 +35,24 @@ class NestedIndexJoinPlanNode : public AbstractPlanNode {
3435
public:
3536
NestedIndexJoinPlanNode(SchemaRef output, AbstractPlanNodeRef child, AbstractExpressionRef key_predicate,
3637
table_oid_t inner_table_oid, index_oid_t index_oid, std::string index_name,
37-
std::string index_table_name, SchemaRef inner_table_schema)
38+
std::string index_table_name, SchemaRef inner_table_schema, JoinType join_type)
3839
: AbstractPlanNode(std::move(output), {std::move(child)}),
3940
key_predicate_(std::move(key_predicate)),
4041
inner_table_oid_(inner_table_oid),
4142
index_oid_(index_oid),
4243
index_name_(std::move(index_name)),
4344
index_table_name_(std::move(index_table_name)),
44-
inner_table_schema_(std::move(inner_table_schema)) {}
45+
inner_table_schema_(std::move(inner_table_schema)),
46+
join_type_(join_type) {}
4547

4648
auto GetType() const -> PlanType override { return PlanType::NestedIndexJoin; }
4749

4850
/** @return the predicate to be used to extract the join key from the child */
4951
auto KeyPredicate() const -> const AbstractExpressionRef & { return key_predicate_; }
5052

53+
/** @return The join type used in the nested index join */
54+
auto GetJoinType() const -> JoinType { return join_type_; };
55+
5156
/** @return the plan node for the outer table of the nested index join */
5257
auto GetChildPlan() const -> AbstractPlanNodeRef { return GetChildAt(0); }
5358

@@ -67,8 +72,8 @@ class NestedIndexJoinPlanNode : public AbstractPlanNode {
6772

6873
protected:
6974
auto PlanNodeToString() const -> std::string override {
70-
return fmt::format("NestedIndexJoin {{ key_predicate={}, index={}, index_table={} }}", key_predicate_, index_name_,
71-
index_table_name_);
75+
return fmt::format("NestedIndexJoin {{ type={}, key_predicate={}, index={}, index_table={} }}", join_type_,
76+
key_predicate_, index_name_, index_table_name_);
7277
}
7378

7479
private:
@@ -79,5 +84,8 @@ class NestedIndexJoinPlanNode : public AbstractPlanNode {
7984
const std::string index_name_;
8085
const std::string index_table_name_;
8186
SchemaRef inner_table_schema_;
87+
88+
/** The join type */
89+
JoinType join_type_;
8290
};
8391
} // namespace bustub

src/include/execution/plans/nested_loop_join_plan.h

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <utility>
1717
#include <vector>
1818

19+
#include "binder/table_ref/bound_join_ref.h"
1920
#include "catalog/catalog.h"
2021
#include "execution/expressions/abstract_expression.h"
2122
#include "execution/plans/abstract_plan.h"
@@ -36,16 +37,20 @@ class NestedLoopJoinPlanNode : public AbstractPlanNode {
3637
* if predicate(tuple) = true or predicate = `nullptr`
3738
*/
3839
NestedLoopJoinPlanNode(SchemaRef output_schema, AbstractPlanNodeRef left, AbstractPlanNodeRef right,
39-
AbstractExpressionRef predicate)
40+
AbstractExpressionRef predicate, JoinType join_type)
4041
: AbstractPlanNode(std::move(output_schema), {std::move(left), std::move(right)}),
41-
predicate_(std::move(predicate)) {}
42+
predicate_(std::move(predicate)),
43+
join_type_(join_type) {}
4244

4345
/** @return The type of the plan node */
4446
auto GetType() const -> PlanType override { return PlanType::NestedLoopJoin; }
4547

4648
/** @return The predicate to be used in the nested loop join */
4749
auto Predicate() const -> const AbstractExpression & { return *predicate_; }
4850

51+
/** @return The join type used in the nested loop join */
52+
auto GetJoinType() const -> JoinType { return join_type_; };
53+
4954
/** @return The left plan node of the nested loop join, by convention it should be the smaller table */
5055
auto GetLeftPlan() const -> AbstractPlanNodeRef { return GetChildAt(0); }
5156

@@ -60,9 +65,12 @@ class NestedLoopJoinPlanNode : public AbstractPlanNode {
6065
/** The join predicate */
6166
AbstractExpressionRef predicate_;
6267

68+
/** The join type */
69+
JoinType join_type_;
70+
6371
protected:
6472
auto PlanNodeToString() const -> std::string override {
65-
return fmt::format("NestedLoopJoin {{ predicate={} }}", predicate_);
73+
return fmt::format("NestedLoopJoin {{ type={}, predicate={} }}", join_type_, predicate_);
6674
}
6775
};
6876

src/optimizer/merge_filter_nlj.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,8 @@ auto Optimizer::OptimizeMergeFilterNLJ(const AbstractPlanNodeRef &plan) -> Abstr
6666
filter_plan.output_schema_, nlj_plan.GetLeftPlan(), nlj_plan.GetRightPlan(),
6767
RewriteExpressionForJoin(filter_plan.GetPredicate(),
6868
nlj_plan.GetLeftPlan()->OutputSchema().GetColumnCount(),
69-
nlj_plan.GetRightPlan()->OutputSchema().GetColumnCount()));
69+
nlj_plan.GetRightPlan()->OutputSchema().GetColumnCount()),
70+
nlj_plan.GetJoinType());
7071
}
7172
}
7273
}

src/optimizer/nlj_as_hash_join.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ auto Optimizer::OptimizeNLJAsHashJoin(const AbstractPlanNodeRef &plan) -> Abstra
2828
const auto &nlj_plan = dynamic_cast<const NestedLoopJoinPlanNode &>(*optimized_plan);
2929
// Has exactly two children
3030
BUSTUB_ENSURE(nlj_plan.children_.size() == 2, "NLJ should have exactly 2 children.");
31+
3132
// Check if expr is equal condition where one is for the left table, and one is for the right table.
3233
if (const auto *expr = dynamic_cast<const ComparisonExpression *>(&nlj_plan.Predicate()); expr != nullptr) {
3334
if (expr->comp_type_ == ComparisonType::Equal) {
@@ -45,12 +46,12 @@ auto Optimizer::OptimizeNLJAsHashJoin(const AbstractPlanNodeRef &plan) -> Abstra
4546
if (left_expr->GetTupleIdx() == 0 && right_expr->GetTupleIdx() == 1) {
4647
return std::make_shared<HashJoinPlanNode>(nlj_plan.output_schema_, nlj_plan.GetLeftPlan(),
4748
nlj_plan.GetRightPlan(), std::move(left_expr_tuple_0),
48-
std::move(right_expr_tuple_0));
49+
std::move(right_expr_tuple_0), nlj_plan.GetJoinType());
4950
}
5051
if (left_expr->GetTupleIdx() == 1 && right_expr->GetTupleIdx() == 0) {
5152
return std::make_shared<HashJoinPlanNode>(nlj_plan.output_schema_, nlj_plan.GetLeftPlan(),
5253
nlj_plan.GetRightPlan(), std::move(right_expr_tuple_0),
53-
std::move(left_expr_tuple_0));
54+
std::move(left_expr_tuple_0), nlj_plan.GetJoinType());
5455
}
5556
}
5657
}

src/optimizer/nlj_as_index_join.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ auto Optimizer::OptimizeNLJAsIndexJoin(const AbstractPlanNodeRef &plan) -> Abstr
6767
return std::make_shared<NestedIndexJoinPlanNode>(
6868
nlj_plan.output_schema_, nlj_plan.GetLeftPlan(), std::move(left_expr_tuple_0),
6969
right_seq_scan.GetTableOid(), index_oid, std::move(index_name), right_seq_scan.table_name_,
70-
right_seq_scan.output_schema_);
70+
right_seq_scan.output_schema_, nlj_plan.GetJoinType());
7171
}
7272
}
7373
if (left_expr->GetTupleIdx() == 1 && right_expr->GetTupleIdx() == 0) {
@@ -77,7 +77,7 @@ auto Optimizer::OptimizeNLJAsIndexJoin(const AbstractPlanNodeRef &plan) -> Abstr
7777
return std::make_shared<NestedIndexJoinPlanNode>(
7878
nlj_plan.output_schema_, nlj_plan.GetLeftPlan(), std::move(right_expr_tuple_0),
7979
right_seq_scan.GetTableOid(), index_oid, std::move(index_name), right_seq_scan.table_name_,
80-
right_seq_scan.output_schema_);
80+
right_seq_scan.output_schema_, nlj_plan.GetJoinType());
8181
}
8282
}
8383
}

src/planner/plan_table_ref.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,8 @@ auto Planner::PlanCrossProductRef(const BoundCrossProductRef &table_ref) -> Abst
115115
auto right = PlanTableRef(*table_ref.right_);
116116
return std::make_shared<NestedLoopJoinPlanNode>(
117117
std::make_shared<Schema>(NestedLoopJoinPlanNode::InferJoinSchema(*left, *right)), std::move(left),
118-
std::move(right), std::make_shared<ConstantValueExpression>(ValueFactory::GetBooleanValue(true)));
118+
std::move(right), std::make_shared<ConstantValueExpression>(ValueFactory::GetBooleanValue(true)),
119+
JoinType::INNER);
119120
}
120121

121122
auto Planner::PlanCTERef(const BoundCTERef &table_ref) -> AbstractPlanNodeRef {
@@ -133,7 +134,7 @@ auto Planner::PlanJoinRef(const BoundJoinRef &table_ref) -> AbstractPlanNodeRef
133134
auto [_, join_condition] = PlanExpression(*table_ref.condition_, {left, right});
134135
auto nlj_node = std::make_shared<NestedLoopJoinPlanNode>(
135136
std::make_shared<Schema>(NestedLoopJoinPlanNode::InferJoinSchema(*left, *right)), std::move(left),
136-
std::move(right), std::move(join_condition));
137+
std::move(right), std::move(join_condition), table_ref.join_type_);
137138
return nlj_node;
138139
}
139140

src/storage/table/tuple.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,11 @@ Tuple::Tuple(std::vector<Value> values, const Schema *schema) : allocated_(true)
2727
// 1. Calculate the size of the tuple.
2828
uint32_t tuple_size = schema->GetLength();
2929
for (auto &i : schema->GetUnlinedColumns()) {
30-
tuple_size += (values[i].GetLength() + sizeof(uint32_t));
30+
auto len = values[i].GetLength();
31+
if (len == BUSTUB_VALUE_NULL) {
32+
len = 0;
33+
}
34+
tuple_size += (len + sizeof(uint32_t));
3135
}
3236

3337
// 2. Allocate memory.
@@ -46,7 +50,11 @@ Tuple::Tuple(std::vector<Value> values, const Schema *schema) : allocated_(true)
4650
*reinterpret_cast<uint32_t *>(data_ + col.GetOffset()) = offset;
4751
// Serialize varchar value, in place (size+data).
4852
values[i].SerializeTo(data_ + offset);
49-
offset += (values[i].GetLength() + sizeof(uint32_t));
53+
auto len = values[i].GetLength();
54+
if (len == BUSTUB_VALUE_NULL) {
55+
len = 0;
56+
}
57+
offset += (len + sizeof(uint32_t));
5058
} else {
5159
values[i].SerializeTo(data_ + col.GetOffset());
5260
}

0 commit comments

Comments
 (0)