Skip to content

Commit 0074882

Browse files
authored
Rj/col pruning (#704)
* col prune prep files * add s24 tas :)
1 parent dba9b72 commit 0074882

File tree

15 files changed

+157
-68
lines changed

15 files changed

+157
-68
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ set(P3_FILES
351351
"src/optimizer/sort_limit_as_topn.cpp"
352352
"src/optimizer/optimizer_internal.cpp"
353353
"src/optimizer/seqscan_as_indexscan.cpp"
354+
"src/optimizer/column_pruning.cpp"
354355
"src/common/bustub_ddl.cpp"
355356
"src/include/execution/plans/topn_per_group_plan.h"
356357
${P2_FILES}

src/common/util/string_util.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ void StringUtil::RTrim(std::string *str) {
4242
str->erase(std::find_if(str->rbegin(), str->rend(), [](int ch) { return std::isspace(ch) == 0; }).base(), str->end());
4343
}
4444

45+
/**
 * Removes leading whitespace (' ', \f, \n, \r, \t, \v) from the string in place.
 * @param[in,out] str string to be trimmed on the left
 */
void StringUtil::LTrim(std::string *str) {
  // std::isspace has undefined behavior when handed a negative value other than EOF, which is
  // what a plain (signed) char holding a non-ASCII byte converts to. Taking each character as
  // unsigned char keeps the classification well-defined for every byte value.
  const auto first_kept =
      std::find_if(str->begin(), str->end(), [](unsigned char ch) { return std::isspace(ch) == 0; });
  str->erase(str->begin(), first_kept);
}
49+
4550
auto StringUtil::Indent(int num_indent) -> std::string { return std::string(num_indent, ' '); } // NOLINT
4651

4752
auto StringUtil::StartsWith(const std::string &str, const std::string &prefix) -> bool {
@@ -200,6 +205,16 @@ auto StringUtil::Split(const std::string &input, const std::string &split) -> st
200205
return splits;
201206
}
202207

208+
auto StringUtil::Count(const std::string &input, const std::string &str) -> size_t {
  // Hop from match to match. Each new search begins one character past the start of the
  // previous match, so occurrences that overlap are counted individually.
  size_t occurrences = 0;
  for (size_t pos = input.find(str); pos != std::string::npos; pos = input.find(str, pos + 1)) {
    ++occurrences;
  }
  return occurrences;
}
217+
203218
auto StringUtil::Strip(const std::string &str, char c) -> std::string {
204219
// There's a copy here which is wasteful, so don't use this in performance-critical code!
205220
std::string tmp = str;

src/execution/mock_scan_executor.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ static const char *ta_list_2023_fall[] = {"skyzh", "yliang412", "ferna
3333
"anurag-23", "Mayank-Baranwal", "abigalekim", "ChaosZhai",
3434
"aoleizhou", "averyqi115", "kswim8"};
3535

36+
// GitHub ids emitted as the first column of "__mock_table_tas_2024".
// Entry i is paired with ta_oh_2024[i] when a row is produced.
static const char *ta_list_2024[] = {
    "AlSchlo",   "walkingcabbages", "averyqi115", "lanlou1554", "sweetsuro",
    "ChaosZhai", "SDTheSlayer",     "xx01cyx",    "yliang412",  "thelongmarch-azx",
};
38+
3639
static const char *ta_oh_2022[] = {"Tuesday", "Wednesday", "Monday", "Wednesday", "Thursday", "Friday",
3740
"Wednesday", "Randomly", "Tuesday", "Monday", "Tuesday"};
3841

@@ -42,12 +45,15 @@ static const char *ta_oh_2023[] = {"Friday", "Thursday", "Tuesday", "Monday",
4245
static const char *ta_oh_2023_fall[] = {"Randomly", "Tuesday", "Wednesday", "Tuesday", "Thursday", "Tuesday",
4346
"Friday", "Yesterday", "Friday", "Friday", "Never"};
4447

48+
// Office-hour day emitted as the second column of "__mock_table_tas_2024".
// Indexed with the same cursor as ta_list_2024, so both arrays need the same length.
static const char *ta_oh_2024[] = {
    "Friday",    "Thursday", "Friday",  "Wednesday", "Thursday",
    "Yesterday", "Monday",   "Tuesday", "Tuesday",   "Monday",
};
50+
4551
static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"};
4652

4753
const char *mock_table_list[] = {"__mock_table_1", "__mock_table_2", "__mock_table_3", "__mock_table_tas_2022",
48-
"__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_agg_input_small",
49-
"__mock_agg_input_big", "__mock_table_schedule_2022", "__mock_table_schedule_2023",
50-
"__mock_table_123", "__mock_graph",
54+
"__mock_table_tas_2023", "__mock_table_tas_2023_fall", "__mock_table_tas_2024",
55+
"__mock_agg_input_small", "__mock_agg_input_big", "__mock_table_schedule_2022",
56+
"__mock_table_schedule", "__mock_table_123", "__mock_graph",
5157
// For leaderboard Q1
5258
"__mock_t1",
5359
// For leaderboard Q2
@@ -84,11 +90,15 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema {
8490
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
8591
}
8692

93+
if (table == "__mock_table_tas_2024") {
94+
return Schema{std::vector{Column{"github_id", TypeId::VARCHAR, 128}, Column{"office_hour", TypeId::VARCHAR, 128}}};
95+
}
96+
8797
if (table == "__mock_table_schedule_2022") {
8898
return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}};
8999
}
90100

91-
if (table == "__mock_table_schedule_2023") {
101+
if (table == "__mock_table_schedule") {
92102
return Schema{std::vector{Column{"day_of_week", TypeId::VARCHAR, 128}, Column{"has_lecture", TypeId::INTEGER}}};
93103
}
94104

@@ -168,11 +178,15 @@ auto GetSizeOf(const MockScanPlanNode *plan) -> size_t {
168178
return sizeof(ta_list_2023_fall) / sizeof(ta_list_2023_fall[0]);
169179
}
170180

181+
if (table == "__mock_table_tas_2024") {
182+
return sizeof(ta_list_2024) / sizeof(ta_list_2024[0]);
183+
}
184+
171185
if (table == "__mock_table_schedule_2022") {
172186
return sizeof(course_on_date) / sizeof(course_on_date[0]);
173187
}
174188

175-
if (table == "__mock_table_schedule_2023") {
189+
if (table == "__mock_table_schedule") {
176190
return sizeof(course_on_date) / sizeof(course_on_date[0]);
177191
}
178192

@@ -306,6 +320,15 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
306320
};
307321
}
308322

323+
if (table == "__mock_table_tas_2024") {
324+
return [plan](size_t cursor) {
325+
std::vector<Value> values{};
326+
values.push_back(ValueFactory::GetVarcharValue(ta_list_2024[cursor]));
327+
values.push_back(ValueFactory::GetVarcharValue(ta_oh_2024[cursor]));
328+
return Tuple{values, &plan->OutputSchema()};
329+
};
330+
}
331+
309332
if (table == "__mock_table_schedule_2022") {
310333
return [plan](size_t cursor) {
311334
std::vector<Value> values{};
@@ -315,7 +338,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
315338
};
316339
}
317340

318-
if (table == "__mock_table_schedule_2023") {
341+
if (table == "__mock_table_schedule") {
319342
return [plan](size_t cursor) {
320343
std::vector<Value> values{};
321344
values.push_back(ValueFactory::GetVarcharValue(course_on_date[cursor]));

src/include/common/util/string_util.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,21 @@ class StringUtil {
8585
/** @return input string split based on the split string */
8686
static auto Split(const std::string &input, const std::string &split) -> std::vector<std::string>;
8787

88+
/** @return the number of occurrences of the specified string in the input string */
89+
static auto Count(const std::string &input, const std::string &str) -> size_t;
90+
8891
/**
8992
* Removes the whitespace characters from the right side of the string.
9093
* @param[in,out] str string to be trimmed on the right
9194
*/
9295
static void RTrim(std::string *str);
9396

97+
/**
98+
* Removes the whitespace characters from the left side of the string.
99+
* @param[in,out] str string to be trimmed on the left
100+
*/
101+
static void LTrim(std::string *str);
102+
94103
/** @return indented string */
95104
static auto Indent(int num_indent) -> std::string;
96105

src/include/optimizer/optimizer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,13 @@ class Optimizer {
9393
auto MatchIndex(const std::string &table_name, uint32_t index_key_idx)
9494
-> std::optional<std::tuple<index_oid_t, std::string>>;
9595

96+
/**
97+
* @brief column pruning for child plan following a projection plan
98+
* @param plan the plan to optimize
99+
* @return the new plan with column pruning
100+
*/
101+
auto OptimizeColumnPruning(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef;
102+
96103
/**
97104
* @brief optimize sort + limit as top N
98105
*/

src/optimizer/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ add_library(
1212
optimizer_internal.cpp
1313
order_by_index_scan.cpp
1414
sort_limit_as_topn.cpp
15-
seqscan_as_indexscan.cpp)
15+
seqscan_as_indexscan.cpp
16+
column_pruning.cpp)
1617

1718
set(ALL_OBJECT_FILES
1819
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:bustub_optimizer>

src/optimizer/column_pruning.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include "optimizer/optimizer.h"
2+
3+
namespace bustub {
4+
5+
/**
6+
* @note You may use this function to implement column pruning optimization.
7+
*/
8+
auto Optimizer::OptimizeColumnPruning(const bustub::AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef {
9+
// Your code here
10+
return plan;
11+
}
12+
13+
} // namespace bustub

test/sql/p0.03-string-scan.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
query rowsort
2-
select day_of_week, upper(day_of_week), lower(day_of_week), has_lecture from __mock_table_schedule_2023;
2+
select day_of_week, upper(day_of_week), lower(day_of_week), has_lecture from __mock_table_schedule;
33
----
44
Monday MONDAY monday 1
55
Tuesday TUESDAY tuesday 0

test/sql/p3.00-primer.slt

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
query rowsort
2-
select github_id, office_hour from __mock_table_tas_2023_fall;
2+
select github_id, office_hour from __mock_table_tas_2024;
33
----
4-
skyzh Randomly
5-
yliang412 Tuesday
6-
fernandolis10 Wednesday
7-
wiam8 Tuesday
8-
anurag-23 Thursday
9-
Mayank-Baranwal Tuesday
10-
abigalekim Friday
11-
ChaosZhai Yesterday
12-
aoleizhou Friday
4+
AlSchlo Friday
5+
walkingcabbages Thursday
136
averyqi115 Friday
14-
kswim8 Never
7+
lanlou1554 Wednesday
8+
sweetsuro Thursday
9+
ChaosZhai Yesterday
10+
SDTheSlayer Monday
11+
xx01cyx Tuesday
12+
yliang412 Tuesday
13+
thelongmarch-azx Monday

test/sql/p3.07-simple-agg.slt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# 4 pts
22

3-
# How many TAs are there in 2023 Fall?
3+
# How many TAs are there in 2024 Spring?
44
query
5-
select count(*) from __mock_table_tas_2023_fall;
5+
select count(*) from __mock_table_tas_2024;
66
----
7-
11
7+
10
88

99
# The real test process begins...
1010

0 commit comments

Comments
 (0)