Skip to content

Commit 94e02b5

Browse files
authored
feat(p3): add leaderboard tests (#441)
Signed-off-by: Alex Chi <[email protected]>
1 parent 1997bae commit 94e02b5

File tree

11 files changed

+324
-71
lines changed

11 files changed

+324
-71
lines changed

src/execution/mock_scan_executor.cpp

Lines changed: 180 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "execution/executors/mock_scan_executor.h"
14+
#include <algorithm>
15+
#include <random>
1416

1517
#include "common/exception.h"
1618
#include "common/util/string_util.h"
@@ -31,16 +33,15 @@ static const char *course_on_date[] = {"Monday", "Tuesday", "Wednesday", "Thursd
3133

3234
static int course_on_bool[] = {0, 1, 0, 1, 0, 1, 1};
3335

34-
// Null-terminated array of mock table names; the trailing nullptr marks the end of the list.
const char *mock_table_list[] = {
    "__mock_table_1",
    "__mock_table_2",
    "__mock_table_3",
    "__mock_table_tas_2022",
    "__mock_agg_input_small",
    "__mock_agg_input_big",
    "__mock_table_schedule_2022",
    "__mock_table_123",
    "__mock_graph",
    // For leaderboard Q1
    "__mock_t1_50k",
    "__mock_t2_100k",
    "__mock_t3_1k",
    // For leaderboard Q2
    "__mock_t4_1m",
    "__mock_t5_1m",
    "__mock_t6_1m",
    // For leaderboard Q3
    "__mock_t7",
    "__mock_t8",
    nullptr};
4445

4546
static const int GRAPH_NODE_CNT = 10;
4647

@@ -81,51 +82,111 @@ auto GetMockTableSchemaOf(const std::string &table) -> Schema {
8182
return Schema{std::vector{Column{"number", TypeId::INTEGER}}};
8283
}
8384

85+
if (table == "__mock_t1_50k" || table == "__mock_t2_100k" || table == "__mock_t3_1k" || table == "__mock_t4_1m" ||
86+
table == "__mock_t5_1m" || table == "__mock_t6_1m") {
87+
return Schema{std::vector{Column{"x", TypeId::INTEGER}, Column{"y", TypeId::INTEGER}}};
88+
}
89+
90+
if (table == "__mock_t7") {
91+
return Schema{
92+
std::vector{Column{"v", TypeId::INTEGER}, Column{"v1", TypeId::INTEGER}, Column{"v2", TypeId::INTEGER}}};
93+
}
94+
95+
if (table == "__mock_t8") {
96+
return Schema{std::vector{Column{"v4", TypeId::INTEGER}}};
97+
}
98+
8499
throw bustub::Exception(fmt::format("mock table {} not found", table));
85100
}
86101

87102
auto GetSizeOf(const MockScanPlanNode *plan) -> size_t {
88-
if (plan->GetTable() == "__mock_table_1") {
103+
const auto &table = plan->GetTable();
104+
105+
if (table == "__mock_table_1") {
89106
return 100;
90107
}
91108

92-
if (plan->GetTable() == "__mock_table_2") {
109+
if (table == "__mock_table_2") {
93110
return 100;
94111
}
95112

96-
if (plan->GetTable() == "__mock_table_3") {
113+
if (table == "__mock_table_3") {
97114
return 100;
98115
}
99116

100-
if (plan->GetTable() == "__mock_table_tas_2022") {
117+
if (table == "__mock_table_tas_2022") {
101118
return sizeof(ta_list_2022) / sizeof(ta_list_2022[0]);
102119
}
103120

104-
if (plan->GetTable() == "__mock_table_schedule_2022") {
121+
if (table == "__mock_table_schedule_2022") {
105122
return sizeof(course_on_date) / sizeof(course_on_date[0]);
106123
}
107124

108-
if (plan->GetTable() == "__mock_agg_input_small") {
125+
if (table == "__mock_agg_input_small") {
109126
return 1000;
110127
}
111128

112-
if (plan->GetTable() == "__mock_agg_input_big") {
129+
if (table == "__mock_agg_input_big") {
113130
return 10000;
114131
}
115132

116-
if (plan->GetTable() == "__mock_graph") {
133+
if (table == "__mock_graph") {
117134
return GRAPH_NODE_CNT * GRAPH_NODE_CNT;
118135
}
119136

120-
if (plan->GetTable() == "__mock_table_123") {
137+
if (table == "__mock_table_123") {
121138
return 3;
122139
}
123140

124-
return 100;
141+
if (table == "__mock_t1_50k") {
142+
return 50000;
143+
}
144+
145+
if (table == "__mock_t2_100k") {
146+
return 100000;
147+
}
148+
149+
if (table == "__mock_t3_1k") {
150+
return 1000;
151+
}
152+
153+
if (table == "__mock_t4_1m" || table == "__mock_t5_1m" || table == "__mock_t6_1m") {
154+
return 1000000;
155+
}
156+
157+
if (table == "__mock_t7") {
158+
return 1000000;
159+
}
160+
161+
if (table == "__mock_t8") {
162+
return 10;
163+
}
164+
165+
return 0;
166+
}
167+
168+
auto GetShuffled(const MockScanPlanNode *plan) -> bool {
169+
const auto &table = plan->GetTable();
170+
171+
if (table == "__mock_t1_50k") {
172+
return true;
173+
}
174+
175+
if (table == "__mock_t2_100k") {
176+
return true;
177+
}
178+
179+
if (table == "__mock_t3_1k") {
180+
return true;
181+
}
182+
183+
return false;
125184
}
126185

127186
auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)> {
128-
if (plan->GetTable() == "__mock_table_1") {
187+
const auto &table = plan->GetTable();
188+
189+
if (table == "__mock_table_1") {
129190
return [plan](size_t cursor) {
130191
std::vector<Value> values{};
131192
values.reserve(2);
@@ -135,7 +196,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
135196
};
136197
}
137198

138-
if (plan->GetTable() == "__mock_table_2") {
199+
if (table == "__mock_table_2") {
139200
return [plan](size_t cursor) {
140201
std::vector<Value> values{};
141202
values.reserve(2);
@@ -146,7 +207,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
146207
};
147208
}
148209

149-
if (plan->GetTable() == "__mock_table_3") {
210+
if (table == "__mock_table_3") {
150211
return [plan](size_t cursor) {
151212
std::vector<Value> values{};
152213
values.reserve(2);
@@ -160,7 +221,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
160221
};
161222
}
162223

163-
if (plan->GetTable() == "__mock_table_tas_2022") {
224+
if (table == "__mock_table_tas_2022") {
164225
return [plan](size_t cursor) {
165226
std::vector<Value> values{};
166227
values.push_back(ValueFactory::GetVarcharValue(ta_list_2022[cursor]));
@@ -169,7 +230,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
169230
};
170231
}
171232

172-
if (plan->GetTable() == "__mock_table_schedule_2022") {
233+
if (table == "__mock_table_schedule_2022") {
173234
return [plan](size_t cursor) {
174235
std::vector<Value> values{};
175236
values.push_back(ValueFactory::GetVarcharValue(course_on_date[cursor]));
@@ -178,7 +239,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
178239
};
179240
}
180241

181-
if (plan->GetTable() == "__mock_agg_input_small") {
242+
if (table == "__mock_agg_input_small") {
182243
return [plan](size_t cursor) {
183244
std::vector<Value> values{};
184245
values.push_back(ValueFactory::GetIntegerValue((cursor + 2) % 10));
@@ -192,7 +253,7 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
192253
};
193254
}
194255

195-
if (plan->GetTable() == "__mock_agg_input_big") {
256+
if (table == "__mock_agg_input_big") {
196257
return [plan](size_t cursor) {
197258
std::vector<Value> values{};
198259
values.push_back(ValueFactory::GetIntegerValue((cursor + 2) % 10));
@@ -206,15 +267,15 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
206267
};
207268
}
208269

209-
if (plan->GetTable() == "__mock_table_123") {
270+
if (table == "__mock_table_123") {
210271
return [plan](size_t cursor) {
211272
std::vector<Value> values{};
212273
values.push_back(ValueFactory::GetIntegerValue(cursor + 1));
213274
return Tuple{values, &plan->OutputSchema()};
214275
};
215276
}
216277

217-
if (plan->GetTable() == "__mock_graph") {
278+
if (table == "__mock_graph") {
218279
return [plan](size_t cursor) {
219280
std::vector<Value> values{};
220281
int src = cursor % GRAPH_NODE_CNT;
@@ -232,6 +293,81 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
232293
};
233294
}
234295

296+
if (table == "__mock_t1_50k") {
297+
return [plan](size_t cursor) {
298+
std::vector<Value> values{};
299+
values.push_back(ValueFactory::GetIntegerValue(cursor * 10));
300+
values.push_back(ValueFactory::GetIntegerValue(cursor * 1000));
301+
return Tuple{values, &plan->OutputSchema()};
302+
};
303+
}
304+
305+
if (table == "__mock_t2_100k") {
306+
return [plan](size_t cursor) {
307+
std::vector<Value> values{};
308+
values.push_back(ValueFactory::GetIntegerValue(cursor));
309+
values.push_back(ValueFactory::GetIntegerValue(cursor * 100));
310+
return Tuple{values, &plan->OutputSchema()};
311+
};
312+
}
313+
314+
if (table == "__mock_t3_1k") {
315+
return [plan](size_t cursor) {
316+
std::vector<Value> values{};
317+
values.push_back(ValueFactory::GetIntegerValue(cursor * 100));
318+
values.push_back(ValueFactory::GetIntegerValue(cursor * 10000));
319+
return Tuple{values, &plan->OutputSchema()};
320+
};
321+
}
322+
323+
if (table == "__mock_t4_1m") {
324+
return [plan](size_t cursor) {
325+
std::vector<Value> values{};
326+
cursor = cursor % 500000;
327+
values.push_back(ValueFactory::GetIntegerValue(cursor));
328+
values.push_back(ValueFactory::GetIntegerValue(cursor * 10));
329+
return Tuple{values, &plan->OutputSchema()};
330+
};
331+
}
332+
333+
if (table == "__mock_t5_1m") {
334+
return [plan](size_t cursor) {
335+
std::vector<Value> values{};
336+
cursor = (cursor + 30000) % 500000;
337+
values.push_back(ValueFactory::GetIntegerValue(cursor));
338+
values.push_back(ValueFactory::GetIntegerValue(cursor * 10));
339+
return Tuple{values, &plan->OutputSchema()};
340+
};
341+
}
342+
343+
if (table == "__mock_t6_1m") {
344+
return [plan](size_t cursor) {
345+
std::vector<Value> values{};
346+
cursor = (cursor + 60000) % 500000;
347+
values.push_back(ValueFactory::GetIntegerValue(cursor));
348+
values.push_back(ValueFactory::GetIntegerValue(cursor * 10));
349+
return Tuple{values, &plan->OutputSchema()};
350+
};
351+
}
352+
353+
if (table == "__mock_t7") {
354+
return [plan](size_t cursor) {
355+
std::vector<Value> values{};
356+
values.push_back(ValueFactory::GetIntegerValue(cursor % 20));
357+
values.push_back(ValueFactory::GetIntegerValue(cursor));
358+
values.push_back(ValueFactory::GetIntegerValue(cursor));
359+
return Tuple{values, &plan->OutputSchema()};
360+
};
361+
}
362+
363+
if (table == "__mock_t8") {
364+
return [plan](size_t cursor) {
365+
std::vector<Value> values{};
366+
values.push_back(ValueFactory::GetIntegerValue(cursor));
367+
return Tuple{values, &plan->OutputSchema()};
368+
};
369+
}
370+
235371
// By default, return table of all 0.
236372
return [plan](size_t cursor) {
237373
std::vector<Value> values{};
@@ -244,7 +380,16 @@ auto GetFunctionOf(const MockScanPlanNode *plan) -> std::function<Tuple(size_t)>
244380
}
245381

246382
/**
 * Construct a mock scan executor for the table named in `plan`.
 *
 * The tuple generator and the table size are resolved once here from the plan. For tables that
 * GetShuffled() marks as shuffled, a random permutation of the row indexes [0, size_) is built so
 * that Next() can emit the generated rows in that permuted order.
 *
 * @param exec_ctx The executor context, forwarded to AbstractExecutor
 * @param plan The mock scan plan to execute
 */
MockScanExecutor::MockScanExecutor(ExecutorContext *exec_ctx, const MockScanPlanNode *plan)
    : AbstractExecutor{exec_ctx}, plan_{plan}, func_(GetFunctionOf(plan)), size_(GetSizeOf(plan)) {
  if (GetShuffled(plan)) {
    // The final length is known up front, so allocate once instead of growing repeatedly.
    shuffled_idx_.reserve(size_);
    for (size_t i = 0; i < size_; i++) {
      shuffled_idx_.push_back(i);
    }
    // Seeded from std::random_device, so the permutation is not fixed across executors
    // (assuming the platform's random_device is non-deterministic).
    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(shuffled_idx_.begin(), shuffled_idx_.end(), g);
  }
}
248393

249394
void MockScanExecutor::Init() {
250395
// Reset the cursor
@@ -256,7 +401,11 @@ auto MockScanExecutor::Next(Tuple *tuple, RID *rid) -> bool {
256401
// Scan complete
257402
return EXECUTOR_EXHAUSTED;
258403
}
259-
*tuple = func_(cursor_);
404+
if (shuffled_idx_.empty()) {
405+
*tuple = func_(cursor_);
406+
} else {
407+
*tuple = func_(shuffled_idx_[cursor_]);
408+
}
260409
++cursor_;
261410
*rid = MakeDummyRID();
262411
return EXECUTOR_ACTIVE;

src/include/common/bustub_instance.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,19 @@ class ResultWriter {
5757
bool simplified_output_{false};
5858
};
5959

60+
class NoopWriter : public ResultWriter {
61+
public:
62+
NoopWriter() = default;
63+
void WriteCell(const std::string &cell) override {}
64+
void WriteHeaderCell(const std::string &cell) override {}
65+
void BeginHeader() override {}
66+
void EndHeader() override {}
67+
void BeginRow() override {}
68+
void EndRow() override {}
69+
void BeginTable(bool simplified_output) override {}
70+
void EndTable() override {}
71+
};
72+
6073
class SimpleStreamWriter : public ResultWriter {
6174
public:
6275
explicit SimpleStreamWriter(std::ostream &stream, bool disable_header = false, const char *separator = "\t")

src/include/execution/executors/mock_scan_executor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ class MockScanExecutor : public AbstractExecutor {
7676

7777
/** The size of the mock table */
7878
std::size_t size_;
79+
80+
/** The shuffled output */
81+
std::vector<size_t> shuffled_idx_;
7982
};
8083

8184
} // namespace bustub

src/include/optimizer/optimizer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,15 @@ class Optimizer {
8282
*/
8383
auto OptimizeSortLimitAsTopN(const AbstractPlanNodeRef &plan) -> AbstractPlanNodeRef;
8484

85+
/**
86+
* @brief Get the estimated cardinality for a table based on the table name. Useful for join reordering. BusTub
87+
* doesn't support statistics for now, so this is the only way for you to get the table size :(
88+
*
89+
* @param table_name
90+
* @return std::optional<size_t>
91+
*/
92+
auto EstimatedCardinality(const std::string &table_name) -> std::optional<size_t>;
93+
8594
/** Catalog will be used during the planning process. USERS SHOULD ENSURE IT OUTLIVES
8695
* OPTIMIZER, otherwise it's a dangling reference.
8796
*/

0 commit comments

Comments
 (0)