Skip to content

Commit 04ddafb

Browse files
authored
feat: add btree bench (#521)
* feat: add btree bench Signed-off-by: Alex Chi <[email protected]> * scan range key Signed-off-by: Alex Chi <[email protected]> * run Signed-off-by: Alex Chi <[email protected]> --------- Signed-off-by: Alex Chi <[email protected]>
1 parent fd2a01b commit 04ddafb

File tree

5 files changed

+291
-6
lines changed

5 files changed

+291
-6
lines changed

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ file(GLOB_RECURSE BUSTUB_TEST_SOURCES "${PROJECT_SOURCE_DIR}/test/*/*test.cpp")
1313
# #########################################
1414
add_custom_target(build-tests COMMAND ${CMAKE_CTEST_COMMAND} --show-only)
1515
add_custom_target(check-tests COMMAND ${CMAKE_CTEST_COMMAND} --verbose)
16-
add_custom_target(check-public-ci-tests COMMAND ${CMAKE_CTEST_COMMAND} --verbose -E "\"SQLLogicTest|Trie\"")
16+
add_custom_target(check-public-ci-tests COMMAND ${CMAKE_CTEST_COMMAND} --verbose -E "\"SQLLogicTest|Trie|BPlusTreeContentionTest\"")
1717

1818
# #########################################
1919
# "make XYZ_test"

test/storage/b_plus_tree_contention_test.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,13 @@ bool BPlusTreeLockBenchmarkCall(size_t num_threads, int leaf_node_size, bool wit
7676
return success;
7777
}
7878

79-
TEST(BPlusTreeTest, DISABLED_BPlusTreeContentionBenchmark) { // NOLINT
79+
TEST(BPlusTreeContentionTest, BPlusTreeContentionBenchmark) { // NOLINT
80+
std::cout << "This test will see how your B+ tree performance differs with and without contention." << std::endl;
81+
std::cout << "If your submission timeout, segfault, or didn't implement lock crabbing, we will manually deduct all "
82+
"concurrent test points (maximum 25)."
83+
<< std::endl;
84+
std::cout << "left_node_size = 2" << std::endl;
85+
8086
std::vector<size_t> time_ms_with_mutex;
8187
std::vector<size_t> time_ms_wo_mutex;
8288
for (size_t iter = 0; iter < 20; iter++) {
@@ -91,7 +97,7 @@ TEST(BPlusTreeTest, DISABLED_BPlusTreeContentionBenchmark) { // NOLINT
9197
time_ms_wo_mutex.push_back(dur.count());
9298
}
9399
}
94-
std::cout << "This test will see how your B+ tree performance differs with and without contention." << std::endl;
100+
95101
std::cout << "<<< BEGIN" << std::endl;
96102
std::cout << "Normal Access Time: ";
97103
double ratio_1 = 0;
@@ -116,7 +122,13 @@ TEST(BPlusTreeTest, DISABLED_BPlusTreeContentionBenchmark) { // NOLINT
116122
<< std::endl;
117123
}
118124

119-
TEST(BPlusTreeTest, DISABLED_BPlusTreeContentionBenchmark2) { // NOLINT
125+
TEST(BPlusTreeContentionTest, BPlusTreeContentionBenchmark2) { // NOLINT
126+
std::cout << "This test will see how your B+ tree performance differs with and without contention." << std::endl;
127+
std::cout << "If your submission timeout, segfault, or didn't implement lock crabbing, we will manually deduct all "
128+
"concurrent test points (maximum 25)."
129+
<< std::endl;
130+
std::cout << "left_node_size = 10" << std::endl;
131+
120132
std::vector<size_t> time_ms_with_mutex;
121133
std::vector<size_t> time_ms_wo_mutex;
122134
for (size_t iter = 0; iter < 20; iter++) {
@@ -131,7 +143,7 @@ TEST(BPlusTreeTest, DISABLED_BPlusTreeContentionBenchmark2) { // NOLINT
131143
time_ms_wo_mutex.push_back(dur.count());
132144
}
133145
}
134-
std::cout << "This test will see how your B+ tree performance differs with and without contention." << std::endl;
146+
135147
std::cout << "<<< BEGIN2" << std::endl;
136148
std::cout << "Normal Access Time: ";
137149
double ratio_1 = 0;

tools/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ add_subdirectory(wasm-shell)
44
add_subdirectory(b_plus_tree_printer)
55
add_subdirectory(wasm-bpt-printer)
66
add_subdirectory(terrier_bench)
7-
add_subdirectory(bpm_bench)
7+
add_subdirectory(bpm_bench)
8+
add_subdirectory(btree_bench)

tools/btree_bench/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
set(BTREE_BENCH_SOURCES btree_bench.cpp)
2+
add_executable(btree-bench ${BTREE_BENCH_SOURCES})
3+
4+
target_link_libraries(btree-bench bustub)
5+
set_target_properties(btree-bench PROPERTIES OUTPUT_NAME bustub-btree-bench)

tools/btree_bench/btree_bench.cpp

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
#include <chrono>
2+
#include <iostream>
3+
#include <memory>
4+
#include <mutex> // NOLINT
5+
#include <random>
6+
#include <sstream>
7+
#include <string>
8+
#include <thread>
9+
#include <vector>
10+
11+
#include <cpp_random_distributions/zipfian_int_distribution.h>
12+
13+
#include "argparse/argparse.hpp"
14+
#include "binder/binder.h"
15+
#include "buffer/buffer_pool_manager.h"
16+
#include "buffer/lru_k_replacer.h"
17+
#include "common/config.h"
18+
#include "common/exception.h"
19+
#include "common/rid.h"
20+
#include "common/util/string_util.h"
21+
#include "fmt/format.h"
22+
#include "storage/disk/disk_manager_memory.h"
23+
#include "storage/index/b_plus_tree.h"
24+
#include "storage/index/generic_key.h"
25+
#include "test_util.h"
26+
27+
#include <sys/time.h>
28+
29+
/**
 * @brief Read the current time of day through gettimeofday().
 *
 * Both timeval fields are widened to uint64_t before the unit conversion, so the
 * arithmetic never runs in a narrower platform type (time_t may be 32 bits wide,
 * in which case `tv_sec * 1000` would overflow before the cast).
 *
 * @return the timestamp in milliseconds; sub-millisecond precision is truncated.
 */
auto ClockMs() -> uint64_t {
  struct timeval tm {};
  gettimeofday(&tm, nullptr);
  // Widen first, then scale.
  return static_cast<uint64_t>(tm.tv_sec) * 1000 + static_cast<uint64_t>(tm.tv_usec) / 1000;
}
34+
35+
// Number of reader threads spawned by main().
static constexpr size_t BUSTUB_READ_THREAD{4};
// Number of writer threads spawned by main().
static constexpr size_t BUSTUB_WRITE_THREAD{2};
// Third constructor argument handed to the BufferPoolManager (presumably the K of the LRU-K replacer).
static constexpr size_t LRU_K_SIZE{4};
// First constructor argument handed to the BufferPoolManager (presumably the pool size in frames).
static constexpr size_t BUSTUB_BPM_SIZE{256};
// Keys [0, TOTAL_KEYS) are inserted before the benchmark starts.
static constexpr size_t TOTAL_KEYS{100000};
// Upper bound on how many consecutive keys one worker pass touches.
static constexpr size_t KEY_MODIFY_RANGE{2048};
41+
42+
struct BTreeTotalMetrics {
43+
uint64_t write_cnt_{0};
44+
uint64_t read_cnt_{0};
45+
uint64_t start_time_{0};
46+
std::mutex mutex_;
47+
48+
void Begin() { start_time_ = ClockMs(); }
49+
50+
void ReportWrite(uint64_t scan_cnt) {
51+
std::unique_lock<std::mutex> l(mutex_);
52+
write_cnt_ += scan_cnt;
53+
}
54+
55+
void ReportRead(uint64_t get_cnt) {
56+
std::unique_lock<std::mutex> l(mutex_);
57+
read_cnt_ += get_cnt;
58+
}
59+
60+
void Report() {
61+
auto now = ClockMs();
62+
auto elsped = now - start_time_;
63+
auto write_per_sec = write_cnt_ / static_cast<double>(elsped) * 1000;
64+
auto read_per_sec = read_cnt_ / static_cast<double>(elsped) * 1000;
65+
66+
fmt::print("<<< BEGIN\n");
67+
fmt::print("write: {}\n", write_per_sec);
68+
fmt::print("read: {}\n", read_per_sec);
69+
fmt::print(">>> END\n");
70+
}
71+
};
72+
73+
struct BTreeMetrics {
74+
uint64_t start_time_{0};
75+
uint64_t last_report_at_{0};
76+
uint64_t last_cnt_{0};
77+
uint64_t cnt_{0};
78+
std::string reporter_;
79+
uint64_t duration_ms_;
80+
81+
explicit BTreeMetrics(std::string reporter, uint64_t duration_ms)
82+
: reporter_(std::move(reporter)), duration_ms_(duration_ms) {}
83+
84+
void Tick() { cnt_ += 1; }
85+
86+
void Begin() { start_time_ = ClockMs(); }
87+
88+
void Report() {
89+
auto now = ClockMs();
90+
auto elsped = now - start_time_;
91+
if (elsped - last_report_at_ > 1000) {
92+
fmt::print(stderr, "[{:5.2f}] {}: total_cnt={:<10} throughput={:<10.3f} avg_throughput={:<10.3f}\n",
93+
elsped / 1000.0, reporter_, cnt_,
94+
(cnt_ - last_cnt_) / static_cast<double>(elsped - last_report_at_) * 1000,
95+
cnt_ / static_cast<double>(elsped) * 1000);
96+
last_report_at_ = elsped;
97+
last_cnt_ = cnt_;
98+
}
99+
}
100+
101+
auto ShouldFinish() -> bool {
102+
auto now = ClockMs();
103+
return now - start_time_ > duration_ms_;
104+
}
105+
};
106+
107+
// Multiples of 7 are the keys that the writer threads remove and insert again,
// so a reader may legitimately fail to find them.
auto KeyWillVanish(size_t key) -> bool {
  constexpr size_t vanish_stride = 7;
  return (key % vanish_stride) == 0;
}
109+
110+
// Multiples of 5 are the keys whose value the writer threads try to replace,
// so readers do not validate the stored value for them.
auto KeyWillChange(size_t key) -> bool {
  constexpr size_t change_stride = 5;
  return (key % change_stride) == 0;
}
112+
113+
// NOLINTNEXTLINE
114+
auto main(int argc, char **argv) -> int {
115+
using bustub::AccessType;
116+
using bustub::BufferPoolManager;
117+
using bustub::DiskManagerUnlimitedMemory;
118+
using bustub::page_id_t;
119+
120+
argparse::ArgumentParser program("bustub-btree-bench");
121+
program.add_argument("--duration").help("run btree bench for n milliseconds");
122+
123+
try {
124+
program.parse_args(argc, argv);
125+
} catch (const std::runtime_error &err) {
126+
std::cerr << err.what() << std::endl;
127+
std::cerr << program;
128+
return 1;
129+
}
130+
131+
uint64_t duration_ms = 30000;
132+
if (program.present("--duration")) {
133+
duration_ms = std::stoi(program.get("--duration"));
134+
}
135+
136+
auto disk_manager = std::make_unique<DiskManagerUnlimitedMemory>();
137+
auto bpm = std::make_unique<BufferPoolManager>(BUSTUB_BPM_SIZE, disk_manager.get(), LRU_K_SIZE);
138+
139+
fmt::print(stderr, "[info] total_keys={}, duration_ms={}, lru_k_size={}, bpm_size={}\n", TOTAL_KEYS, duration_ms,
140+
LRU_K_SIZE, BUSTUB_BPM_SIZE);
141+
142+
auto key_schema = bustub::ParseCreateStatement("a bigint");
143+
bustub::GenericComparator<8> comparator(key_schema.get());
144+
145+
page_id_t page_id;
146+
auto header_page = bpm->NewPageGuarded(&page_id);
147+
148+
bustub::BPlusTree<bustub::GenericKey<8>, bustub::RID, bustub::GenericComparator<8>> index("foo_pk", page_id,
149+
bpm.get(), comparator);
150+
151+
for (size_t key = 0; key < TOTAL_KEYS; key++) {
152+
bustub::GenericKey<8> index_key;
153+
bustub::RID rid;
154+
uint32_t value = key;
155+
rid.Set(value, value);
156+
index_key.SetFromInteger(key);
157+
index.Insert(index_key, rid, nullptr);
158+
}
159+
160+
fmt::print(stderr, "[info] benchmark start\n");
161+
162+
BTreeTotalMetrics total_metrics;
163+
total_metrics.Begin();
164+
165+
std::vector<std::thread> threads;
166+
167+
for (size_t thread_id = 0; thread_id < BUSTUB_READ_THREAD; thread_id++) {
168+
threads.emplace_back(std::thread([thread_id, &index, duration_ms, &total_metrics] {
169+
BTreeMetrics metrics(fmt::format("read {:>2}", thread_id), duration_ms);
170+
metrics.Begin();
171+
172+
size_t key_start = TOTAL_KEYS / BUSTUB_READ_THREAD * thread_id;
173+
size_t key_end = TOTAL_KEYS / BUSTUB_READ_THREAD * (thread_id + 1);
174+
std::random_device r;
175+
std::default_random_engine gen(r());
176+
std::uniform_int_distribution<size_t> dis(key_start, key_end - 1);
177+
178+
bustub::GenericKey<8> index_key;
179+
std::vector<bustub::RID> rids;
180+
181+
while (!metrics.ShouldFinish()) {
182+
auto base_key = dis(gen);
183+
size_t cnt = 0;
184+
for (auto key = base_key; key < key_end && cnt < KEY_MODIFY_RANGE; key++, cnt++) {
185+
rids.clear();
186+
index_key.SetFromInteger(key);
187+
index.GetValue(index_key, &rids);
188+
189+
if (!KeyWillVanish(key) && rids.empty()) {
190+
std::string msg = fmt::format("key not found: {}", key);
191+
throw std::runtime_error(msg);
192+
}
193+
194+
if (!KeyWillVanish(key) && !KeyWillChange(key)) {
195+
if (rids.size() != 1) {
196+
std::string msg = fmt::format("key not found: {}", key);
197+
throw std::runtime_error(msg);
198+
}
199+
if (static_cast<size_t>(rids[0].GetPageId()) != key || static_cast<size_t>(rids[0].GetSlotNum()) != key) {
200+
std::string msg = fmt::format("invalid data: {} -> {}", key, rids[0].Get());
201+
throw std::runtime_error(msg);
202+
}
203+
}
204+
metrics.Tick();
205+
metrics.Report();
206+
}
207+
}
208+
209+
total_metrics.ReportRead(metrics.cnt_);
210+
}));
211+
}
212+
213+
for (size_t thread_id = 0; thread_id < BUSTUB_WRITE_THREAD; thread_id++) {
214+
threads.emplace_back(std::thread([thread_id, &index, duration_ms, &total_metrics] {
215+
BTreeMetrics metrics(fmt::format("write {:>2}", thread_id), duration_ms);
216+
metrics.Begin();
217+
218+
size_t key_start = TOTAL_KEYS / BUSTUB_WRITE_THREAD * thread_id;
219+
size_t key_end = TOTAL_KEYS / BUSTUB_WRITE_THREAD * (thread_id + 1);
220+
std::random_device r;
221+
std::default_random_engine gen(r());
222+
std::uniform_int_distribution<size_t> dis(key_start, key_end - 1);
223+
224+
bustub::GenericKey<8> index_key;
225+
bustub::RID rid;
226+
227+
bool do_insert = false;
228+
229+
while (!metrics.ShouldFinish()) {
230+
auto base_key = dis(gen);
231+
size_t cnt = 0;
232+
for (auto key = base_key; key < key_end && cnt < KEY_MODIFY_RANGE; key++, cnt++) {
233+
if (KeyWillVanish(key)) {
234+
uint32_t value = key;
235+
rid.Set(value, value);
236+
index_key.SetFromInteger(key);
237+
if (do_insert) {
238+
index.Insert(index_key, rid, nullptr);
239+
} else {
240+
index.Remove(index_key, nullptr);
241+
}
242+
metrics.Tick();
243+
metrics.Report();
244+
} else if (KeyWillChange(key)) {
245+
uint32_t value = key;
246+
rid.Set(value, dis(gen));
247+
index_key.SetFromInteger(key);
248+
index.Insert(index_key, rid, nullptr);
249+
metrics.Tick();
250+
metrics.Report();
251+
}
252+
}
253+
do_insert = !do_insert;
254+
}
255+
256+
total_metrics.ReportWrite(metrics.cnt_);
257+
}));
258+
}
259+
260+
for (auto &thread : threads) {
261+
thread.join();
262+
}
263+
264+
total_metrics.Report();
265+
266+
return 0;
267+
}

0 commit comments

Comments
 (0)