Skip to content

Commit 29cfa2a

Browse files
skyzh and xzhseh authored
p1: improve bpm bench (#604)
Signed-off-by: Alex Chi <[email protected]> Co-authored-by: Xu <[email protected]>
1 parent 04537ff commit 29cfa2a

File tree

4 files changed

+159
-50
lines changed

4 files changed

+159
-50
lines changed

src/include/buffer/lru_k_replacer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
namespace bustub {
2525

26-
enum class AccessType { Unknown = 0, Get, Scan };
26+
/** Tag describing the kind of access; callers pass it to FetchPage / UnpinPage. */
enum class AccessType {
  Unknown = 0,
  Lookup,
  Scan,
  Index,
};
2727

2828
class LRUKNode {
2929
private:

src/include/storage/disk/disk_manager_memory.h

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212
#include <array>
13+
#include <chrono> // NOLINT
1314
#include <cstring>
1415
#include <fstream>
1516
#include <future> // NOLINT
@@ -25,6 +26,7 @@
2526
#include "common/config.h"
2627
#include "common/exception.h"
2728
#include "common/logger.h"
29+
#include "fmt/core.h"
2830
#include "storage/disk/disk_manager.h"
2931

3032
namespace bustub {
@@ -63,17 +65,15 @@ class DiskManagerMemory : public DiskManager {
6365
*/
6466
class DiskManagerUnlimitedMemory : public DiskManager {
6567
public:
66-
DiskManagerUnlimitedMemory() = default;
68+
/** Create an empty in-memory disk manager; every recent-access slot starts out as -1. */
DiskManagerUnlimitedMemory() { recent_access_.fill(-1); }
6769

6870
/**
6971
* Write a page to the database file.
7072
* @param page_id id of the page
7173
* @param page_data raw page data
7274
*/
7375
void WritePage(page_id_t page_id, const char *page_data) override {
74-
if (latency_ > 0) {
75-
std::this_thread::sleep_for(std::chrono::milliseconds(latency_));
76-
}
76+
ProcessLatency(page_id);
7777

7878
std::unique_lock<std::mutex> l(mutex_);
7979
if (page_id >= static_cast<int>(data_.size())) {
@@ -87,6 +87,8 @@ class DiskManagerUnlimitedMemory : public DiskManager {
8787
l.unlock();
8888

8989
memcpy(ptr->first.data(), page_data, BUSTUB_PAGE_SIZE);
90+
91+
PostProcessLatency(page_id);
9092
}
9193

9294
/**
@@ -95,34 +97,69 @@ class DiskManagerUnlimitedMemory : public DiskManager {
9597
* @param[out] page_data output buffer
9698
*/
9799
void ReadPage(page_id_t page_id, char *page_data) override {
98-
if (latency_ > 0) {
99-
std::this_thread::sleep_for(std::chrono::milliseconds(latency_));
100-
}
100+
ProcessLatency(page_id);
101101

102102
std::unique_lock<std::mutex> l(mutex_);
103103
if (page_id >= static_cast<int>(data_.size()) || page_id < 0) {
104-
LOG_WARN("page not exist");
104+
fmt::println(stderr, "page {} not in range", page_id);
105+
std::terminate();
105106
return;
106107
}
107108
if (data_[page_id] == nullptr) {
108-
LOG_WARN("page not exist");
109+
fmt::println(stderr, "page {} not exist", page_id);
110+
std::terminate();
109111
return;
110112
}
111113
std::shared_ptr<ProtectedPage> ptr = data_[page_id];
112114
std::shared_lock<std::shared_mutex> l_page(ptr->second);
113115
l.unlock();
114116

115117
memcpy(page_data, ptr->first.data(), BUSTUB_PAGE_SIZE);
118+
119+
PostProcessLatency(page_id);
116120
}
117121

118-
void SetLatency(size_t latency_ms) { latency_ = latency_ms; }
122+
/**
 * Simulate disk latency for an upcoming page access by sleeping.
 *
 * Does nothing unless the latency simulator is enabled. The access sleeps for 0.1ms when
 * `page_id` lies in the same 4-page aligned "block" as a recently accessed page, or is at most
 * 3 pages ahead of one (sequential access). Otherwise it sleeps for 1ms (random access).
 *
 * @param page_id id of the page about to be accessed
 */
void ProcessLatency(page_id_t page_id) {
  if (!latency_simulator_enabled_) {
    return;
  }

  uint64_t sleep_micro_sec = 1000;  // for random access, 1ms latency
  {
    std::scoped_lock<std::mutex> lck(latency_processor_mutex_);
    for (const auto &recent_page_id : recent_access_) {
      if (recent_page_id < 0) {
        // The slot still holds the -1 placeholder, i.e. no access has been recorded in it yet.
        // Without this check the placeholder would make pages 0..2 look like sequential accesses.
        continue;
      }
      const bool same_block = (recent_page_id & (~0x3)) == (page_id & (~0x3));
      const bool sequential = page_id >= recent_page_id && page_id <= recent_page_id + 3;
      if (same_block || sequential) {
        sleep_micro_sec = 100;  // for access in the same "block" or sequential access, 0.1ms latency
        break;
      }
    }
  }  // the lock is released here so that the sleep below does not block other threads

  std::this_thread::sleep_for(std::chrono::microseconds(sleep_micro_sec));
}
140+
141+
/**
 * Record a finished page access in the fixed-size recent-access history.
 *
 * Does nothing unless the latency simulator is enabled. The history is written round-robin:
 * once every slot has been used, the oldest entry is overwritten.
 *
 * @param page_id id of the page that was just accessed
 */
void PostProcessLatency(page_id_t page_id) {
  if (!latency_simulator_enabled_) {
    return;
  }

  std::scoped_lock<std::mutex> guard(latency_processor_mutex_);
  const auto slot = access_ptr_;
  recent_access_[slot] = page_id;
  access_ptr_ = (slot + 1) % recent_access_.size();
}
148+
149+
/**
 * Turn the latency simulator on or off.
 * @param enabled true to enable the simulated latency, false to disable it
 */
void EnableLatencySimulator(bool enabled) {
  latency_simulator_enabled_ = enabled;
}
119150

120151
private:
121-
std::mutex mutex_;
152+
bool latency_simulator_enabled_{false};
153+
154+
std::mutex latency_processor_mutex_;
155+
std::array<page_id_t, 4> recent_access_;
156+
uint64_t access_ptr_{0};
157+
122158
using Page = std::array<char, BUSTUB_PAGE_SIZE>;
123159
using ProtectedPage = std::pair<Page, std::shared_mutex>;
160+
161+
std::mutex mutex_;
124162
std::vector<std::shared_ptr<ProtectedPage>> data_;
125-
size_t latency_{0};
126163
};
127164

128165
} // namespace bustub

tools/bpm_bench/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
set(BPM_BENCH_SOURCES bpm_bench.cpp)
2-
add_executable(bpm-bench ${BPM_BENCH_SOURCES})
2+
add_executable(bpm-bench ${BPM_BENCH_SOURCES} "${PROJECT_SOURCE_DIR}/tools/backtrace.cpp")
3+
add_backward(bpm-bench)
34

45
target_link_libraries(bpm-bench bustub)
56
set_target_properties(bpm-bench PROPERTIES OUTPUT_NAME bustub-bpm-bench)

tools/bpm_bench/bpm_bench.cpp

Lines changed: 107 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
#include <chrono>
2+
#include <exception>
23
#include <iostream>
34
#include <memory>
45
#include <mutex> // NOLINT
56
#include <random>
67
#include <sstream>
78
#include <string>
89
#include <thread>
10+
#include <unordered_map>
911
#include <vector>
1012

1113
#include <cpp_random_distributions/zipfian_int_distribution.h>
@@ -29,12 +31,6 @@ auto ClockMs() -> uint64_t {
2931
return static_cast<uint64_t>(tm.tv_sec * 1000) + static_cast<uint64_t>(tm.tv_usec / 1000);
3032
}
3133

32-
static const size_t BUSTUB_SCAN_THREAD = 8;
33-
static const size_t BUSTUB_GET_THREAD = 8;
34-
static const size_t LRU_K_SIZE = 16;
35-
static const size_t BUSTUB_PAGE_CNT = 6400;
36-
static const size_t BUSTUB_BPM_SIZE = 64;
37-
3834
struct BpmTotalMetrics {
3935
uint64_t scan_cnt_{0};
4036
uint64_t get_cnt_{0};
@@ -100,6 +96,45 @@ struct BpmMetrics {
10096
}
10197
};
10298

99+
/// Header that the benchmark overlays on the beginning of a page's raw data
struct BustubBenchPageHeader {
  uint64_t seed_;     // seed the page was last stamped with
  uint64_t page_id_;  // page index the page was last stamped with
  char data_[0];      // zero-length marker for the payload bytes that follow the header
};
104+
105+
/// Modify the page and save some data inside
106+
auto ModifyPage(char *data, size_t page_idx, uint64_t seed) -> void {
107+
auto *pg = reinterpret_cast<BustubBenchPageHeader *>(data);
108+
pg->seed_ = seed;
109+
pg->page_id_ = page_idx;
110+
pg->data_[pg->seed_ % 4000] = pg->seed_ % 256;
111+
}
112+
113+
/// Check the page and verify the data inside
114+
auto CheckPageConsistentNoSeed(const char *data, size_t page_idx) -> void {
115+
const auto *pg = reinterpret_cast<const BustubBenchPageHeader *>(data);
116+
if (pg->page_id_ != page_idx) {
117+
fmt::println(stderr, "page header not consistent: page_id_={} page_idx={}", pg->page_id_, page_idx);
118+
std::terminate();
119+
}
120+
auto left = static_cast<unsigned int>(static_cast<unsigned char>(pg->data_[pg->seed_ % 4000]));
121+
auto right = static_cast<unsigned int>(pg->seed_ % 256);
122+
if (left != right) {
123+
fmt::println(stderr, "page content not consistent: data_[{}]={} seed_ % 256={}", pg->seed_ % 4000, left, right);
124+
std::terminate();
125+
}
126+
}
127+
128+
/// Check the page and verify the data inside
129+
auto CheckPageConsistent(const char *data, size_t page_idx, uint64_t seed) -> void {
130+
const auto *pg = reinterpret_cast<const BustubBenchPageHeader *>(data);
131+
if (pg->seed_ != seed) {
132+
fmt::println(stderr, "page seed not consistent: seed_={} seed={}", pg->seed_, seed);
133+
std::terminate();
134+
}
135+
CheckPageConsistentNoSeed(data, page_idx);
136+
}
137+
103138
// NOLINTNEXTLINE
104139
auto main(int argc, char **argv) -> int {
105140
using bustub::AccessType;
@@ -109,7 +144,12 @@ auto main(int argc, char **argv) -> int {
109144

110145
argparse::ArgumentParser program("bustub-bpm-bench");
111146
program.add_argument("--duration").help("run bpm bench for n milliseconds");
112-
program.add_argument("--latency").help("set disk latency to n milliseconds");
147+
program.add_argument("--latency").help("enable disk latency");
148+
program.add_argument("--scan-thread-n").help("number of scan threads");
149+
program.add_argument("--get-thread-n").help("number of lookup threads");
150+
program.add_argument("--bpm-size").help("buffer pool size");
151+
program.add_argument("--db-size").help("number of pages");
152+
program.add_argument("--lru-k-size").help("lru-k size");
113153

114154
try {
115155
program.parse_args(argc, argv);
@@ -124,102 +164,133 @@ auto main(int argc, char **argv) -> int {
124164
duration_ms = std::stoi(program.get("--duration"));
125165
}
126166

127-
uint64_t latency_ms = 0;
167+
uint64_t enable_latency = 0;
128168
if (program.present("--latency")) {
129-
latency_ms = std::stoi(program.get("--latency"));
169+
enable_latency = std::stoi(program.get("--latency"));
170+
}
171+
172+
uint64_t scan_thread_n = 8;
173+
if (program.present("--scan-thread-n")) {
174+
scan_thread_n = std::stoi(program.get("--scan-thread-n"));
175+
}
176+
177+
uint64_t get_thread_n = 8;
178+
if (program.present("--get-thread-n")) {
179+
get_thread_n = std::stoi(program.get("--get-thread-n"));
180+
}
181+
182+
uint64_t bustub_page_cnt = 6400;
183+
if (program.present("--db-size")) {
184+
bustub_page_cnt = std::stoi(program.get("--db-size"));
185+
}
186+
187+
uint64_t bustub_bpm_size = 64;
188+
if (program.present("--bpm-size")) {
189+
bustub_bpm_size = std::stoi(program.get("--bpm-size"));
190+
}
191+
192+
uint64_t lru_k_size = 16;
193+
if (program.present("--lru-k-size")) {
194+
bustub_page_cnt = std::stoi(program.get("--lru-k-size"));
130195
}
131196

132197
auto disk_manager = std::make_unique<DiskManagerUnlimitedMemory>();
133-
auto bpm = std::make_unique<BufferPoolManager>(BUSTUB_BPM_SIZE, disk_manager.get(), LRU_K_SIZE);
198+
auto bpm = std::make_unique<BufferPoolManager>(bustub_bpm_size, disk_manager.get(), lru_k_size);
134199
std::vector<page_id_t> page_ids;
135200

136-
fmt::print(stderr, "[info] total_page={}, duration_ms={}, latency_ms={}, lru_k_size={}, bpm_size={}\n",
137-
BUSTUB_PAGE_CNT, duration_ms, latency_ms, LRU_K_SIZE, BUSTUB_BPM_SIZE);
201+
fmt::print(stderr,
202+
"[info] total_page={}, duration_ms={}, latency={}, lru_k_size={}, bpm_size={}, scan_thread_cnt={}, "
203+
"get_thread_cnt={}\n",
204+
bustub_page_cnt, duration_ms, enable_latency, lru_k_size, bustub_bpm_size, scan_thread_n, get_thread_n);
138205

139-
for (size_t i = 0; i < BUSTUB_PAGE_CNT; i++) {
206+
for (size_t i = 0; i < bustub_page_cnt; i++) {
140207
page_id_t page_id;
141208
auto *page = bpm->NewPage(&page_id);
142209
if (page == nullptr) {
143210
throw std::runtime_error("new page failed");
144211
}
145-
char &ch = page->GetData()[i % 1024];
146-
ch = 1;
212+
213+
ModifyPage(page->GetData(), i, 0);
147214

148215
bpm->UnpinPage(page_id, true);
149216
page_ids.push_back(page_id);
150217
}
151218

152219
// enable disk latency after creating all pages
153-
disk_manager->SetLatency(latency_ms);
220+
disk_manager->EnableLatencySimulator(enable_latency != 0);
154221

155222
fmt::print(stderr, "[info] benchmark start\n");
156223

157224
BpmTotalMetrics total_metrics;
158225
total_metrics.Begin();
159226

160227
std::vector<std::thread> threads;
228+
using ModifyRecord = std::unordered_map<page_id_t, uint64_t>;
229+
230+
for (size_t thread_id = 0; thread_id < scan_thread_n; thread_id++) {
231+
threads.emplace_back([bustub_page_cnt, scan_thread_n, thread_id, &page_ids, &bpm, duration_ms, &total_metrics] {
232+
ModifyRecord records;
161233

162-
for (size_t thread_id = 0; thread_id < BUSTUB_SCAN_THREAD; thread_id++) {
163-
threads.emplace_back(std::thread([thread_id, &page_ids, &bpm, duration_ms, &total_metrics] {
164234
BpmMetrics metrics(fmt::format("scan {:>2}", thread_id), duration_ms);
165235
metrics.Begin();
166236

167-
size_t page_idx = BUSTUB_PAGE_CNT * thread_id / BUSTUB_SCAN_THREAD;
237+
size_t page_idx_start = bustub_page_cnt * thread_id / scan_thread_n;
238+
size_t page_idx_end = bustub_page_cnt * (thread_id + 1) / scan_thread_n;
239+
size_t page_idx = page_idx_start;
168240

169241
while (!metrics.ShouldFinish()) {
170242
auto *page = bpm->FetchPage(page_ids[page_idx], AccessType::Scan);
171243
if (page == nullptr) {
172244
continue;
173245
}
174246

175-
char &ch = page->GetData()[page_idx % 1024];
176247
page->WLatch();
177-
ch += 1;
178-
if (ch == 0) {
179-
ch = 1;
180-
}
248+
auto &seed = records[page_idx];
249+
CheckPageConsistent(page->GetData(), page_idx, seed);
250+
seed = seed + 1;
251+
ModifyPage(page->GetData(), page_idx, seed);
181252
page->WUnlatch();
182253

183254
bpm->UnpinPage(page->GetPageId(), true, AccessType::Scan);
184-
page_idx = (page_idx + 1) % BUSTUB_PAGE_CNT;
255+
page_idx += 1;
256+
if (page_idx >= page_idx_end) {
257+
page_idx = page_idx_start;
258+
}
185259
metrics.Tick();
186260
metrics.Report();
187261
}
188262

189263
total_metrics.ReportScan(metrics.cnt_);
190-
}));
264+
});
191265
}
192266

193-
for (size_t thread_id = 0; thread_id < BUSTUB_GET_THREAD; thread_id++) {
194-
threads.emplace_back(std::thread([thread_id, &page_ids, &bpm, duration_ms, &total_metrics] {
267+
for (size_t thread_id = 0; thread_id < get_thread_n; thread_id++) {
268+
threads.emplace_back([thread_id, &page_ids, &bpm, bustub_page_cnt, duration_ms, &total_metrics] {
195269
std::random_device r;
196270
std::default_random_engine gen(r());
197-
zipfian_int_distribution<size_t> dist(0, BUSTUB_PAGE_CNT - 1, 0.8);
271+
zipfian_int_distribution<size_t> dist(0, bustub_page_cnt - 1, 0.8);
198272

199273
BpmMetrics metrics(fmt::format("get {:>2}", thread_id), duration_ms);
200274
metrics.Begin();
201275

202276
while (!metrics.ShouldFinish()) {
203277
auto page_idx = dist(gen);
204-
auto *page = bpm->FetchPage(page_ids[page_idx], AccessType::Get);
278+
auto *page = bpm->FetchPage(page_ids[page_idx], AccessType::Lookup);
205279
if (page == nullptr) {
206280
continue;
207281
}
208282

209283
page->RLatch();
210-
char ch = page->GetData()[page_idx % 1024];
284+
CheckPageConsistentNoSeed(page->GetData(), page_idx);
211285
page->RUnlatch();
212-
if (ch == 0) {
213-
throw std::runtime_error("invalid data");
214-
}
215286

216-
bpm->UnpinPage(page->GetPageId(), false, AccessType::Get);
287+
bpm->UnpinPage(page->GetPageId(), false, AccessType::Lookup);
217288
metrics.Tick();
218289
metrics.Report();
219290
}
220291

221292
total_metrics.ReportGet(metrics.cnt_);
222-
}));
293+
});
223294
}
224295

225296
for (auto &thread : threads) {

0 commit comments

Comments
 (0)