Skip to content

Commit a8081ff

Browse files
authored
Buffer Pool Manager (Project 1) Refactor (#734)
* first API changes, not compiling * first round of BPM changes for starter code * fix formatting * fix lint * update public tests with new API * remove newpageguarded * what in the world * make buffer pool manager return a `std::optional` * no default values and clean up some docs * remove unnecessary imports * sync public files with private public files * add disabled_ * add starter code for bpm and page guard * update b plus tree with new API * make not private * fix lint * fix copyright * remove bad example * change assert to ensure * update bpm bench * rip out old bpm API * remove unnecessary file * add single concurrent test to public tests * sync * update leaderboard bench
1 parent 0ba573c commit a8081ff

30 files changed

+1678
-1189
lines changed

src/buffer/buffer_pool_manager.cpp

Lines changed: 300 additions & 33 deletions
Large diffs are not rendered by default.

src/container/disk/hash/disk_extendible_hash_table_utils.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ template <typename K, typename V, typename KC>
4040
void DiskExtendibleHashTable<K, V, KC>::PrintHT() const {
4141
std::cout << "\n";
4242
std::cout << "==================== PRINT! ====================\n";
43-
BasicPageGuard header_guard = bpm_->FetchPageBasic(header_page_id_);
44-
auto *header = header_guard.As<ExtendibleHTableHeaderPage>();
43+
ReadPageGuard header_guard = bpm_->ReadPage(header_page_id_);
44+
const auto *header = header_guard.As<ExtendibleHTableHeaderPage>();
4545

4646
header->PrintHeader();
4747

@@ -51,16 +51,16 @@ void DiskExtendibleHashTable<K, V, KC>::PrintHT() const {
5151
std::cout << "Directory " << idx << ", page id: " << directory_page_id << "\n";
5252
continue;
5353
}
54-
BasicPageGuard directory_guard = bpm_->FetchPageBasic(directory_page_id);
55-
auto *directory = directory_guard.As<ExtendibleHTableDirectoryPage>();
54+
ReadPageGuard directory_guard = bpm_->ReadPage(directory_page_id);
55+
const auto *directory = directory_guard.As<ExtendibleHTableDirectoryPage>();
5656

5757
std::cout << "Directory " << idx << ", page id: " << directory_page_id << "\n";
5858
directory->PrintDirectory();
5959

6060
for (uint32_t idx2 = 0; idx2 < directory->Size(); idx2++) {
6161
page_id_t bucket_page_id = directory->GetBucketPageId(idx2);
62-
BasicPageGuard bucket_guard = bpm_->FetchPageBasic(bucket_page_id);
63-
auto *bucket = bucket_guard.As<ExtendibleHTableBucketPage<K, V, KC>>();
62+
ReadPageGuard bucket_guard = bpm_->ReadPage(bucket_page_id);
63+
const auto *bucket = bucket_guard.As<ExtendibleHTableBucketPage<K, V, KC>>();
6464

6565
std::cout << "Bucket " << idx2 << ", page id: " << bucket_page_id << "\n";
6666
bucket->PrintBucket();
@@ -77,15 +77,15 @@ void DiskExtendibleHashTable<K, V, KC>::PrintHT() const {
7777
template <typename K, typename V, typename KC>
7878
void DiskExtendibleHashTable<K, V, KC>::VerifyIntegrity() const {
7979
BUSTUB_ASSERT(header_page_id_ != INVALID_PAGE_ID, "header page id is invalid");
80-
BasicPageGuard header_guard = bpm_->FetchPageBasic(header_page_id_);
81-
auto *header = header_guard.As<ExtendibleHTableHeaderPage>();
80+
ReadPageGuard header_guard = bpm_->ReadPage(header_page_id_);
81+
const auto *header = header_guard.As<ExtendibleHTableHeaderPage>();
8282

8383
// for each of the directory pages, check their integrity using directory page VerifyIntegrity
8484
for (uint32_t idx = 0; idx < header->MaxSize(); idx++) {
8585
auto directory_page_id = header->GetDirectoryPageId(idx);
8686
if (static_cast<int>(directory_page_id) != INVALID_PAGE_ID) {
87-
BasicPageGuard directory_guard = bpm_->FetchPageBasic(directory_page_id);
88-
auto *directory = directory_guard.As<ExtendibleHTableDirectoryPage>();
87+
ReadPageGuard directory_guard = bpm_->ReadPage(directory_page_id);
88+
const auto *directory = directory_guard.As<ExtendibleHTableDirectoryPage>();
8989
directory->VerifyIntegrity();
9090
}
9191
}

src/include/buffer/buffer_pool_manager.h

Lines changed: 114 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -6,210 +6,169 @@
66
//
77
// Identification: src/include/buffer/buffer_pool_manager.h
88
//
9-
// Copyright (c) 2015-2021, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2024, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

1313
#pragma once
1414

1515
#include <list>
1616
#include <memory>
17-
#include <mutex> // NOLINT
17+
#include <shared_mutex>
1818
#include <unordered_map>
19+
#include <vector>
1920

2021
#include "buffer/lru_k_replacer.h"
2122
#include "common/config.h"
2223
#include "recovery/log_manager.h"
2324
#include "storage/disk/disk_scheduler.h"
24-
#include "storage/disk/write_back_cache.h"
2525
#include "storage/page/page.h"
2626
#include "storage/page/page_guard.h"
2727

2828
namespace bustub {
2929

30+
class BufferPoolManager;
31+
class ReadPageGuard;
32+
class WritePageGuard;
33+
3034
/**
31-
* BufferPoolManager reads disk pages to and from its internal buffer pool.
35+
* @brief A helper class for `BufferPoolManager` that manages a frame of memory and related metadata.
36+
*
37+
* This class represents headers for frames of memory that the `BufferPoolManager` stores pages of data into. Note that
38+
* the actual frames of memory are not stored directly inside a `FrameHeader`, rather the `FrameHeader`s store pointers
39+
* to the frames and are stored separately from them.
40+
*
41+
* ---
42+
*
43+
* Something that may (or may not) be of interest to you is why the field `data_` is stored as a vector that is
44+
* allocated on the fly instead of as a direct pointer to some pre-allocated chunk of memory.
45+
*
46+
* In a traditional production buffer pool manager, all memory that the buffer pool is intended to manage is allocated
47+
* in one large contiguous array (think of a very large `malloc` call that allocates several gigabytes of memory up
48+
* front). This large contiguous block of memory is then divided into contiguous frames. In other words, frames are
49+
* defined by an offset from the base of the array in page-sized (4 KB) intervals.
50+
*
51+
* In BusTub, we instead allocate each frame on its own (via a `std::vector<char>`) in order to easily detect buffer
52+
* overflow with address sanitizer. Since C++ has no notion of memory safety, it would be very easy to cast a page's
53+
* data pointer into some large data type and start overwriting other pages of data if they were all contiguous.
54+
*
55+
* If you would like to attempt to use more efficient data structures for your buffer pool manager, you are free to do
56+
* so. However, you will likely benefit significantly from detecting buffer overflow in future projects (especially
57+
* project 2).
3258
*/
33-
class BufferPoolManager {
59+
class FrameHeader {
60+
friend class BufferPoolManager;
61+
friend class ReadPageGuard;
62+
friend class WritePageGuard;
63+
3464
public:
35-
/**
36-
* @brief Creates a new BufferPoolManager.
37-
* @param pool_size the size of the buffer pool
38-
* @param disk_manager the disk manager
39-
* @param replacer_k the LookBack constant k for the LRU-K replacer
40-
* @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1.
41-
*/
42-
BufferPoolManager(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K,
43-
LogManager *log_manager = nullptr);
65+
explicit FrameHeader(frame_id_t frame_id);
4466

45-
/**
46-
* @brief Destroy an existing BufferPoolManager.
47-
*/
48-
~BufferPoolManager();
67+
private:
68+
auto GetData() const -> const char *;
69+
auto GetDataMut() -> char *;
70+
void Reset();
4971

50-
/** @brief Return the size (number of frames) of the buffer pool. */
51-
auto GetPoolSize() -> size_t { return pool_size_; }
72+
/** @brief The frame ID / index of the frame this header represents. */
73+
const frame_id_t frame_id_;
5274

53-
/** @brief Return the pointer to all the pages in the buffer pool. */
54-
auto GetPages() -> Page * { return pages_; }
75+
/** @brief The readers / writer latch for this frame. */
76+
std::shared_mutex rwlatch_;
5577

56-
/**
57-
* TODO(P1): Add implementation
58-
*
59-
* @brief Create a new page in the buffer pool. Set page_id to the new page's id, or nullptr if all frames
60-
* are currently in use and not evictable (in other words, pinned).
61-
*
62-
* You should pick the replacement frame from either the free list or the replacer (always find from the free list
63-
* first), and then call the AllocatePage() method to get a new page id. If the replacement frame has a dirty page,
64-
* you should write it back to the disk first. You also need to reset the memory and metadata for the new page.
65-
*
66-
* Remember to "Pin" the frame by calling replacer.SetEvictable(frame_id, false)
67-
* so that the replacer wouldn't evict the frame before the buffer pool manager "Unpin"s it.
68-
* Also, remember to record the access history of the frame in the replacer for the lru-k algorithm to work.
69-
*
70-
* @param[out] page_id id of created page
71-
* @return nullptr if no new pages could be created, otherwise pointer to new page
72-
*/
73-
auto NewPage(page_id_t *page_id) -> Page *;
78+
/** @brief The number of pins on this frame keeping the page in memory. */
79+
std::atomic<size_t> pin_count_;
7480

75-
/**
76-
* TODO(P1): Add implementation
77-
*
78-
* @brief PageGuard wrapper for NewPage
79-
*
80-
* Functionality should be the same as NewPage, except that
81-
* instead of returning a pointer to a page, you return a
82-
* BasicPageGuard structure.
83-
*
84-
* @param[out] page_id, the id of the new page
85-
* @return BasicPageGuard holding a new page
86-
*/
87-
auto NewPageGuarded(page_id_t *page_id) -> BasicPageGuard;
81+
/** @brief The dirty flag. */
82+
bool is_dirty_;
8883

8984
/**
90-
* TODO(P1): Add implementation
85+
* @brief A pointer to the data of the page that this frame holds.
9186
*
92-
* @brief Fetch the requested page from the buffer pool. Return nullptr if page_id needs to be fetched from the disk
93-
* but all frames are currently in use and not evictable (in other words, pinned).
94-
*
95-
* First search for page_id in the buffer pool. If not found, pick a replacement frame from either the free list or
96-
* the replacer (always find from the free list first), read the page from disk by scheduling a read DiskRequest with
97-
* disk_scheduler_->Schedule(), and replace the old page in the frame. Similar to NewPage(), if the old page is dirty,
98-
* you need to write it back to disk and update the metadata of the new page
99-
*
100-
* In addition, remember to disable eviction and record the access history of the frame like you did for NewPage().
101-
*
102-
* @param page_id id of page to be fetched
103-
* @param access_type type of access to the page, only needed for leaderboard tests.
104-
* @return nullptr if page_id cannot be fetched, otherwise pointer to the requested page
87+
* If the frame does not hold any page data, the frame contains all null bytes.
10588
*/
106-
auto FetchPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> Page *;
89+
std::vector<char> data_;
10790

10891
/**
109-
* TODO(P1): Add implementation
110-
*
111-
* @brief PageGuard wrappers for FetchPage
92+
* TODO(P1): You may add any fields or helper functions under here that you think are necessary.
11293
*
113-
* Functionality should be the same as FetchPage, except
114-
* that, depending on the function called, a guard is returned.
115-
* If FetchPageRead or FetchPageWrite is called, it is expected that
116-
* the returned page already has a read or write latch held, respectively.
117-
*
118-
* @param page_id, the id of the page to fetch
119-
* @return PageGuard holding the fetched page
94+
* One potential optimization you could make is storing an optional page ID of the page that the `FrameHeader` is
95+
* currently storing. This might allow you to skip searching for the corresponding (page ID, frame ID) pair somewhere
96+
* else in the buffer pool manager...
12097
*/
121-
auto FetchPageBasic(page_id_t page_id) -> BasicPageGuard;
122-
auto FetchPageRead(page_id_t page_id) -> ReadPageGuard;
123-
auto FetchPageWrite(page_id_t page_id) -> WritePageGuard;
98+
};
12499

125-
/**
126-
* TODO(P1): Add implementation
127-
*
128-
* @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count is already
129-
* 0, return false.
130-
*
131-
* Decrement the pin count of a page. If the pin count reaches 0, the frame should be evictable by the replacer.
132-
* Also, set the dirty flag on the page to indicate if the page was modified.
133-
*
134-
* @param page_id id of page to be unpinned
135-
* @param is_dirty true if the page should be marked as dirty, false otherwise
136-
* @param access_type type of access to the page, only needed for leaderboard tests.
137-
* @return false if the page is not in the page table or its pin count is <= 0 before this call, true otherwise
138-
*/
139-
auto UnpinPage(page_id_t page_id, bool is_dirty, AccessType access_type = AccessType::Unknown) -> bool;
100+
/**
101+
* @brief The declaration of the `BufferPoolManager` class.
102+
*
103+
* As stated in the writeup, the buffer pool is responsible for moving physical pages of data back and forth from
104+
* buffers in main memory to persistent storage. It also behaves as a cache, keeping frequently used pages in memory for
105+
* faster access, and evicting unused or cold pages back out to storage.
106+
*
107+
* Make sure you read the writeup in its entirety before attempting to implement the buffer pool manager. You also need
108+
* to have completed the implementation of both the `LRUKReplacer` and `DiskManager` classes.
109+
*/
110+
class BufferPoolManager {
111+
public:
112+
BufferPoolManager(size_t num_frames, DiskManager *disk_manager, size_t k_dist = LRUK_REPLACER_K,
113+
LogManager *log_manager = nullptr);
114+
~BufferPoolManager();
140115

141-
/**
142-
* TODO(P1): Add implementation
143-
*
144-
* @brief Flush the target page to disk.
145-
*
146-
* Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag.
147-
* Unset the dirty flag of the page after flushing.
148-
*
149-
* @param page_id id of page to be flushed, cannot be INVALID_PAGE_ID
150-
* @return false if the page could not be found in the page table, true otherwise
151-
*/
116+
auto Size() const -> size_t;
117+
auto NewPage() -> page_id_t;
118+
auto DeletePage(page_id_t page_id) -> bool;
119+
auto CheckedWritePage(page_id_t page_id, AccessType access_type = AccessType::Unknown)
120+
-> std::optional<WritePageGuard>;
121+
auto CheckedReadPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> std::optional<ReadPageGuard>;
122+
auto WritePage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> WritePageGuard;
123+
auto ReadPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> ReadPageGuard;
152124
auto FlushPage(page_id_t page_id) -> bool;
153-
154-
/**
155-
* TODO(P1): Add implementation
156-
*
157-
* @brief Flush all the pages in the buffer pool to disk.
158-
*/
159125
void FlushAllPages();
126+
auto GetPinCount(page_id_t page_id) -> std::optional<size_t>;
127+
128+
private:
129+
/** @brief The number of frames in the buffer pool. */
130+
const size_t num_frames_;
131+
132+
/** @brief The next page ID to be allocated. */
133+
std::atomic<page_id_t> next_page_id_;
160134

161135
/**
162-
* TODO(P1): Add implementation
163-
*
164-
* @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return true. If the
165-
* page is pinned and cannot be deleted, return false immediately.
166-
*
167-
* After deleting the page from the page table, stop tracking the frame in the replacer and add the frame
168-
* back to the free list. Also, reset the page's memory and metadata. Finally, you should call DeallocatePage() to
169-
* imitate freeing the page on the disk.
136+
* @brief The latch protecting the buffer pool's inner data structures.
170137
*
171-
* @param page_id id of page to be deleted
172-
* @return false if the page exists but could not be deleted, true if the page didn't exist or deletion succeeded
138+
* TODO(P1) We recommend replacing this comment with details about what this latch actually protects.
173139
*/
174-
auto DeletePage(page_id_t page_id) -> bool;
140+
std::shared_ptr<std::mutex> bpm_latch_;
175141

176-
private:
177-
/** Number of pages in the buffer pool. */
178-
const size_t pool_size_;
179-
/** The next page id to be allocated */
180-
std::atomic<page_id_t> next_page_id_ = 0;
181-
182-
/** Array of buffer pool pages. */
183-
Page *pages_;
184-
/** Pointer to the disk scheduler. */
185-
std::unique_ptr<DiskScheduler> disk_scheduler_ __attribute__((__unused__));
186-
/** Pointer to the log manager. Please ignore this for P1. */
187-
LogManager *log_manager_ __attribute__((__unused__));
188-
/** Page table for keeping track of buffer pool pages. */
142+
/** @brief The frame headers of the frames that this buffer pool manages. */
143+
std::vector<std::shared_ptr<FrameHeader>> frames_;
144+
145+
/** @brief The page table that keeps track of the mapping between pages and buffer pool frames. */
189146
std::unordered_map<page_id_t, frame_id_t> page_table_;
190-
/** Replacer to find unpinned pages for replacement. */
191-
std::unique_ptr<LRUKReplacer> replacer_;
192-
/** List of free frames that don't have any pages on them. */
193-
std::list<frame_id_t> free_list_;
194-
/** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */
195-
std::mutex latch_;
196-
/** This buffer is for the leaderboard task. You may want to use it to optimize the write requests. */
197-
WriteBackCache write_back_cache_ __attribute__((__unused__));
147+
148+
/** @brief A list of free frames that do not hold any page's data. */
149+
std::list<frame_id_t> free_frames_;
150+
151+
/** @brief The replacer to find unpinned / candidate pages for eviction. */
152+
std::shared_ptr<LRUKReplacer> replacer_;
153+
154+
/** @brief A pointer to the disk scheduler. */
155+
std::unique_ptr<DiskScheduler> disk_scheduler_;
198156

199157
/**
200-
* @brief Allocate a page on disk. Caller should acquire the latch before calling this function.
201-
* @return the id of the allocated page
158+
* @brief A pointer to the log manager.
159+
*
160+
* Note: Please ignore this for P1.
202161
*/
203-
auto AllocatePage() -> page_id_t;
162+
LogManager *log_manager_ __attribute__((__unused__));
204163

205164
/**
206-
* @brief Deallocate a page on disk. Caller should acquire the latch before calling this function.
207-
* @param page_id id of the page to deallocate
165+
* TODO(P1): You may add additional private members and helper functions if you find them necessary.
166+
*
167+
* There will likely be a lot of code duplication between the different modes of accessing a page.
168+
*
169+
* We would recommend implementing a helper function that returns the ID of a frame that is free and has nothing
170+
* stored inside of it. Additionally, you may also want to implement a helper function that returns a shared pointer
171+
* to a `FrameHeader` that already has a page's data stored inside of it.
208172
*/
209-
void DeallocatePage(__attribute__((unused)) page_id_t page_id) {
210-
// This is a no-op right now without a more complex data structure to track deallocated pages
211-
}
212-
213-
// TODO(student): You may add additional private members and helper functions
214173
};
215174
} // namespace bustub

src/include/common/config.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,11 @@ extern std::chrono::duration<int64_t> log_timeout;
3333
static constexpr int INVALID_PAGE_ID = -1; // invalid page id
3434
static constexpr int INVALID_TXN_ID = -1; // invalid transaction id
3535
static constexpr int INVALID_LSN = -1; // invalid log sequence number
36-
static constexpr int HEADER_PAGE_ID = 0; // the header page id
3736
static constexpr int BUSTUB_PAGE_SIZE = 4096; // size of a data page in byte
3837
static constexpr int BUFFER_POOL_SIZE = 10; // size of buffer pool
3938
static constexpr int LOG_BUFFER_SIZE = ((BUFFER_POOL_SIZE + 1) * BUSTUB_PAGE_SIZE); // size of a log buffer in byte
4039
static constexpr int BUCKET_SIZE = 50; // size of extendible hash bucket
41-
static constexpr int LRUK_REPLACER_K = 10; // lookback window for lru-k replacer
40+
static constexpr int LRUK_REPLACER_K = 10; // default lookback window for lru-k replacer
4241

4342
using frame_id_t = int32_t; // frame id type
4443
using page_id_t = int32_t; // page id type

0 commit comments

Comments
 (0)