|
6 | 6 | //
|
7 | 7 | // Identification: src/include/buffer/buffer_pool_manager.h
|
8 | 8 | //
|
9 |
| -// Copyright (c) 2015-2021, Carnegie Mellon University Database Group |
| 9 | +// Copyright (c) 2015-2024, Carnegie Mellon University Database Group |
10 | 10 | //
|
11 | 11 | //===----------------------------------------------------------------------===//
|
12 | 12 |
|
13 | 13 | #pragma once
|
14 | 14 |
|
15 | 15 | #include <list>
|
16 | 16 | #include <memory>
|
17 |
| -#include <mutex> // NOLINT |
| 17 | +#include <shared_mutex> |
18 | 18 | #include <unordered_map>
|
| 19 | +#include <vector> |
19 | 20 |
|
20 | 21 | #include "buffer/lru_k_replacer.h"
|
21 | 22 | #include "common/config.h"
|
22 | 23 | #include "recovery/log_manager.h"
|
23 | 24 | #include "storage/disk/disk_scheduler.h"
|
24 |
| -#include "storage/disk/write_back_cache.h" |
25 | 25 | #include "storage/page/page.h"
|
26 | 26 | #include "storage/page/page_guard.h"
|
27 | 27 |
|
28 | 28 | namespace bustub {
|
29 | 29 |
|
| 30 | +class BufferPoolManager; |
| 31 | +class ReadPageGuard; |
| 32 | +class WritePageGuard; |
| 33 | + |
30 | 34 | /**
|
31 |
| - * BufferPoolManager reads disk pages to and from its internal buffer pool. |
| 35 | + * @brief A helper class for `BufferPoolManager` that manages a frame of memory and related metadata. |
| 36 | + * |
| 37 | + * This class represents headers for frames of memory that the `BufferPoolManager` stores pages of data into. Note that |
| 38 | + * the actual frames of memory are not stored directly inside a `FrameHeader`; rather, the `FrameHeader`s store |
| 39 | + * pointers to the frames and are stored separately from them. |
| 40 | + * |
| 41 | + * --- |
| 42 | + * |
| 43 | + * Something that may (or may not) be of interest to you is why the field `data_` is stored as a vector that is |
| 44 | + * allocated on the fly instead of as a direct pointer to some pre-allocated chunk of memory. |
| 45 | + * |
| 46 | + * In a traditional production buffer pool manager, all memory that the buffer pool is intended to manage is allocated |
| 47 | + * in one large contiguous array (think of a very large `malloc` call that allocates several gigabytes of memory up |
| 48 | + * front). This large contiguous block of memory is then divided into contiguous frames. In other words, frames are |
| 49 | + * defined by an offset from the base of the array in page-sized (4 KB) intervals. |
| 50 | + * |
| 51 | + * In BusTub, we instead allocate each frame on its own (via a `std::vector<char>`) in order to easily detect buffer |
| 52 | + * overflow with address sanitizer. Since C++ has no notion of memory safety, it would be very easy to cast a page's |
| 53 | + * data pointer into some large data type and start overwriting other pages of data if they were all contiguous. |
| 54 | + * |
| 55 | + * If you would like to attempt to use more efficient data structures for your buffer pool manager, you are free to do |
| 56 | + * so. However, you will likely benefit significantly from detecting buffer overflow in future projects (especially |
| 57 | + * project 2). |
32 | 58 | */
|
33 |
| -class BufferPoolManager { |
| 59 | +class FrameHeader { |
| 60 | + friend class BufferPoolManager; |
| 61 | + friend class ReadPageGuard; |
| 62 | + friend class WritePageGuard; |
| 63 | + |
34 | 64 | public:
|
35 |
| - /** |
36 |
| - * @brief Creates a new BufferPoolManager. |
37 |
| - * @param pool_size the size of the buffer pool |
38 |
| - * @param disk_manager the disk manager |
39 |
| - * @param replacer_k the LookBack constant k for the LRU-K replacer |
40 |
| - * @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1. |
41 |
| - */ |
42 |
| - BufferPoolManager(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K, |
43 |
| - LogManager *log_manager = nullptr); |
| 65 | + explicit FrameHeader(frame_id_t frame_id); |
44 | 66 |
|
45 |
| - /** |
46 |
| - * @brief Destroy an existing BufferPoolManager. |
47 |
| - */ |
48 |
| - ~BufferPoolManager(); |
| 67 | + private: |
| 68 | + auto GetData() const -> const char *; |
| 69 | + auto GetDataMut() -> char *; |
| 70 | + void Reset(); |
49 | 71 |
|
50 |
| - /** @brief Return the size (number of frames) of the buffer pool. */ |
51 |
| - auto GetPoolSize() -> size_t { return pool_size_; } |
| 72 | + /** @brief The frame ID / index of the frame this header represents. */ |
| 73 | + const frame_id_t frame_id_; |
52 | 74 |
|
53 |
| - /** @brief Return the pointer to all the pages in the buffer pool. */ |
54 |
| - auto GetPages() -> Page * { return pages_; } |
| 75 | + /** @brief The readers / writer latch for this frame. */ |
| 76 | + std::shared_mutex rwlatch_; |
55 | 77 |
|
56 |
| - /** |
57 |
| - * TODO(P1): Add implementation |
58 |
| - * |
59 |
| - * @brief Create a new page in the buffer pool. Set page_id to the new page's id, or nullptr if all frames |
60 |
| - * are currently in use and not evictable (in another word, pinned). |
61 |
| - * |
62 |
| - * You should pick the replacement frame from either the free list or the replacer (always find from the free list |
63 |
| - * first), and then call the AllocatePage() method to get a new page id. If the replacement frame has a dirty page, |
64 |
| - * you should write it back to the disk first. You also need to reset the memory and metadata for the new page. |
65 |
| - * |
66 |
| - * Remember to "Pin" the frame by calling replacer.SetEvictable(frame_id, false) |
67 |
| - * so that the replacer wouldn't evict the frame before the buffer pool manager "Unpin"s it. |
68 |
| - * Also, remember to record the access history of the frame in the replacer for the lru-k algorithm to work. |
69 |
| - * |
70 |
| - * @param[out] page_id id of created page |
71 |
| - * @return nullptr if no new pages could be created, otherwise pointer to new page |
72 |
| - */ |
73 |
| - auto NewPage(page_id_t *page_id) -> Page *; |
| 78 | + /** @brief The number of pins on this frame keeping the page in memory. */ |
| 79 | + std::atomic<size_t> pin_count_; |
74 | 80 |
|
75 |
| - /** |
76 |
| - * TODO(P1): Add implementation |
77 |
| - * |
78 |
| - * @brief PageGuard wrapper for NewPage |
79 |
| - * |
80 |
| - * Functionality should be the same as NewPage, except that |
81 |
| - * instead of returning a pointer to a page, you return a |
82 |
| - * BasicPageGuard structure. |
83 |
| - * |
84 |
| - * @param[out] page_id, the id of the new page |
85 |
| - * @return BasicPageGuard holding a new page |
86 |
| - */ |
87 |
| - auto NewPageGuarded(page_id_t *page_id) -> BasicPageGuard; |
| 81 | + /** @brief The dirty flag. */ |
| 82 | + bool is_dirty_; |
88 | 83 |
|
89 | 84 | /**
|
90 |
| - * TODO(P1): Add implementation |
| 85 | + * @brief The buffer holding the data of the page that this frame holds. |
91 | 86 | *
|
92 |
| - * @brief Fetch the requested page from the buffer pool. Return nullptr if page_id needs to be fetched from the disk |
93 |
| - * but all frames are currently in use and not evictable (in another word, pinned). |
94 |
| - * |
95 |
| - * First search for page_id in the buffer pool. If not found, pick a replacement frame from either the free list or |
96 |
| - * the replacer (always find from the free list first), read the page from disk by scheduling a read DiskRequest with |
97 |
| - * disk_scheduler_->Schedule(), and replace the old page in the frame. Similar to NewPage(), if the old page is dirty, |
98 |
| - * you need to write it back to disk and update the metadata of the new page |
99 |
| - * |
100 |
| - * In addition, remember to disable eviction and record the access history of the frame like you did for NewPage(). |
101 |
| - * |
102 |
| - * @param page_id id of page to be fetched |
103 |
| - * @param access_type type of access to the page, only needed for leaderboard tests. |
104 |
| - * @return nullptr if page_id cannot be fetched, otherwise pointer to the requested page |
| 87 | + * If the frame does not hold any page data, the frame contains all null bytes. |
105 | 88 | */
|
106 |
| - auto FetchPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> Page *; |
| 89 | + std::vector<char> data_; |
107 | 90 |
|
108 | 91 | /**
|
109 |
| - * TODO(P1): Add implementation |
110 |
| - * |
111 |
| - * @brief PageGuard wrappers for FetchPage |
| 92 | + * TODO(P1): You may add any fields or helper functions under here that you think are necessary. |
112 | 93 | *
|
113 |
| - * Functionality should be the same as FetchPage, except |
114 |
| - * that, depending on the function called, a guard is returned. |
115 |
| - * If FetchPageRead or FetchPageWrite is called, it is expected that |
116 |
| - * the returned page already has a read or write latch held, respectively. |
117 |
| - * |
118 |
| - * @param page_id, the id of the page to fetch |
119 |
| - * @return PageGuard holding the fetched page |
| 94 | + * One potential optimization you could make is storing an optional page ID of the page that the `FrameHeader` is |
| 95 | + * currently storing. This might allow you to skip searching for the corresponding (page ID, frame ID) pair somewhere |
| 96 | + * else in the buffer pool manager... |
120 | 97 | */
|
121 |
| - auto FetchPageBasic(page_id_t page_id) -> BasicPageGuard; |
122 |
| - auto FetchPageRead(page_id_t page_id) -> ReadPageGuard; |
123 |
| - auto FetchPageWrite(page_id_t page_id) -> WritePageGuard; |
| 98 | +}; |
124 | 99 |
|
125 |
| - /** |
126 |
| - * TODO(P1): Add implementation |
127 |
| - * |
128 |
| - * @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count is already |
129 |
| - * 0, return false. |
130 |
| - * |
131 |
| - * Decrement the pin count of a page. If the pin count reaches 0, the frame should be evictable by the replacer. |
132 |
| - * Also, set the dirty flag on the page to indicate if the page was modified. |
133 |
| - * |
134 |
| - * @param page_id id of page to be unpinned |
135 |
| - * @param is_dirty true if the page should be marked as dirty, false otherwise |
136 |
| - * @param access_type type of access to the page, only needed for leaderboard tests. |
137 |
| - * @return false if the page is not in the page table or its pin count is <= 0 before this call, true otherwise |
138 |
| - */ |
139 |
| - auto UnpinPage(page_id_t page_id, bool is_dirty, AccessType access_type = AccessType::Unknown) -> bool; |
| 100 | +/** |
| 101 | + * @brief The declaration of the `BufferPoolManager` class. |
| 102 | + * |
| 103 | + * As stated in the writeup, the buffer pool is responsible for moving physical pages of data back and forth from |
| 104 | + * buffers in main memory to persistent storage. It also behaves as a cache, keeping frequently used pages in memory for |
| 105 | + * faster access, and evicting unused or cold pages back out to storage. |
| 106 | + * |
| 107 | + * Make sure you read the writeup in its entirety before attempting to implement the buffer pool manager. You also need |
| 108 | + * to have completed the implementation of both the `LRUKReplacer` and `DiskManager` classes. |
| 109 | + */ |
| 110 | +class BufferPoolManager { |
| 111 | + public: |
| 112 | + BufferPoolManager(size_t num_frames, DiskManager *disk_manager, size_t k_dist = LRUK_REPLACER_K, |
| 113 | + LogManager *log_manager = nullptr); |
| 114 | + ~BufferPoolManager(); |
140 | 115 |
|
141 |
| - /** |
142 |
| - * TODO(P1): Add implementation |
143 |
| - * |
144 |
| - * @brief Flush the target page to disk. |
145 |
| - * |
146 |
| - * Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag. |
147 |
| - * Unset the dirty flag of the page after flushing. |
148 |
| - * |
149 |
| - * @param page_id id of page to be flushed, cannot be INVALID_PAGE_ID |
150 |
| - * @return false if the page could not be found in the page table, true otherwise |
151 |
| - */ |
| 116 | + auto Size() const -> size_t; |
| 117 | + auto NewPage() -> page_id_t; |
| 118 | + auto DeletePage(page_id_t page_id) -> bool; |
| 119 | + auto CheckedWritePage(page_id_t page_id, AccessType access_type = AccessType::Unknown) |
| 120 | + -> std::optional<WritePageGuard>; |
| 121 | + auto CheckedReadPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> std::optional<ReadPageGuard>; |
| 122 | + auto WritePage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> WritePageGuard; |
| 123 | + auto ReadPage(page_id_t page_id, AccessType access_type = AccessType::Unknown) -> ReadPageGuard; |
152 | 124 | auto FlushPage(page_id_t page_id) -> bool;
|
153 |
| - |
154 |
| - /** |
155 |
| - * TODO(P1): Add implementation |
156 |
| - * |
157 |
| - * @brief Flush all the pages in the buffer pool to disk. |
158 |
| - */ |
159 | 125 | void FlushAllPages();
|
| 126 | + auto GetPinCount(page_id_t page_id) -> std::optional<size_t>; |
| 127 | + |
| 128 | + private: |
| 129 | + /** @brief The number of frames in the buffer pool. */ |
| 130 | + const size_t num_frames_; |
| 131 | + |
| 132 | + /** @brief The next page ID to be allocated. */ |
| 133 | + std::atomic<page_id_t> next_page_id_; |
160 | 134 |
|
161 | 135 | /**
|
162 |
| - * TODO(P1): Add implementation |
163 |
| - * |
164 |
| - * @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return true. If the |
165 |
| - * page is pinned and cannot be deleted, return false immediately. |
166 |
| - * |
167 |
| - * After deleting the page from the page table, stop tracking the frame in the replacer and add the frame |
168 |
| - * back to the free list. Also, reset the page's memory and metadata. Finally, you should call DeallocatePage() to |
169 |
| - * imitate freeing the page on the disk. |
| 136 | + * @brief The latch protecting the buffer pool's inner data structures. |
170 | 137 | *
|
171 |
| - * @param page_id id of page to be deleted |
172 |
| - * @return false if the page exists but could not be deleted, true if the page didn't exist or deletion succeeded |
| 138 | + * TODO(P1): We recommend replacing this comment with details about what this latch actually protects. |
173 | 139 | */
|
174 |
| - auto DeletePage(page_id_t page_id) -> bool; |
| 140 | + std::shared_ptr<std::mutex> bpm_latch_; |
175 | 141 |
|
176 |
| - private: |
177 |
| - /** Number of pages in the buffer pool. */ |
178 |
| - const size_t pool_size_; |
179 |
| - /** The next page id to be allocated */ |
180 |
| - std::atomic<page_id_t> next_page_id_ = 0; |
181 |
| - |
182 |
| - /** Array of buffer pool pages. */ |
183 |
| - Page *pages_; |
184 |
| - /** Pointer to the disk scheduler. */ |
185 |
| - std::unique_ptr<DiskScheduler> disk_scheduler_ __attribute__((__unused__)); |
186 |
| - /** Pointer to the log manager. Please ignore this for P1. */ |
187 |
| - LogManager *log_manager_ __attribute__((__unused__)); |
188 |
| - /** Page table for keeping track of buffer pool pages. */ |
| 142 | + /** @brief The frame headers of the frames that this buffer pool manages. */ |
| 143 | + std::vector<std::shared_ptr<FrameHeader>> frames_; |
| 144 | + |
| 145 | + /** @brief The page table that keeps track of the mapping between pages and buffer pool frames. */ |
189 | 146 | std::unordered_map<page_id_t, frame_id_t> page_table_;
|
190 |
| - /** Replacer to find unpinned pages for replacement. */ |
191 |
| - std::unique_ptr<LRUKReplacer> replacer_; |
192 |
| - /** List of free frames that don't have any pages on them. */ |
193 |
| - std::list<frame_id_t> free_list_; |
194 |
| - /** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */ |
195 |
| - std::mutex latch_; |
196 |
| - /** This buffer is for the leaderboard task. You may want to use it to optimize the write requests. */ |
197 |
| - WriteBackCache write_back_cache_ __attribute__((__unused__)); |
| 147 | + |
| 148 | + /** @brief A list of free frames that do not hold any page's data. */ |
| 149 | + std::list<frame_id_t> free_frames_; |
| 150 | + |
| 151 | + /** @brief The replacer to find unpinned / candidate pages for eviction. */ |
| 152 | + std::shared_ptr<LRUKReplacer> replacer_; |
| 153 | + |
| 154 | + /** @brief A pointer to the disk scheduler. */ |
| 155 | + std::unique_ptr<DiskScheduler> disk_scheduler_; |
198 | 156 |
|
199 | 157 | /**
|
200 |
| - * @brief Allocate a page on disk. Caller should acquire the latch before calling this function. |
201 |
| - * @return the id of the allocated page |
| 158 | + * @brief A pointer to the log manager. |
| 159 | + * |
| 160 | + * Note: Please ignore this for P1. |
202 | 161 | */
|
203 |
| - auto AllocatePage() -> page_id_t; |
| 162 | + LogManager *log_manager_ __attribute__((__unused__)); |
204 | 163 |
|
205 | 164 | /**
|
206 |
| - * @brief Deallocate a page on disk. Caller should acquire the latch before calling this function. |
207 |
| - * @param page_id id of the page to deallocate |
| 165 | + * TODO(P1): You may add additional private members and helper functions if you find them necessary. |
| 166 | + * |
| 167 | + * There will likely be a lot of code duplication between the different modes of accessing a page. |
| 168 | + * |
| 169 | + * We would recommend implementing a helper function that returns the ID of a frame that is free and has nothing |
| 170 | + * stored inside of it. Additionally, you may also want to implement a helper function that returns a shared pointer |
| 171 | + * to a `FrameHeader` that already has a page's data stored inside of it. |
208 | 172 | */
|
209 |
| - void DeallocatePage(__attribute__((unused)) page_id_t page_id) { |
210 |
| - // This is a no-nop right now without a more complex data structure to track deallocated pages |
211 |
| - } |
212 |
| - |
213 |
| - // TODO(student): You may add additional private members and helper functions |
214 | 173 | };
|
215 | 174 | } // namespace bustub
|
0 commit comments