Skip to content

Commit 0f31396

Browse files
authored
feat: define new extendible hash table pages specification (#611)
Signed-off-by: Yuchen Liang <[email protected]>
1 parent 7c6bd1b commit 0f31396

8 files changed

+401
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// BusTub
4+
//
5+
// extendible_htable_bucket_page.h
6+
//
7+
// Identification: src/include/storage/page/extendible_htable_bucket_page.h
8+
//
9+
// Copyright (c) 2015-2023, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/**
14+
* Bucket page format:
15+
* ----------------------------------------------------------------------------
16+
* | METADATA | KEY(1) + VALUE(1) | KEY(2) + VALUE(2) | ... | KEY(n) + VALUE(n)
17+
* ----------------------------------------------------------------------------
18+
*
19+
* Metadata format (sizes in bytes, 8 bytes in total):
20+
* --------------------------------
21+
* | CurrentSize (4) | MaxSize (4)
22+
* --------------------------------
23+
*/
24+
#pragma once
25+
26+
#include <utility>
27+
#include <vector>
28+
29+
#include "common/config.h"
30+
#include "common/macros.h"
31+
#include "storage/index/int_comparator.h"
32+
#include "storage/page/b_plus_tree_page.h"
33+
#include "storage/page/hash_table_page_defs.h"
34+
35+
namespace bustub {
36+
37+
static constexpr uint64_t HTABLE_BUCKET_PAGE_METADATA_SIZE = sizeof(uint32_t) * 2;
38+
39+
constexpr auto HTableBucketArraySize(uint64_t mapping_type_size) -> uint64_t {
40+
return (BUSTUB_PAGE_SIZE - HTABLE_BUCKET_PAGE_METADATA_SIZE) / mapping_type_size;
41+
};
42+
43+
/**
44+
* Bucket page for extendible hash table.
*
* As declared below, the page holds two uint32_t metadata fields (size_ and
* max_size_) followed by a fixed-capacity array of MappingType entries whose
* capacity is HTableBucketArraySize(sizeof(MappingType)).
45+
*/
46+
template <typename KeyType, typename ValueType, typename KeyComparator>
47+
class ExtendibleHTableBucketPage {
48+
public:
49+
// Delete the default constructor to ensure memory safety; Init() is used to set
// default values instead. Note that no destructor is deleted here.
// NOTE(review): DISALLOW_COPY_AND_MOVE is declared outside this file and, by its
// name, presumably removes the copy/move operations -- confirm in common/macros.h.
50+
ExtendibleHTableBucketPage() = delete;
51+
DISALLOW_COPY_AND_MOVE(ExtendibleHTableBucketPage);
52+
53+
/**
54+
* After creating a new bucket page from buffer pool, Init() must be called
55+
* to set default values.
56+
* @param max_size Max size of the bucket array. Defaults to
* HTableBucketArraySize(sizeof(MappingType)); note that this uint64_t default
* is converted to the int parameter type.
57+
*/
58+
void Init(int max_size = HTableBucketArraySize(sizeof(MappingType)));
59+
60+
/**
* Looks up a key in the bucket.
*
* NOTE(review): the implementation is not visible from this header; the
* parameter roles below are inferred from the signature -- confirm.
*
* @param key key to look up
* @param[out] value presumably receives the value stored for key when found
* @param cmp comparator, presumably used to compare keys
* @return presumably true if the key was found, false otherwise
*/
auto Lookup(const KeyType &key, ValueType &value, const KeyComparator &cmp) const -> bool;
61+
62+
/**
63+
* Attempts to insert a key and value in the bucket.
64+
*
65+
* @param key key to insert
66+
* @param value value to insert
* @param cmp comparator supplied by the caller (presumably used to compare keys)
67+
* @return true if inserted, false if duplicate KV pair or bucket is full
68+
*/
69+
auto Insert(KeyType key, ValueType value, const KeyComparator &cmp) -> bool;
70+
71+
/**
72+
* Removes a key and value.
73+
*
* @param key key of the pair to remove
* @param value value of the pair to remove
* @param cmp comparator supplied by the caller (presumably used to compare keys)
74+
* @return true if removed, false if not found
75+
*/
76+
auto Remove(KeyType key, ValueType value, const KeyComparator &cmp) -> bool;
77+
78+
/**
79+
* Gets the key at an index in the bucket.
80+
*
81+
* @param bucket_idx the index in the bucket to get the key at
82+
* @return key at index bucket_idx of the bucket
83+
*/
84+
auto KeyAt(uint32_t bucket_idx) const -> KeyType;
85+
86+
/**
87+
* Gets the value at an index in the bucket.
88+
*
89+
* @param bucket_idx the index in the bucket to get the value at
90+
* @return value at index bucket_idx of the bucket
91+
*/
92+
auto ValueAt(uint32_t bucket_idx) const -> ValueType;
93+
94+
/**
95+
* Removes the KV pair at bucket_idx.
*
* @param bucket_idx the index in the bucket of the pair to remove
96+
*/
97+
void RemoveAt(uint32_t bucket_idx);
98+
99+
/**
100+
* @return whether the bucket is full
101+
*/
102+
auto IsFull() -> bool;
103+
104+
/**
105+
* @return whether the bucket is empty
106+
*/
107+
auto IsEmpty() -> bool;
108+
109+
/**
110+
* Prints the bucket's occupancy information
111+
*/
112+
void PrintBucket();
113+
114+
private:
115+
// First metadata field; "CurrentSize (4)" in the bucket page format comment.
uint32_t size_;
116+
// Second metadata field; "MaxSize (4)" in the bucket page format comment.
uint32_t max_size_;
117+
// Key/value pair storage that follows the metadata. Its capacity is the number of
// MappingType entries that fit in a page after HTABLE_BUCKET_PAGE_METADATA_SIZE bytes.
MappingType array_[HTableBucketArraySize(sizeof(MappingType))];
118+
};
119+
120+
} // namespace bustub
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// BusTub
4+
//
5+
// extendible_htable_directory_page.h
6+
//
7+
// Identification: src/include/storage/page/extendible_htable_directory_page.h
8+
//
9+
// Copyright (c) 2015-2023, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/**
14+
* Directory page format:
15+
* --------------------------------------------------------------------------------------
16+
* | MaxSize (4) | GlobalDepth (4) | LocalDepths (512) | BucketPageIds(2048) | Free(1528)
17+
* --------------------------------------------------------------------------------------
18+
*/
19+
20+
#pragma once
21+
22+
#include <cassert>
23+
#include <climits>
24+
#include <cstdlib>
25+
#include <string>
26+
27+
#include "common/config.h"
28+
#include "storage/index/generic_key.h"
29+
#include "storage/page/hash_table_page_defs.h"
30+
31+
namespace bustub {
32+
33+
// Size in bytes of the directory page metadata: two uint32_t fields
// (MaxSize and GlobalDepth in the directory page format comment).
static constexpr uint64_t HTABLE_DIRECTORY_PAGE_METADATA_SIZE = sizeof(uint32_t) * 2;
34+
35+
/**
36+
* HTABLE_DIRECTORY_ARRAY_SIZE is the number of page_ids that can fit in the directory page of an extendible hash index.
37+
* This is 512 because the directory array must grow in powers of 2, and 1024 page_ids leaves zero room for
38+
* storage of the other member variables: max_size_, global_depth_, and the array local_depths_.
39+
* Extending the directory implementation to span multiple pages would be a meaningful improvement to the
40+
* implementation.
41+
*/
42+
static constexpr uint64_t HTABLE_DIRECTORY_ARRAY_SIZE = 512;
43+
44+
/**
45+
* Directory Page for extendible hash table.
*
* As declared below, the page stores max_size_ and global_depth_ followed by
* two arrays of HTABLE_DIRECTORY_ARRAY_SIZE entries each: a uint8_t local depth
* and a page_id_t bucket page id per directory index.
46+
*/
47+
class ExtendibleHTableDirectoryPage {
48+
public:
49+
// Delete the default constructor to ensure memory safety; Init() is used to set
// default values instead. Note that no destructor is deleted here.
// NOTE(review): DISALLOW_COPY_AND_MOVE is declared outside this file and, by its
// name, presumably removes the copy/move operations -- confirm in common/macros.h.
50+
ExtendibleHTableDirectoryPage() = delete;
51+
DISALLOW_COPY_AND_MOVE(ExtendibleHTableDirectoryPage);
52+
53+
/**
54+
* After creating a new directory page from buffer pool, Init() must be called
55+
* to set default values.
56+
* @param max_size Max size of the array in the directory page (defaults to HTABLE_DIRECTORY_ARRAY_SIZE)
57+
*/
58+
void Init(int max_size = HTABLE_DIRECTORY_ARRAY_SIZE);
59+
60+
/**
61+
* Get the bucket page id that the key is hashed to
62+
*
63+
* @param hash the hash of the key
64+
* @return bucket page_id current key is hashed to
65+
*/
66+
auto HashToBucketPageId(uint32_t hash) -> page_id_t;
67+
68+
/**
69+
* Lookup a bucket page using a directory index
70+
*
71+
* @param bucket_idx the index in the directory to lookup
72+
* @return bucket page_id corresponding to bucket_idx
73+
*/
74+
auto GetBucketPageId(uint32_t bucket_idx) -> page_id_t;
75+
76+
/**
77+
* Updates the directory index using a bucket index and page_id
78+
*
79+
* @param bucket_idx directory index at which to insert page_id
80+
* @param bucket_page_id page_id to insert
81+
*/
82+
void SetBucketPageId(uint32_t bucket_idx, page_id_t bucket_page_id);
83+
84+
/**
85+
* Gets the split image of an index
86+
*
87+
* @param bucket_idx the directory index for which to find the split image
88+
* @return the directory index of the split image
89+
**/
90+
auto GetSplitImageIndex(uint32_t bucket_idx) -> uint32_t;
91+
92+
/**
93+
* GetGlobalDepthMask - returns a mask of global_depth 1's and the rest 0's.
94+
*
95+
* In Extendible Hashing we map a key to a directory index
96+
* using the following hash + mask function.
97+
*
98+
* DirectoryIndex = Hash(key) & GLOBAL_DEPTH_MASK
99+
*
100+
* where GLOBAL_DEPTH_MASK is a mask with exactly GLOBAL_DEPTH 1's from LSB
101+
* upwards. For example, global depth 3 corresponds to 0x00000007 in a 32-bit
102+
* representation.
103+
*
104+
* @return mask of global_depth 1's and the rest 0's (with 1's from LSB upwards)
105+
*/
106+
auto GetGlobalDepthMask() -> uint32_t;
107+
108+
/**
109+
* GetLocalDepthMask - same as global depth mask, except it
110+
* uses the local depth of the bucket located at bucket_idx
111+
*
112+
* @param bucket_idx the index to use for looking up local depth
113+
* @return mask of local 1's and the rest 0's (with 1's from LSB upwards)
114+
*/
115+
auto GetLocalDepthMask(uint32_t bucket_idx) -> uint32_t;
116+
117+
/**
118+
* Get the global depth of the hash table directory
119+
*
120+
* @return the global depth of the directory
121+
*/
122+
auto GetGlobalDepth() -> uint32_t;
123+
124+
/**
125+
* Increment the global depth of the directory
126+
*/
127+
void IncrGlobalDepth();
128+
129+
/**
130+
* Decrement the global depth of the directory
131+
*/
132+
void DecrGlobalDepth();
133+
134+
/**
135+
* @return true if the directory can be shrunk
136+
*/
137+
auto CanShrink() -> bool;
138+
139+
/**
140+
* @return the current directory size
141+
*/
142+
auto Size() -> uint32_t;
143+
144+
/**
145+
* Gets the local depth of the bucket at bucket_idx
146+
*
147+
* @param bucket_idx the bucket index to lookup
148+
* @return the local depth of the bucket at bucket_idx
149+
*/
150+
auto GetLocalDepth(uint32_t bucket_idx) -> uint32_t;
151+
152+
/**
153+
* Set the local depth of the bucket at bucket_idx to local_depth
154+
*
155+
* @param bucket_idx bucket index to update
156+
* @param local_depth new local depth (uint8_t, matching the element type of local_depths_)
157+
*/
158+
void SetLocalDepth(uint32_t bucket_idx, uint8_t local_depth);
159+
160+
/**
161+
* Increment the local depth of the bucket at bucket_idx
162+
* @param bucket_idx bucket index to increment
163+
*/
164+
void IncrLocalDepth(uint32_t bucket_idx);
165+
166+
/**
167+
* Decrement the local depth of the bucket at bucket_idx
168+
* @param bucket_idx bucket index to decrement
169+
*/
170+
void DecrLocalDepth(uint32_t bucket_idx);
171+
172+
/**
173+
* Gets the high bit corresponding to the bucket's local depth.
174+
* This is not the same as the bucket index itself. This method
175+
* is helpful for finding the pair, or "split image", of a bucket.
176+
*
177+
* @param bucket_idx bucket index to lookup
178+
* @return the high bit corresponding to the bucket's local depth
179+
*/
180+
auto GetLocalHighBit(uint32_t bucket_idx) -> uint32_t;
181+
182+
/**
183+
* VerifyIntegrity
184+
*
185+
* Verify the following invariants (LD = local depth, GD = global depth):
186+
* (1) All LD <= GD.
187+
* (2) Each bucket has precisely 2^(GD - LD) pointers pointing to it.
188+
* (3) The LD is the same at each index with the same bucket_page_id
189+
*/
190+
void VerifyIntegrity();
191+
192+
/**
193+
* Prints the current directory
194+
*/
195+
void PrintDirectory();
196+
197+
private:
198+
// First metadata field; "MaxSize (4)" in the directory page format comment.
uint32_t max_size_;
199+
// Second metadata field; "GlobalDepth (4)" in the directory page format comment.
uint32_t global_depth_;
200+
// One uint8_t local depth per directory index (512 bytes in total).
uint8_t local_depths_[HTABLE_DIRECTORY_ARRAY_SIZE];
201+
// One bucket page id per directory index (2048 bytes with a 4-byte page_id_t).
page_id_t bucket_page_ids_[HTABLE_DIRECTORY_ARRAY_SIZE];
202+
};
203+
204+
// The size computations below and the documented page format rely on 4-byte page ids.
static_assert(sizeof(page_id_t) == 4);
205+
206+
// The directory page must be exactly metadata + one byte per local depth +
// one page_id_t per bucket page id, i.e. the members carry no padding.
static_assert(sizeof(ExtendibleHTableDirectoryPage) == HTABLE_DIRECTORY_PAGE_METADATA_SIZE +
207+
HTABLE_DIRECTORY_ARRAY_SIZE +
208+
sizeof(page_id_t) * HTABLE_DIRECTORY_ARRAY_SIZE);
209+
210+
// The directory page must not be larger than BUSTUB_PAGE_SIZE.
static_assert(sizeof(ExtendibleHTableDirectoryPage) <= BUSTUB_PAGE_SIZE);
211+
212+
} // namespace bustub
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// BusTub
4+
//
5+
// extendible_htable_header_page.h
6+
//
7+
// Identification: src/include/storage/page/extendible_htable_header_page.h
8+
//
9+
// Copyright (c) 2015-2023, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/**
14+
* Header page format:
15+
* ---------------------------------------------------
16+
* | DirectoryPageIds(2048) | MaxSize (4) | Free(2044)
17+
* ---------------------------------------------------
18+
*/
19+
20+
#pragma once
21+
22+
#include <cstdlib>
23+
#include "common/config.h"
24+
#include "common/macros.h"
25+
#include "storage/page/hash_table_page_defs.h"
26+
27+
namespace bustub {
28+
29+
// Size in bytes of the header page metadata: a single uint32_t field
// ("MaxSize (4)" in the header page format comment).
static constexpr uint64_t HTABLE_HEADER_PAGE_METADATA_SIZE = sizeof(uint32_t);
30+
// Number of directory page ids stored in the header page's array.
static constexpr uint64_t HTABLE_HEADER_ARRAY_SIZE = 512;
31+
32+
/**
* Header page for extendible hash table.
*
* As declared below, the page stores an array of HTABLE_HEADER_ARRAY_SIZE
* directory page ids followed by the max_size_ field.
*/
class ExtendibleHTableHeaderPage {
33+
public:
34+
// Delete the default constructor to ensure memory safety; Init() is used to set
// default values instead. Note that no destructor is deleted here.
// NOTE(review): DISALLOW_COPY_AND_MOVE is declared in common/macros.h and, by its
// name, presumably removes the copy/move operations -- confirm there.
35+
ExtendibleHTableHeaderPage() = delete;
36+
DISALLOW_COPY_AND_MOVE(ExtendibleHTableHeaderPage);
37+
38+
/**
39+
* After creating a new header page from buffer pool, Init() must be called
40+
* to set default values.
41+
* @param max_size Max size of the array in the header page (defaults to HTABLE_HEADER_ARRAY_SIZE)
42+
*/
43+
void Init(int max_size = HTABLE_HEADER_ARRAY_SIZE);
44+
45+
/**
* Maps a hash to a directory page id.
*
* NOTE(review): the implementation is not visible from this header; the
* description below is inferred from the name and signature -- confirm.
*
* @param hash the hash of the key
* @return presumably the page id of the directory page that hash maps to
*/
auto HashToDirectoryPageId(uint32_t hash) -> page_id_t;
46+
47+
/**
48+
* Prints the header's occupancy information
49+
*/
50+
void PrintHeader();
51+
52+
private:
53+
// Directory page ids; "DirectoryPageIds(2048)" in the header page format comment.
page_id_t directory_page_ids_[HTABLE_HEADER_ARRAY_SIZE];
54+
// Metadata field; "MaxSize (4)" in the header page format comment.
uint32_t max_size_;
55+
};
56+
57+
// The size computation below and the documented page format rely on 4-byte page ids.
static_assert(sizeof(page_id_t) == 4);
58+
59+
// The header page must be exactly the directory page id array plus the metadata,
// i.e. the members carry no padding.
static_assert(sizeof(ExtendibleHTableHeaderPage) ==
60+
sizeof(page_id_t) * HTABLE_HEADER_ARRAY_SIZE + HTABLE_HEADER_PAGE_METADATA_SIZE);
61+
62+
// The header page must not be larger than BUSTUB_PAGE_SIZE.
static_assert(sizeof(ExtendibleHTableHeaderPage) <= BUSTUB_PAGE_SIZE);
63+
64+
} // namespace bustub

0 commit comments

Comments
 (0)