Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit a569a2f

Browse files
committed
Refactoring
1 parent ab9b7ea commit a569a2f

File tree

2 files changed

+112
-72
lines changed

2 files changed

+112
-72
lines changed

src/codegen/util/hash_table.cpp

Lines changed: 68 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,58 +20,32 @@ namespace codegen {
2020
namespace util {
2121

2222
static const uint32_t kDefaultNumElements = 256;
23+
static const uint32_t kNumBlockElems = 1024;
2324

2425
static_assert((kDefaultNumElements & (kDefaultNumElements - 1)) == 0,
2526
"Default number of elements must be a power of two");
26-
/**
27-
* This hash-table uses an open-addressing probing scheme
28-
*
29-
*/
3027

31-
HashTable::HashTable(::peloton::type::AbstractPool &memory, uint64_t key_size,
32-
uint64_t value_size)
33-
: memory_(memory),
34-
entry_size_(sizeof(Entry) + key_size + value_size),
35-
directory_(nullptr),
36-
directory_size_(0),
37-
directory_mask_(0),
38-
block_(nullptr),
39-
next_tuple_pos_(nullptr),
40-
available_bytes_(0),
41-
num_elems_(0),
42-
capacity_(0) {
43-
// Upon creation, we allocate room for kDefaultNumElements in the hash table.
44-
// We assume 50% load factor on the directory, thus the directory size is
45-
// twice the number of elements.
46-
directory_size_ = kDefaultNumElements * 2;
47-
directory_mask_ = directory_size_ - 1;
48-
directory_ = static_cast<Entry **>(
49-
memory_.Allocate(sizeof(Entry *) * directory_size_));
50-
PELOTON_MEMSET(directory_, 0, directory_size_);
28+
////////////////////////////////////////////////////////////////////////////////
29+
///
30+
/// EntryBuffer
31+
///
32+
////////////////////////////////////////////////////////////////////////////////
5133

34+
HashTable::EntryBuffer::EntryBuffer(::peloton::type::AbstractPool &memory,
35+
uint64_t entry_size)
36+
: memory_(memory), entry_size_(entry_size) {
5237
// We also need to allocate some space to store tuples. Tuples are stored
5338
// externally from the main hash table in a separate values memory space.
54-
uint64_t block_size =
55-
sizeof(MemoryBlock) + (entry_size_ * kDefaultNumElements);
39+
uint64_t block_size = sizeof(MemoryBlock) + (entry_size_ * kNumBlockElems);
5640
block_ = reinterpret_cast<MemoryBlock *>(memory_.Allocate(block_size));
5741
block_->next = nullptr;
5842

5943
// Set the next tuple write position and the available bytes
60-
next_tuple_pos_ = block_->data;
44+
next_entry_ = block_->data;
6145
available_bytes_ = block_size - sizeof(MemoryBlock);
62-
63-
// Set table stats
64-
num_elems_ = 0;
65-
capacity_ = kDefaultNumElements;
6646
}
6747

68-
HashTable::~HashTable() {
69-
// Free the directory
70-
if (directory_ != nullptr) {
71-
memory_.Free(directory_);
72-
directory_ = nullptr;
73-
}
74-
48+
HashTable::EntryBuffer::~EntryBuffer() {
7549
// Free all the blocks we've allocated
7650
MemoryBlock *block = block_;
7751
while (block != nullptr) {
@@ -82,41 +56,77 @@ HashTable::~HashTable() {
8256
block_ = nullptr;
8357
}
8458

85-
void HashTable::Init(HashTable &table, executor::ExecutorContext &exec_ctx,
86-
uint64_t key_size, uint64_t value_size) {
87-
new (&table) HashTable(*exec_ctx.GetPool(), key_size, value_size);
88-
}
89-
90-
void HashTable::Destroy(HashTable &table) { table.~HashTable(); }
91-
92-
HashTable::Entry *HashTable::AcquireEntrySlot() {
59+
HashTable::Entry *HashTable::EntryBuffer::NextFree() {
9360
if (entry_size_ > available_bytes_) {
94-
capacity_ *= 2;
95-
uint64_t block_size = sizeof(MemoryBlock) + (entry_size_ * capacity_);
61+
uint64_t block_size = sizeof(MemoryBlock) + (entry_size_ * kNumBlockElems);
9662
auto *new_block =
9763
reinterpret_cast<MemoryBlock *>(memory_.Allocate(block_size));
9864
new_block->next = block_;
9965
block_ = new_block;
100-
next_tuple_pos_ = new_block->data;
66+
next_entry_ = new_block->data;
10167
available_bytes_ = block_size - sizeof(MemoryBlock);
10268
}
10369

104-
auto *entry = reinterpret_cast<Entry *>(next_tuple_pos_);
70+
auto *entry = reinterpret_cast<Entry *>(next_entry_);
10571
entry->next = nullptr;
10672

107-
next_tuple_pos_ += entry_size_;
73+
next_entry_ += entry_size_;
10874
available_bytes_ -= entry_size_;
109-
num_elems_++;
11075

11176
return entry;
11277
}
11378

79+
////////////////////////////////////////////////////////////////////////////////
80+
///
81+
/// Hash Table
82+
///
83+
////////////////////////////////////////////////////////////////////////////////
84+
85+
/**
86+
* This hash-table resolves collisions by chaining: every directory slot heads
* a singly-linked list of entries (linked through Entry::next)
87+
*
88+
*/
89+
90+
// Construct an empty hash table.
//
// memory     - pool used for the directory allocation; it is also handed to
//              the entry buffer for its own allocations
// key_size   - size of a key, in bytes
// value_size - size of a value, in bytes
HashTable::HashTable(::peloton::type::AbstractPool &memory, uint64_t key_size,
                     uint64_t value_size)
    : memory_(memory),
      directory_(nullptr),
      directory_size_(0),
      directory_mask_(0),
      entry_buffer_(memory, Entry::Size(key_size, value_size)),
      num_elems_(0),
      capacity_(kDefaultNumElements) {
  // Upon creation, we allocate room for kDefaultNumElements in the hash table.
  // We assume 50% load factor on the directory, thus the directory size is
  // twice the number of elements.
  directory_size_ = capacity_ * 2;
  // The directory size is a power of two, so (size - 1) works as an index mask
  directory_mask_ = directory_size_ - 1;

  // The directory is an array of Entry pointers. The byte count is computed
  // once and used for both the allocation and the clear: memset() counts
  // bytes, not slots, so passing directory_size_ alone would leave most of
  // the directory uninitialized.
  const uint64_t directory_bytes = sizeof(Entry *) * directory_size_;
  directory_ = static_cast<Entry **>(memory_.Allocate(directory_bytes));
  PELOTON_MEMSET(directory_, 0, directory_bytes);
}
108+
109+
// Release the directory. Only the directory is freed here; nothing else is
// touched by this destructor body.
HashTable::~HashTable() {
  // No directory, nothing to give back
  if (directory_ == nullptr) {
    return;
  }

  // Return the directory to the pool and drop the stale pointer
  memory_.Free(directory_);
  directory_ = nullptr;
}
116+
117+
// Construct a HashTable in place, in the storage that `table` refers to.
//
// table      - the location where the table is constructed (placement new)
// exec_ctx   - the executor context whose pool supplies the table's memory
// key_size   - size of a key, in bytes
// value_size - size of a value, in bytes
void HashTable::Init(HashTable &table, executor::ExecutorContext &exec_ctx,
                     uint64_t key_size, uint64_t value_size) {
  auto &pool = *exec_ctx.GetPool();
  new (&table) HashTable(pool, key_size, value_size);
}
121+
122+
// Explicitly run the destructor of `table`. The storage that holds the table
// object itself is not released by this call.
void HashTable::Destroy(HashTable &table) {
  table.~HashTable();
}
123+
114124
char *HashTable::StoreTupleLazy(uint64_t hash) {
115125
// Since this is a lazy insertion, we just need to acquire/allocate an entry
116126
// from storage. It is assumed that actual construction of the hash table is
117127
// done by a subsequent call to BuildLazy() only after ALL lazy insertions
118128
// have completed.
119-
auto *entry = AcquireEntrySlot();
129+
auto *entry = entry_buffer_.NextFree();
120130
entry->hash = hash;
121131

122132
// Insert the entry into the linked list in the first directory slot
@@ -129,6 +139,8 @@ char *HashTable::StoreTupleLazy(uint64_t hash) {
129139
directory_[1] = entry;
130140
}
131141

142+
num_elems_++;
143+
132144
// Return data pointer for key/value storage
133145
return entry->data;
134146
}
@@ -140,14 +152,16 @@ char *HashTable::StoreTuple(uint64_t hash) {
140152
}
141153

142154
// Acquire/allocate an entry from storage
143-
Entry *entry = AcquireEntrySlot();
155+
Entry *entry = entry_buffer_.NextFree();
144156
entry->hash = hash;
145157

146158
// Insert into hash table
147159
uint64_t index = hash & directory_mask_;
148160
entry->next = directory_[index];
149161
directory_[index] = entry;
150162

163+
num_elems_++;
164+
151165
// Return data pointer for key/value storage
152166
return entry->data;
153167
}

src/include/codegen/util/hash_table.h

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -124,46 +124,72 @@ class HashTable {
124124
template <typename Key, typename Value>
125125
bool Probe(uint64_t hash, const Key &key, Value &value);
126126

127-
// An entry in the hash table
127+
//////////////////////////////////////////////////////////////////////////////
128+
///
129+
/// Helper Classes
130+
///
131+
//////////////////////////////////////////////////////////////////////////////
132+
133+
/**
 * An entry in the hash table that stores a key and value.
 *
 * The fixed-size header below is followed in memory by the key and value
 * bytes, which are reached through the zero-length `data` member.
 */
struct Entry {
  // The hash value recorded for this entry
  uint64_t hash;
  // The next entry in the same linked list (nullptr at the end)
  Entry *next;
  // Start of the key/value storage that trails the header
  char data[0];

  /**
   * Compute the total number of bytes one entry occupies: the header plus
   * room for the key and the value.
   *
   * The sizes are taken as 64-bit values so that the 64-bit sizes callers
   * hold are not silently truncated on the way in.
   *
   * @param key_size The size of the key, in bytes
   * @param value_size The size of the value, in bytes
   * @return sizeof(Entry) + key_size + value_size
   */
  static uint64_t Size(uint64_t key_size, uint64_t value_size) {
    return sizeof(Entry) + key_size + value_size;
  }
};
133145

134-
private:
135-
Entry *AcquireEntrySlot();
146+
/**
147+
* An entry allocator
148+
*/
149+
class EntryBuffer {
150+
public:
151+
EntryBuffer(::peloton::type::AbstractPool &memory, uint64_t entry_size);
152+
153+
~EntryBuffer();
154+
155+
Entry *NextFree();
156+
157+
private:
158+
struct MemoryBlock {
159+
MemoryBlock *next;
160+
char data[0];
161+
};
162+
163+
// The memory pool where block allocations are sourced
164+
::peloton::type::AbstractPool &memory_;
165+
// The sizes of each entry
166+
uint64_t entry_size_;
167+
// The current active block
168+
MemoryBlock *block_;
169+
// A pointer into the block where the next position is
170+
char *next_entry_;
171+
// The number of available bytes left in the block
172+
uint64_t available_bytes_;
173+
};
136174

175+
private:
137176
// Does the hash table need resizing?
138177
bool NeedsResize() const { return num_elems_ == capacity_; }
139178

140179
// Resize the hash table
141180
void Resize();
142181

143-
private:
144-
// A chunk of memory that stores tuple data
145-
struct MemoryBlock {
146-
MemoryBlock *next;
147-
char data[0];
148-
};
149-
150182
private:
151183
// The memory allocator used for all allocations in this hash table
152184
::peloton::type::AbstractPool &memory_;
153185

154-
// The size of an entry in the hash table. Includes space for entry metadata,
155-
// key, and value
156-
uint64_t entry_size_;
157-
158186
// The directory of the hash table
159187
Entry **directory_;
160188
uint64_t directory_size_;
161189
uint64_t directory_mask_;
162190

163-
// A linked list of memory blocks where tuples are stored
164-
MemoryBlock *block_;
165-
char *next_tuple_pos_;
166-
uint64_t available_bytes_;
191+
// Entry allocator
192+
EntryBuffer entry_buffer_;
167193

168194
// The number of elements stored in this hash table, and the max before it
169195
// needs to be resized

0 commit comments

Comments
 (0)