Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 3086c01

Browse files
committed
Hash table proxy
1 parent a569a2f commit 3086c01

File tree

4 files changed

+143
-22
lines changed

4 files changed

+143
-22
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Peloton
4+
//
5+
// hash_table_proxy.cpp
6+
//
7+
// Identification: src/codegen/proxy/hash_table_proxy.cpp
8+
//
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "codegen/proxy/hash_table_proxy.h"
14+
15+
#include "codegen/proxy/executor_context_proxy.h"
16+
17+
namespace peloton {
18+
namespace codegen {
19+
20+
// We need to manually define the type because it is recursive: the second
// field of the struct is a pointer to the struct itself, so the named struct
// has to exist before its body can be described.
//
// Returns the type that codegen.LookupType() finds under the name
// "peloton::Entry" if there is one; otherwise creates the struct, sets its
// two-field body and returns the newly created type.
21+
llvm::Type *EntryProxy::GetType(CodeGen &codegen) {
22+
static const std::string kHashEntryTypeName = "peloton::Entry";
23+
24+
// Check if the hash entry is already defined in the module; if so, reuse it
// rather than creating a second struct with the same name
25+
auto *llvm_type = codegen.LookupType(kHashEntryTypeName);
26+
if (llvm_type != nullptr) {
27+
return llvm_type;
28+
}
29+
30+
// Define the entry struct (the first field is the 64-bit hash, the second is
31+
// the next HashEntry* pointer)
32+
// The struct is created by name only at first; its body is attached by the
// setBody() call below, once a pointer to it can be formed.
auto *entry_type =
33+
llvm::StructType::create(codegen.GetContext(), kHashEntryTypeName);
34+
std::vector<llvm::Type *> elements = {
35+
codegen.Int64Type(), // The hash value
36+
entry_type->getPointerTo() // The next HashEntry* pointer
37+
};
38+
// Attach the two fields as a non-packed struct body
entry_type->setBody(elements, /*is_packed*/ false);
39+
return entry_type;
40+
}
41+
42+
DEFINE_TYPE(HashTable, "peloton::HashTable", memory, directory, size, mask,
43+
entry_buffer, num_elems, capacity, stats);
44+
45+
DEFINE_METHOD(peloton::codegen::util, HashTable, Init);
46+
DEFINE_METHOD(peloton::codegen::util, HashTable, Insert);
47+
DEFINE_METHOD(peloton::codegen::util, HashTable, InsertLazy);
48+
DEFINE_METHOD(peloton::codegen::util, HashTable, BuildLazy);
49+
DEFINE_METHOD(peloton::codegen::util, HashTable, ReserveLazy);
50+
DEFINE_METHOD(peloton::codegen::util, HashTable, MergeLazyUnfinished);
51+
DEFINE_METHOD(peloton::codegen::util, HashTable, Destroy);
52+
53+
} // namespace codegen
54+
} // namespace peloton

src/codegen/util/hash_table.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ namespace util {
2121

2222
static const uint32_t kDefaultNumElements = 256;
2323
static const uint32_t kNumBlockElems = 1024;
24+
static const uint32_t kDefaultEstimatePrecision = 5;
2425

2526
static_assert((kDefaultNumElements & (kDefaultNumElements - 1)) == 0,
2627
"Default number of elements must be a power of two");
@@ -32,7 +33,7 @@ static_assert((kDefaultNumElements & (kDefaultNumElements - 1)) == 0,
3233
////////////////////////////////////////////////////////////////////////////////
3334

3435
HashTable::EntryBuffer::EntryBuffer(::peloton::type::AbstractPool &memory,
35-
uint64_t entry_size)
36+
uint32_t entry_size)
3637
: memory_(memory), entry_size_(entry_size) {
3738
// We also need to allocate some space to store tuples. Tuples are stored
3839
// externally from the main hash table in a separate values memory space.
@@ -87,15 +88,16 @@ HashTable::Entry *HashTable::EntryBuffer::NextFree() {
8788
*
8889
*/
8990

90-
HashTable::HashTable(::peloton::type::AbstractPool &memory, uint64_t key_size,
91-
uint64_t value_size)
91+
HashTable::HashTable(::peloton::type::AbstractPool &memory, uint32_t key_size,
92+
uint32_t value_size)
9293
: memory_(memory),
9394
directory_(nullptr),
9495
directory_size_(0),
9596
directory_mask_(0),
9697
entry_buffer_(memory, Entry::Size(key_size, value_size)),
9798
num_elems_(0),
98-
capacity_(kDefaultNumElements) {
99+
capacity_(kDefaultNumElements),
100+
unique_key_estimate_(libcount::HLL::Create(kDefaultEstimatePrecision)) {
99101
// Upon creation, we allocate room for kDefaultNumElements in the hash table.
100102
// We assume 50% load factor on the directory, thus the directory size is
101103
// twice the number of elements.
@@ -115,13 +117,13 @@ HashTable::~HashTable() {
115117
}
116118

117119
void HashTable::Init(HashTable &table, executor::ExecutorContext &exec_ctx,
118-
uint64_t key_size, uint64_t value_size) {
120+
uint32_t key_size, uint32_t value_size) {
119121
new (&table) HashTable(*exec_ctx.GetPool(), key_size, value_size);
120122
}
121123

122124
void HashTable::Destroy(HashTable &table) { table.~HashTable(); }
123125

124-
char *HashTable::StoreTupleLazy(uint64_t hash) {
126+
char *HashTable::InsertLazy(uint64_t hash) {
125127
// Since this is a lazy insertion, we just need to acquire/allocate an entry
126128
// from storage. It is assumed that actual construction of the hash table is
127129
// done by a subsequent call to BuildLazy() only after ALL lazy insertions
@@ -145,7 +147,7 @@ char *HashTable::StoreTupleLazy(uint64_t hash) {
145147
return entry->data;
146148
}
147149

148-
char *HashTable::StoreTuple(uint64_t hash) {
150+
char *HashTable::Insert(uint64_t hash) {
149151
// Resize the hash table if needed
150152
if (NeedsResize()) {
151153
Resize();
@@ -223,9 +225,13 @@ void HashTable::ReserveLazy(
223225
}
224226

225227
void HashTable::MergeLazyUnfinished(const HashTable &other) {
228+
// Begin with the head of the linked list of entries, stored in the first
229+
// directory entry
230+
PELOTON_ASSERT(other.directory_[0] != nullptr);
226231
auto *head = other.directory_[0];
232+
227233
while (head != nullptr) {
228-
// Find the index and stash the next entry in the linked list
234+
// Compute the index and stash the next entry in the linked list
229235
uint64_t index = head->hash & directory_mask_;
230236
Entry *next = head->next;
231237

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Peloton
4+
//
5+
// hash_table_proxy.h
6+
//
7+
// Identification: src/include/codegen/proxy/hash_table_proxy.h
8+
//
9+
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#pragma once
14+
15+
#include "codegen/proxy/proxy.h"
16+
#include "codegen/util/hash_table.h"
17+
18+
namespace peloton {
19+
namespace codegen {
20+
21+
/// The proxy for util::HashTable::Entry. Only the hash and the next pointer
/// are declared; the trailing data[] storage of the Entry struct is not
/// proxied.
22+
PROXY(Entry) {
23+
// Field 0: the 64-bit hash value
DECLARE_MEMBER(0, uint64_t, hash_val);
24+
// Field 1: pointer to the next entry
DECLARE_MEMBER(1, util::HashTable::Entry *, next);
25+
DECLARE_TYPE;
26+
};
27+
28+
/// The proxy for util::HashTable. Members are declared in the same order as
/// the HashTable constructor's initializer list (memory_, directory_,
/// directory_size_, directory_mask_, entry_buffer_, num_elems_, capacity_,
/// unique_key_estimate_).
/// NOTE(review): the proxied field types are assumed to match the layout of
/// the class members declared in hash_table.h -- confirm when either changes.
29+
PROXY(HashTable) {
30+
// Opaque pointer standing in for the memory pool the constructor receives
DECLARE_MEMBER(0, char *, memory);
31+
DECLARE_MEMBER(1, util::HashTable::Entry **, directory);
32+
DECLARE_MEMBER(2, uint64_t, size);
33+
DECLARE_MEMBER(3, uint64_t, mask);
34+
// Opaque byte array with the same size as the EntryBuffer member
DECLARE_MEMBER(4, char[sizeof(util::HashTable::EntryBuffer)], entry_buffer);
35+
DECLARE_MEMBER(5, uint64_t, num_elems);
36+
DECLARE_MEMBER(6, uint64_t, capacity);
37+
// Opaque bytes sized as a std::unique_ptr; the class member in this slot is
// std::unique_ptr<libcount::HLL> unique_key_estimate_.
// NOTE(review): assumes sizeof(std::unique_ptr<char>) equals
// sizeof(std::unique_ptr<libcount::HLL>) -- TODO confirm.
DECLARE_MEMBER(7, char[sizeof(std::unique_ptr<char>)], stats);
38+
DECLARE_TYPE;
39+
40+
// Proxy all methods that will be called from codegen
41+
DECLARE_METHOD(Init);
42+
DECLARE_METHOD(Insert);
43+
DECLARE_METHOD(InsertLazy);
44+
DECLARE_METHOD(BuildLazy);
45+
DECLARE_METHOD(ReserveLazy);
46+
DECLARE_METHOD(MergeLazyUnfinished);
47+
DECLARE_METHOD(Destroy);
48+
};
49+
50+
/// The type builders for Entry and HashTable
51+
TYPE_BUILDER(Entry, util::HashTable::Entry);
52+
TYPE_BUILDER(HashTable, util::HashTable);
53+
54+
} // namespace codegen
55+
} // namespace peloton

src/include/codegen/util/hash_table.h

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include <cstdint>
1616

17+
#include "libcount/hll.h"
18+
1719
#include "executor/executor_context.h"
1820

1921
namespace peloton {
@@ -28,8 +30,8 @@ namespace util {
2830
class HashTable {
2931
public:
3032
/// Constructor
31-
HashTable(::peloton::type::AbstractPool &memory, uint64_t key_size,
32-
uint64_t value_size);
33+
HashTable(::peloton::type::AbstractPool &memory, uint32_t key_size,
34+
uint32_t value_size);
3335

3436
/// Destructor
3537
~HashTable();
@@ -42,7 +44,7 @@ class HashTable {
4244
* @param value_size The size of the values in bytes
4345
*/
4446
static void Init(HashTable &table, executor::ExecutorContext &exec_ctx,
45-
uint64_t key_size, uint64_t value_size);
47+
uint32_t key_size, uint32_t value_size);
4648

4749
/**
4850
* Clean up all resources allocated by the provided table
@@ -58,15 +60,15 @@ class HashTable {
5860
* @param hash The hash value of the tuple that will be inserted
5961
* @return A memory region where the key and value can be stored
6062
*/
61-
char *StoreTupleLazy(uint64_t hash);
63+
char *InsertLazy(uint64_t hash);
6264

6365
/**
6466
* Make room in the hash-table to store the new key-value pair.
6567
*
6668
* @param hash The hash value of the tuple that will be inserted
6769
* @return A pointer to the space reserved for the new key-value pair
6870
*/
69-
char *StoreTuple(uint64_t hash);
71+
char *Insert(uint64_t hash);
7072

7173
/**
7274
*
@@ -83,7 +85,7 @@ class HashTable {
8385

8486
/**
8587
*
86-
* @param other
88+
* @param other The table whose entry list, headed at its first directory slot, is merged in
8789
*/
8890
void MergeLazyUnfinished(const HashTable &other);
8991

@@ -110,7 +112,7 @@ class HashTable {
110112
* @param value The value to store alongside the key
111113
*/
112114
template <typename Key, typename Value>
113-
void Insert(uint64_t hash, const Key &key, const Value &value);
115+
void TypedInsertLazy(uint64_t hash, const Key &key, const Value &value);
114116

115117
/**
116118
* Probe a key in the hash table. This function is used mostly for testing.
@@ -122,7 +124,7 @@ class HashTable {
122124
* @return True if a value was found. False otherwise.
123125
*/
124126
template <typename Key, typename Value>
125-
bool Probe(uint64_t hash, const Key &key, Value &value);
127+
bool TypedProbe(uint64_t hash, const Key &key, Value &value);
126128

127129
//////////////////////////////////////////////////////////////////////////////
128130
///
@@ -138,7 +140,7 @@ class HashTable {
138140
Entry *next;
139141
char data[0];
140142

141-
static uint64_t Size(uint32_t key_size, uint32_t value_size) {
143+
static uint32_t Size(uint32_t key_size, uint32_t value_size) {
142144
return sizeof(Entry) + key_size + value_size;
143145
}
144146
};
@@ -148,7 +150,7 @@ class HashTable {
148150
*/
149151
class EntryBuffer {
150152
public:
151-
EntryBuffer(::peloton::type::AbstractPool &memory, uint64_t entry_size);
153+
EntryBuffer(::peloton::type::AbstractPool &memory, uint32_t entry_size);
152154

153155
~EntryBuffer();
154156

@@ -163,7 +165,7 @@ class HashTable {
163165
// The memory pool where block allocations are sourced
164166
::peloton::type::AbstractPool &memory_;
165167
// The size of each entry
166-
uint64_t entry_size_;
168+
uint32_t entry_size_;
167169
// The current active block
168170
MemoryBlock *block_;
169171
// A pointer into the block where the next position is
@@ -198,6 +200,9 @@ class HashTable {
198200

199201
// Info about partitions
200202
// ...
203+
204+
// Stats
205+
std::unique_ptr<libcount::HLL> unique_key_estimate_;
201206
};
202207

203208
////////////////////////////////////////////////////////////////////////////////
@@ -207,14 +212,15 @@ class HashTable {
207212
////////////////////////////////////////////////////////////////////////////////
208213

209214
template <typename Key, typename Value>
210-
void HashTable::Insert(uint64_t hash, const Key &key, const Value &value) {
211-
auto *data = StoreTupleLazy(hash);
215+
void HashTable::TypedInsertLazy(uint64_t hash, const Key &key,
216+
const Value &value) {
217+
auto *data = InsertLazy(hash);
212218
*reinterpret_cast<Key *>(data) = key;
213219
*reinterpret_cast<Value *>(data + sizeof(Key)) = value;
214220
}
215221

216222
template <typename Key, typename Value>
217-
bool HashTable::Probe(uint64_t hash, const Key &key, Value &value) {
223+
bool HashTable::TypedProbe(uint64_t hash, const Key &key, Value &value) {
218224
// Initial index in the directory
219225
uint64_t index = hash & directory_mask_;
220226

0 commit comments

Comments
 (0)