Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit f7f19d8

Browse files
committed
Fixes to hash table + tests
1 parent 75ec1d5 commit f7f19d8

File tree

9 files changed

+439
-68
lines changed

9 files changed

+439
-68
lines changed

src/codegen/operator/hash_join_translator.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "codegen/lang/vectorized_loop.h"
1818
#include "codegen/proxy/bloom_filter_proxy.h"
1919
#include "codegen/proxy/hash_table_proxy.h"
20-
#include "codegen/proxy/oa_hash_table_proxy.h"
2120
#include "expression/tuple_value_expression.h"
2221
#include "planner/hash_join_plan.h"
2322

src/codegen/util/hash_table.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,10 @@ void HashTable::ReserveLazy(
218218

219219
directory_size_ = capacity_ * 2;
220220
directory_mask_ = directory_size_ - 1;
221-
directory_ = static_cast<Entry **>(
222-
memory_.Allocate(sizeof(Entry *) * directory_size_));
221+
222+
uint64_t alloc_size = sizeof(Entry *) * directory_size_;
223+
directory_ = static_cast<Entry **>(memory_.Allocate(alloc_size));
224+
PELOTON_MEMSET(directory_, 0, alloc_size);
223225
}
224226

225227
void HashTable::MergeLazyUnfinished(const HashTable &other) {
@@ -238,11 +240,14 @@ void HashTable::MergeLazyUnfinished(const HashTable &other) {
238240
do {
239241
curr = directory_[index];
240242
head->next = curr;
241-
} while (!atomic_cas(directory_ + index, curr, head));
243+
} while (!::peloton::atomic_cas(directory_ + index, curr, head));
242244

243245
// Success, move along
244246
head = next;
245247
}
248+
249+
// Increment number of elements
250+
::peloton::atomic_add(&num_elems_, other.NumElements());
246251
}
247252

248253
void HashTable::Resize() {
@@ -255,9 +260,10 @@ void HashTable::Resize() {
255260
// Allocate the new directory with 50% fill factor
256261
uint64_t new_dir_size = capacity_ * 2;
257262
uint64_t new_dir_mask = new_dir_size - 1;
258-
auto *new_dir =
259-
static_cast<Entry **>(memory_.Allocate(sizeof(Entry *) * new_dir_size));
260-
PELOTON_MEMSET(new_dir, 0, new_dir_size);
263+
264+
uint64_t alloc_size = sizeof(Entry *) * new_dir_size;
265+
auto *new_dir = static_cast<Entry **>(memory_.Allocate(alloc_size));
266+
PELOTON_MEMSET(new_dir, 0, alloc_size);
261267

262268
// Insert all old directory entries into new directory
263269
for (uint32_t i = 0; i < directory_size_; i++) {
@@ -269,8 +275,8 @@ void HashTable::Resize() {
269275
while (entry != nullptr) {
270276
uint64_t index = entry->hash & new_dir_mask;
271277
Entry *next = entry->next;
272-
entry->next = directory_[index];
273-
directory_[index] = entry;
278+
entry->next = new_dir[index];
279+
new_dir[index] = entry;
274280
entry = next;
275281
}
276282
}

src/include/codegen/hash_table.h

Lines changed: 68 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
namespace peloton {
2121
namespace codegen {
2222

23+
/**
24+
* The main hash table access class for util::HashTable.
25+
*/
2326
class HashTable {
2427
public:
2528
/**
@@ -44,20 +47,29 @@ class HashTable {
4447
};
4548

4649
/**
47-
* A callback used when inserting insert a new entry into the hash table. The
48-
* caller implements the StoreValue() method to perform the insertion. The
49-
* argument provided to StoreValue() is the address where the contents can be
50-
* stored. The size of this space is equal to the value returned by
51-
* GetValueSize().
50+
* A callback used when inserting a new entry into the hash table. The caller
51+
* implements StoreValue() to perform the insertion, and GetValueSize() to
52+
* indicate the number of bytes needed to store the value associated with the
53+
* inserted key.
5254
*/
5355
struct InsertCallback {
5456
/** Virtual destructor */
5557
virtual ~InsertCallback() = default;
5658

57-
// Called to store the value associated with the key used for insertion.
59+
/**
60+
* Serialize the value into a provided memory space in the hash table.
61+
*
62+
* @param codegen The codegen instance
63+
* @param space The memory space for the value
64+
*/
5865
virtual void StoreValue(CodeGen &codegen, llvm::Value *space) const = 0;
5966

60-
// Called to determine the size of the payload the caller wants to store
67+
/**
68+
* Return the number of bytes for the value
69+
*
70+
* @param codegen The codegen instance
71+
* @return The number of bytes needed to store the value
72+
*/
6173
virtual llvm::Value *GetValueSize(CodeGen &codegen) const = 0;
6274
};
6375

@@ -71,60 +83,87 @@ class HashTable {
7183
}
7284
};
7385

74-
//===--------------------------------------------------------------------===//
75-
// This callback is used to iterate over all (or a subset) of the entries in
76-
// the hash table that match a provided key. The ProcessEntry() method is
77-
// called for every match and we provide a pointer to the actual HashEntry,
78-
// and a vector of codegen::Value for every value stored in the HashEntry.
79-
//===--------------------------------------------------------------------===//
86+
/**
87+
* A callback used when iterating over the entries in the hash table.
88+
* ProcessEntry() is invoked for each entry in the table, or only those
89+
* entries that match a provided key if a search key is provided.
90+
*/
8091
struct IterateCallback {
8192
/** Virtual destructor */
8293
virtual ~IterateCallback() = default;
8394

84-
// Callback to process an entry in the hash table. The key and opaque set of
85-
// bytes (representing the value space) are provided as arguments.
95+
/**
96+
* The primary callback function for each entry in the table, or for each
97+
* matching key-value pair when provided a search key.
98+
*
99+
* @param codegen The codegen instance
100+
* @param keys The key stored in the hash table
101+
* @param values A pointer to a set of bytes where the value is stored
102+
*/
86103
virtual void ProcessEntry(CodeGen &codegen,
87104
const std::vector<codegen::Value> &keys,
88105
llvm::Value *values) const = 0;
89106
};
90107

91108
class HashTableAccess;
92109

93-
//===--------------------------------------------------------------------===//
94-
// This callback is used when performing a vectorized iteration over all
95-
// entries in the hash table that match a provided key. For each vector of
96-
// entries, ProcessEntries() is called indicating the range of entries in the
97-
// hash-table to cover, and a selection vector indicating which entries in
98-
// this range are occupied.
99-
//===--------------------------------------------------------------------===//
110+
/**
111+
* A callback used when performing a batched/vectorized iteration over the
112+
* entries in the hash table. Iteration may be over the entire table, or a
113+
* subset of the table if a matching probing key was provided.
114+
*/
100115
struct VectorizedIterateCallback {
101116
/** Virtual destructor */
102117
virtual ~VectorizedIterateCallback() = default;
103118

104-
// Process a vector of entries in this hash-table
119+
/**
120+
* Process a vector of entries in the hash table.
121+
*
122+
* @param codegen The codegen instance
123+
* @param start The start of the range of entries in the hash table to cover
124+
* @param end The end of the range of entries in the hash table to cover
125+
* @param selection_vector A vector containing indexes of valid entries
126+
* @param access A hash-table random-access helper
127+
*/
105128
virtual void ProcessEntries(CodeGen &codegen, llvm::Value *start,
106129
llvm::Value *end, Vector &selection_vector,
107130
HashTableAccess &access) const = 0;
108131
};
109132

110-
//===--------------------------------------------------------------------===//
111-
// Convenience class proving a random access interface over the hash-table
112-
//===--------------------------------------------------------------------===//
133+
/**
134+
* Convenience class providing a random access interface over the hash-table
135+
*/
113136
class HashTableAccess {
114137
public:
115138
/** Virtual destructor */
116139
virtual ~HashTableAccess() = default;
117140

141+
/**
142+
* Extracts the key of an entry at a given index into the hash table storing
143+
* results into the output 'keys' vector.
144+
*
145+
* @param codegen The codegen instance
146+
* @param index The index in the directory
147+
* @param[out] keys Where each column of the key is stored
148+
*/
118149
// Extract the keys for the bucket at the given index
119150
virtual void ExtractBucketKeys(CodeGen &codegen, llvm::Value *index,
120151
std::vector<codegen::Value> &keys) const = 0;
152+
153+
/**
154+
* Returns a pointer to a value stored at the entry at the given index.
155+
*
156+
* @param codegen The codegen instance
157+
* @param index An index in the directory
158+
* @return A pointer to where the value is serialized
159+
*/
121160
virtual llvm::Value *BucketValue(CodeGen &codegen,
122161
llvm::Value *index) const = 0;
123162
};
124163

125-
//===--------------------------------------------------------------------===//
126-
// Return type for ProbeOrInsert
127-
//===--------------------------------------------------------------------===//
164+
/**
165+
* A struct storing the result of a probe into the hash table
166+
*/
128167
struct ProbeResult {
129168
// Actual probe result (bool), if the key already exists in the hash table
130169
llvm::Value *key_exists;
@@ -142,26 +181,20 @@ class HashTable {
142181
// Destructor
143182
virtual ~HashTable() = default;
144183

145-
// Initialize the hash-table instance
146184
virtual void Init(CodeGen &codegen, llvm::Value *ht_ptr) const;
147185
virtual void Init(CodeGen &codegen, llvm::Value *exec_ctx,
148186
llvm::Value *ht_ptr) const;
149187

150-
// Generate code to handle the insertion of a new tuple
151188
virtual void ProbeOrInsert(CodeGen &codegen, llvm::Value *ht_ptr,
152189
llvm::Value *hash,
153190
const std::vector<codegen::Value> &key,
154191
ProbeCallback &probe_callback,
155192
InsertCallback &insert_callback) const;
156193

157-
// Probe the hash table and insert a new slot if needed, returning both the
158-
// result and the data pointer
159194
virtual ProbeResult ProbeOrInsert(
160195
CodeGen &codegen, llvm::Value *ht_ptr, llvm::Value *hash,
161196
const std::vector<codegen::Value> &key) const;
162197

163-
// Insert a new entry into the hash table with the given keys, but don't
164-
// perform any key matching or merging
165198
virtual void Insert(CodeGen &codegen, llvm::Value *ht_ptr, llvm::Value *hash,
166199
const std::vector<codegen::Value> &keys,
167200
InsertCallback &callback) const;
@@ -178,21 +211,17 @@ class HashTable {
178211
void MergeLazyUnfinished(CodeGen &codegen, llvm::Value *global_ht,
179212
llvm::Value *local_ht) const;
180213

181-
// Generate code to iterate over the entire hash table
182214
virtual void Iterate(CodeGen &codegen, llvm::Value *ht_ptr,
183215
IterateCallback &callback) const;
184216

185-
// Generate code to iterate over the entire hash table in vectorized fashion
186217
virtual void VectorizedIterate(CodeGen &codegen, llvm::Value *ht_ptr,
187218
Vector &selection_vector,
188219
VectorizedIterateCallback &callback) const;
189220

190-
// Generate code that iterates all the matches
191221
virtual void FindAll(CodeGen &codegen, llvm::Value *ht_ptr,
192222
const std::vector<codegen::Value> &key,
193223
IterateCallback &callback) const;
194224

195-
// Destroy/cleanup the hash table
196225
virtual void Destroy(CodeGen &codegen, llvm::Value *ht_ptr) const;
197226

198227
private:

src/include/codegen/operator/hash_join_translator.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "codegen/compilation_context.h"
1717
#include "codegen/consumer_context.h"
1818
#include "codegen/hash_table.h"
19-
#include "codegen/oa_hash_table.h"
2019
#include "codegen/operator/operator_translator.h"
2120
#include "codegen/updateable_storage.h"
2221

0 commit comments

Comments
 (0)