Skip to content

Commit c077580

Browse files
committed
Reorganize the code: use SimpleMixSplit instead of TwoIndependentMultiplyShift #18
1 parent 13ce917 commit c077580

19 files changed

+435
-176
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
276276
#endif
277277

278278

279-
#ifdef __SSE4_1__
279+
#ifdef __SSE41__
280280

281281
template <typename HashFamily>
282282
struct FilterAPI<SimdBlockFilterFixed16<HashFamily>> {
@@ -990,7 +990,7 @@ int main(int argc, char * argv[]) {
990990
{52, "BlockedBloom (addall)"},
991991
{53, "BlockedBloom64"},
992992
#endif
993-
#ifdef __SSE4_1__
993+
#ifdef __SSE41__
994994
{54, "BlockedBloom16"},
995995
#endif
996996

@@ -1423,7 +1423,7 @@ int main(int argc, char * argv[]) {
14231423
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
14241424
}
14251425
#endif
1426-
#ifdef __SSE4_1__
1426+
#ifdef __SSE41__
14271427
a = 54;
14281428
if (algorithmId == a || (algos.find(a) != algos.end())) {
14291429
auto cf = FilterBenchmark<SimdBlockFilterFixed16<SimpleMixSplit>>(

src/bloom/bloom.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,14 @@ static inline size_t fastrangesize(uint64_t word, size_t p) {
8585
#endif // SIZE_MAX == UINT32_MAX
8686
}
8787

88-
static size_t getBestK(size_t bitsPerItem) {
88+
static inline size_t getBestK(size_t bitsPerItem) {
8989
return max(1, (int)round((double)bitsPerItem * log(2)));
9090
}
9191

9292
// Returns a 64-bit word in which only bit (index mod 64) is set.
//
// The shift is performed on an unsigned 64-bit one instead of the literal
// `1L`: `long` is only 32 bits wide on LLP64 targets (e.g. 64-bit Windows),
// where shifting it by 32..63 is undefined behavior, and even a 64-bit signed
// `long` is shifted into its sign bit when index mod 64 == 63.
inline uint64_t getBit(uint32_t index) {
  return static_cast<uint64_t>(1) << (index & 63);
}
9393

9494
template <typename ItemType, size_t bits_per_item, bool branchless,
95-
typename HashFamily = TwoIndependentMultiplyShift,
95+
typename HashFamily = SimpleMixSplit,
9696
int k = (int)((double)bits_per_item * 0.693147180559945 + 0.5)>
9797
class BloomFilter {
9898
public:
@@ -121,7 +121,7 @@ class BloomFilter {
121121
Status Add(const ItemType &item);
122122

123123
// Add multiple items to the filter.
124-
Status AddAll(const vector<ItemType> data, const size_t start,
124+
Status AddAll(const vector<ItemType>& data, const size_t start,
125125
const size_t end) {
126126
return AddAll(data.data(),start,end);
127127

@@ -269,7 +269,7 @@ BloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::Info() const {
269269
***************/
270270

271271
template <size_t blocksize, int k,
272-
typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
272+
typename HashFamily = ::hashing::SimpleMixSplit>
273273
class SimpleBlockFilter {
274274
private:
275275
const size_t arrayLength;
@@ -296,7 +296,7 @@ SimpleBlockFilter<blocksize, k, HashFamily>::SimpleBlockFilter(
296296

297297
template <size_t blocksize, int k, typename HashFamily>
298298
SimpleBlockFilter<blocksize, k, HashFamily>::~SimpleBlockFilter() noexcept {
299-
delete[] data;
299+
free(data);
300300
data = nullptr;
301301
}
302302

src/bloom/counting_bloom.h

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ inline uint32_t reduce(uint32_t hash, uint32_t n) {
201201
// CountingBloomFilter --------------------------------------------------------------------------------------
202202

203203
template <typename ItemType, size_t bits_per_item, bool branchless,
204-
typename HashFamily = TwoIndependentMultiplyShift,
204+
typename HashFamily = SimpleMixSplit,
205205
int k = (int)((double)bits_per_item * 0.693147180559945 + 0.5)>
206206
class CountingBloomFilter {
207207

@@ -221,7 +221,7 @@ class CountingBloomFilter {
221221
}
222222
// Destructor: releases the `data` array with delete[].
// NOTE(review): assumes `data` was allocated with new[] in the constructor,
// which is outside this hunk — confirm.
~CountingBloomFilter() { delete[] data; }
223223
Status Add(const ItemType &item);
224-
Status AddAll(const vector<ItemType> data, const size_t start, const size_t end);
224+
Status AddAll(const vector<ItemType>& data, const size_t start, const size_t end);
225225
Status Remove(const ItemType &item);
226226
Status Contain(const ItemType &item) const;
227227
// Reports the filter's size as arrayLength * 8.
// NOTE(review): the factor 8 presumably is the byte width of one 64-bit
// array entry — confirm against the declaration of `data`.
size_t SizeInBytes() const { return arrayLength * 8; }
@@ -255,7 +255,7 @@ void CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
255255
template <typename ItemType, size_t bits_per_item, bool branchless,
256256
typename HashFamily, int k>
257257
Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
258-
AddAll(const vector<ItemType> keys, const size_t start, const size_t end) {
258+
AddAll(const vector<ItemType>& keys, const size_t start, const size_t end) {
259259
int blocks = 1 + arrayLength / blockLen;
260260
uint32_t *tmp = new uint32_t[blocks * blockLen];
261261
int *tmpLen = new int[blocks]();
@@ -323,7 +323,7 @@ Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
323323
// #define VERIFY_COUNT
324324

325325
template <typename ItemType, size_t bits_per_item, bool branchless,
326-
typename HashFamily = TwoIndependentMultiplyShift,
326+
typename HashFamily = SimpleMixSplit,
327327
int k = (int)((double)bits_per_item * 0.693147180559945 + 0.5)>
328328
class SuccinctCountingBloomFilter {
329329

@@ -363,7 +363,7 @@ class SuccinctCountingBloomFilter {
363363
}
364364
// Destructor: releases the three arrays `data`, `counts` and `overflow`
// with delete[].
// NOTE(review): assumes all three were allocated with new[] in the
// constructor, which is outside this hunk — confirm.
~SuccinctCountingBloomFilter() { delete[] data; delete[] counts; delete[] overflow; }
365365
Status Add(const ItemType &item);
366-
Status AddAll(const vector<ItemType> data, const size_t start, const size_t end);
366+
Status AddAll(const vector<ItemType>& data, const size_t start, const size_t end);
367367
Status Remove(const ItemType &item);
368368
Status Contain(const ItemType &item) const;
369369
// Reports the filter's size as arrayLength * 8 * 2 + overflowLength * 8.
// NOTE(review): the factor 2 presumably accounts for the two arrays of
// arrayLength entries (`data` and `counts`), and 8 for the byte width of a
// 64-bit entry — confirm against the member declarations.
size_t SizeInBytes() const { return arrayLength * 8 * 2 + overflowLength * 8; }
@@ -413,7 +413,7 @@ Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFami
413413
template <typename ItemType, size_t bits_per_item, bool branchless,
414414
typename HashFamily, int k>
415415
Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
416-
AddAll(const vector<ItemType> keys, const size_t start, const size_t end) {
416+
AddAll(const vector<ItemType>& keys, const size_t start, const size_t end) {
417417
int blocks = 1 + arrayLength / blockLen;
418418
uint32_t *tmp = new uint32_t[blocks * blockLen];
419419
int *tmpLen = new int[blocks]();
@@ -968,15 +968,11 @@ void SuccinctCountingBlockedBloomRankFilter<ItemType, bits_per_item, HashFamily,
968968
Increment(bucket_start + ((a >> 0) & 7), (a >> 3) & 0x3f);
969969
Increment(bucket_start + ((a >> 9) & 7), (a >> 12) & 0x3f);
970970
Increment(bucket_start + ((a >> 18) & 7), (a >> 21) & 0x3f);
971-
// data[bucket_start + ((a >> 0) & 7)] |= 1ULL << ((a >> 3) & 0x3f);
972-
// data[bucket_start + ((a >> 9) & 7)] |= 1ULL << ((a >> 12) & 0x3f);
973-
// data[bucket_start + ((a >> 18) & 7)] |= 1ULL << ((a >> 21) & 0x3f);
974971
}
975972
uint32_t b = (uint32_t)(hash >> 32);
976973
for (int i = 3; i < k; i++) {
977974
a += b;
978975
Increment(bucket_start + (a & 7), (a >> 3) & 0x3f);
979-
// data[bucket_start + (a & 7)] |= 1ULL << ((a >> 3) & 0x3f);
980976
}
981977
}
982978

@@ -1046,7 +1042,6 @@ void SuccinctCountingBlockedBloomRankFilter<ItemType, bits_per_item, HashFamily,
10461042
c = (left << 1) | right;
10471043
if (insertAt >= 64 || (c & 0x8000000000000000L) != 0) {
10481044
// an overflow entry, or overflowing now
1049-
// int index = allocateOverflow();
10501045
int index = nextFreeOverflow;
10511046
nextFreeOverflow = (int) overflow[index];
10521047
for (int i = 0; i < 8; i++) {

src/bloom/simd-block-fixed-fpp.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ static inline uint64_t rotl64(uint64_t n, unsigned int c) {
4141
#ifdef __AVX2__
4242
#include <x86intrin.h>
4343

44-
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
44+
template<typename HashFamily = ::hashing::SimpleMixSplit>
4545
class SimdBlockFilterFixed {
4646
private:
4747
// The filter is divided up into Buckets:
@@ -201,7 +201,7 @@ struct mask64bytes {
201201

202202
typedef struct mask64bytes mask64bytes_t;
203203

204-
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
204+
template<typename HashFamily = ::hashing::SimpleMixSplit>
205205
class SimdBlockFilterFixed64 {
206206
private:
207207
// The filter is divided up into Buckets:
@@ -298,7 +298,7 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
298298
#ifdef __aarch64__
299299
#include <arm_neon.h>
300300

301-
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
301+
template<typename HashFamily = ::hashing::SimpleMixSplit>
302302
class SimdBlockFilterFixed {
303303
private:
304304
// The filter is divided up into Buckets:
@@ -398,11 +398,11 @@ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
398398
/// 16-byte version (not very good)
399399
///////////////////////////////////////////////////////////////////
400400

401-
#ifdef __SSE4_1__
401+
#ifdef __SSE41__
402402

403403
#include <smmintrin.h>
404404

405-
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
405+
template<typename HashFamily = ::hashing::SimpleMixSplit>
406406
class SimdBlockFilterFixed16 {
407407
private:
408408
// The filter is divided up into Buckets:

src/bloom/simd-block.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
using uint32_t = ::std::uint32_t;
2626
using uint64_t = ::std::uint64_t;
2727

28-
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
28+
template<typename HashFamily = ::hashing::SimpleMixSplit>
2929
class SimdBlockFilter {
3030
private:
3131
// The filter is divided up into Buckets:

src/cuckoo/cuckoofilter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ const size_t kMaxCuckooCount = 500;
3131
// PackedTable to enable semi-sorting
3232
template <typename ItemType, size_t bits_per_item,
3333
template <size_t> class TableType = SingleTable,
34-
typename HashFamily = hashing::TwoIndependentMultiplyShift>
34+
typename HashFamily = hashing::SimpleMixSplit>
3535
class CuckooFilter {
3636
// Storage of items
3737
TableType<bits_per_item> *table_;

src/cuckoo/cuckoofilter_stable.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ inline uint32_t reduce(uint64_t hash, uint32_t n) {
2626
// PackedTable to enable semi-sorting
2727
template <typename ItemType, size_t bits_per_item,
2828
template <size_t> class TableType = SingleTable,
29-
typename HashFamily = hashing::TwoIndependentMultiplyShift>
29+
typename HashFamily = hashing::SimpleMixSplit>
3030
class CuckooFilterStable {
3131
// Storage of items
3232
TableType<bits_per_item> *table_;

src/gcs/gcs.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ inline uint32_t MultiStageMonotoneList_get(const MultiStageMonotoneList* list, u
235235
}
236236

237237
template <typename ItemType, size_t bits_per_item,
238-
typename HashFamily = TwoIndependentMultiplyShift>
238+
typename HashFamily = SimpleMixSplit>
239239
class GcsFilter {
240240

241241
int golombShift;
@@ -252,15 +252,15 @@ class GcsFilter {
252252
// Always returns the constant 8.0; the value is not derived from the
// bits_per_item template parameter of GcsFilter.
double BitsPerItem() const { return 8.0; }
253253

254254
public:
255-
explicit GcsFilter(const size_t len) : hasher() {
255+
explicit GcsFilter(const size_t ) : hasher() {
256256
}
257257

258258
~GcsFilter() {
259259
delete[] bucketData;
260260
delete[] monotoneList.data;
261261
}
262262

263-
Status AddAll(const vector<ItemType> data, const size_t start, const size_t end);
263+
Status AddAll(const vector<ItemType>& data, const size_t start, const size_t end);
264264

265265
// Report if the item is inserted, with false positive rate.
266266
Status Contain(const ItemType &item) const;
@@ -285,7 +285,7 @@ int compare_uint64(const void* a, const void* b) {
285285
template <typename ItemType, size_t bits_per_item,
286286
typename HashFamily>
287287
Status GcsFilter<ItemType, bits_per_item, HashFamily>::AddAll(
288-
const vector<ItemType> keys, const size_t start, const size_t end) {
288+
const vector<ItemType>& keys, const size_t start, const size_t end) {
289289

290290
int len = end - start;
291291
// this was found experimentally

src/hashutil.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
namespace hashing {
1313
// See Martin Dietzfelbinger, "Universal hashing and k-wise independent random
1414
// variables via integer arithmetic without primes".
15+
/*
1516
class TwoIndependentMultiplyShift {
17+
1618
unsigned __int128 multiply_, add_;
1719
1820
public:
@@ -30,7 +32,9 @@ class TwoIndependentMultiplyShift {
3032
inline uint64_t operator()(uint64_t key) const {
3133
return (add_ + multiply_ * static_cast<decltype(multiply_)>(key)) >> 64;
3234
}
35+
3336
};
37+
*/
3438

3539
class SimpleMixSplit {
3640

src/morton/compressed_cuckoo_filter.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ Author: Alex D. Breslow
5050
#define INLINE __attribute__((always_inline)) inline
5151
#endif
5252

53-
#define UNROLL __attribute__((optimize("unroll-loops")))
54-
5553
struct Tester; // Forward declaration
5654
std::ostream& operator<<(std::ostream& os, __uint128_t integer);
5755

@@ -780,7 +778,6 @@ namespace CompressedCuckoo{
780778

781779
// Exclusive reduction on counters that are one or more bits
782780
// wide but which fit into a 64-bit word
783-
//UNROLL
784781
INLINE uint16_t exclusive_reduce_with_popcount64(const block_t& b,
785782
uint8_t counter_index) const{
786783
constexpr atom_t one = 1;
@@ -797,7 +794,6 @@ namespace CompressedCuckoo{
797794
// about.
798795
counters &= mask;
799796
atom_t popcount_mask = _popcount_masks[0];
800-
//#pragma unroll
801797
for(uint8_t i = 0; i < _fullness_counter_width; i++){
802798
sum += __builtin_popcountll(counters & popcount_mask) << i;
803799
popcount_mask <<= 1;
@@ -1780,7 +1776,6 @@ namespace CompressedCuckoo{
17801776
}
17811777

17821778
if(_handle_conflicts && conflict_vector.any()){
1783-
//std::cout << "CONFLICT exists!\n";
17841779
for(uint32_t i = 0; i < batch_size; i++){
17851780
statuses[offset + i] = first_level_store(bucket_ids_1[i],
17861781
fingerprints[i], c1[i]);
@@ -1848,7 +1843,6 @@ namespace CompressedCuckoo{
18481843
}
18491844

18501845
if(__builtin_expect(_handle_conflicts && conflict_vector.any(), 0)){
1851-
//std::cout << "CONFLICT exists!\n";
18521846
for(uint32_t i = 0; i < batch_size; i++){
18531847
statuses[offset + i] = first_level_store(bucket_ids[i],
18541848
fingerprints[i], c1[i]);
@@ -2162,7 +2156,7 @@ namespace CompressedCuckoo{
21622156

21632157
// Set OTA bit on overflow if not already set
21642158
inline void set_overflow_status(const hash_t bucket_id,
2165-
const atom_t fingerprint, const hash_t block_id, const hash_t lbi){
2159+
const atom_t fingerprint, const hash_t block_id, const hash_t){
21662160
// Bloom filter
21672161
if(_use_bloom_ota){ // Not yet implemented for selective Morton filter
21682162
return set_bloom_filter_ota(bucket_id, fingerprint, block_id);
@@ -2311,7 +2305,6 @@ namespace CompressedCuckoo{
23112305
} // End of block overflow resolution
23122306
count ++;
23132307
} // End of while loop
2314-
//std::cout << "MAX LOOP COUNT EXCEEDED\n";
23152308
// If you exit the while loop here, it means that the max count has been
23162309
// exceeded.
23172310
return false;

0 commit comments

Comments
 (0)