Skip to content

Commit ad925f6

Browse files
committed
adding 16-byte vectorized bloom version (not very good).
1 parent 76c35e2 commit ad925f6

File tree

2 files changed

+302
-3
lines changed

2 files changed

+302
-3
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,26 @@ struct FilterAPI<SimdBlockFilterFixed64<HashFamily>> {
211211
};
212212

213213

214+
template <typename HashFamily>
215+
struct FilterAPI<SimdBlockFilterFixed16<HashFamily>> {
216+
using Table = SimdBlockFilterFixed16<HashFamily>;
217+
static Table ConstructFromAddCount(size_t add_count) {
218+
Table ans(ceil(add_count * 8.0 / CHAR_BIT));
219+
return ans;
220+
}
221+
static void Add(uint64_t key, Table* table) {
222+
table->Add(key);
223+
}
224+
static void AddAll(const vector<uint64_t> keys, const size_t start, const size_t end, Table* table) {
225+
throw std::runtime_error("Unsupported");
226+
}
227+
228+
CONTAIN_ATTRIBUTES
229+
static bool Contain(uint64_t key, const Table * table) {
230+
return table->Find(key);
231+
}
232+
};
233+
214234
template <typename HashFamily>
215235
struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
216236
using Table = SimdBlockFilterFixed<HashFamily>;
@@ -683,7 +703,7 @@ int main(int argc, char * argv[]) {
683703
{14,"GCS"}, {15,"CQF"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
684704
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
685705
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"},
686-
{40,"BlockedBloom (addall)"}, {64,"BlockedBloom64"}
706+
{40,"BlockedBloom (addall)"}, {63,"BlockedBloom16"}, {64,"BlockedBloom64"}
687707
};
688708

689709
if (argc < 2) {
@@ -905,6 +925,15 @@ int main(int argc, char * argv[]) {
905925
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
906926
cout << setw(NAME_WIDTH) << names[64] << cf << endl;
907927
}
928+
929+
#endif
930+
#ifdef __SSSE3__
931+
932+
if (algorithmId == 63 || algorithmId < 0 || (algos.find(63) != algos.end())) {
933+
auto cf = FilterBenchmark<SimdBlockFilterFixed16<SimpleMixSplit>>(
934+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
935+
cout << setw(NAME_WIDTH) << names[63] << cf << endl;
936+
}
908937
#endif
909938

910939
if (algorithmId == 11 || algorithmId < 0 || (algos.find(11) != algos.end())) {

src/simd-block-fixed-fpp.h

Lines changed: 272 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#include <algorithm>
1919
#include <new>
2020

21-
#include <immintrin.h>
21+
#include <x86intrin.h>
2222

2323
#include "hashutil.h"
2424

@@ -39,6 +39,8 @@ inline uint64_t rotl64(uint64_t n, unsigned int c) {
3939
return (n << c) | ( n >> ((-c) & mask));
4040
}
4141

42+
#ifdef __AVX2__
43+
4244
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
4345
class SimdBlockFilterFixed {
4446
private:
@@ -184,7 +186,10 @@ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
184186
return _mm256_testc_si256(bucket, mask);
185187
}
186188

189+
///////////////////////////////////////////////////////////////////
187190
/// 64-byte version
191+
///////////////////////////////////////////////////////////////////
192+
188193

189194
struct mask64bytes {
190195
__m256i first;
@@ -270,7 +275,6 @@ SimdBlockFilterFixed64<HashFamily>::Add(const uint64_t key) noexcept {
270275
mask64bytes_t* const bucket = &reinterpret_cast<mask64bytes_t*>(directory_)[bucket_idx];
271276
bucket->first = _mm256_or_si256(mask.first, bucket->first);
272277
bucket->second= _mm256_or_si256(mask.second, bucket->second);
273-
assert(Find(key));
274278
}
275279

276280
template <typename HashFamily>
@@ -282,3 +286,269 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
282286
const mask64bytes_t bucket = reinterpret_cast<mask64bytes_t*>(directory_)[bucket_idx];
283287
return _mm256_testc_si256(bucket.first, mask.first) & _mm256_testc_si256(bucket.second, mask.second);
284288
}
289+
290+
#endif //__AVX2__
291+
292+
///////////////////
293+
// 32-bit version ARM
294+
//////////////////
295+
#ifdef __aarch64__
296+
297+
struct mask32bytes {
298+
uint32x4_t first;
299+
uint32x4_t second;
300+
};
301+
302+
typedef struct mask32bytes mask32bytes_t;
303+
304+
305+
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
306+
class SimdBlockFilterFixed {
307+
private:
308+
// The filter is divided up into Buckets:
309+
using Bucket = mask32bytes_t;
310+
311+
const int bucketCount;
312+
313+
Bucket* directory_;
314+
315+
HashFamily hasher_;
316+
317+
public:
318+
// Consumes at most (1 << log_heap_space) bytes on the heap:
319+
explicit SimdBlockFilterFixed(const int bits);
320+
~SimdBlockFilterFixed() noexcept;
321+
void Add(const uint64_t key) noexcept;
322+
323+
// Add multiple items to the filter.
324+
void AddAll(const vector<uint64_t> data, const size_t start, const size_t end);
325+
326+
bool Find(const uint64_t key) const noexcept;
327+
uint64_t SizeInBytes() const { return sizeof(Bucket) * bucketCount; }
328+
329+
private:
330+
// A helper function for Insert()/Find(). Turns a 32-bit hash into a 256-bit Bucket
331+
// with 1 single 1-bit set in each 32-bit lane.
332+
static mask32bytes_t MakeMask(const uint32_t hash) noexcept;
333+
334+
void ApplyBlock(uint64_t* tmp, int block, int len);
335+
336+
};
337+
338+
template<typename HashFamily>
339+
SimdBlockFilterFixed<HashFamily>::SimdBlockFilterFixed(const int bits)
340+
// bits / 16: fpp 0.1777%, 75.1%
341+
// bits / 20: fpp 0.4384%, 63.4%
342+
// bits / 22: fpp 0.6692%, 61.1%
343+
// bits / 24: fpp 0.9765%, 59.7% <<== seems to be best (1% fpp seems important)
344+
// bits / 26: fpp 1.3769%, 59.3%
345+
// bits / 28: fpp 1.9197%, 60.3%
346+
// bits / 32: fpp 3.3280%, 63.0%
347+
: bucketCount(::std::max(1, bits / 24)),
348+
directory_(nullptr),
349+
hasher_() {
350+
const size_t alloc_size = bucketCount * sizeof(Bucket);
351+
const int malloc_failed =
352+
posix_memalign(reinterpret_cast<void**>(&directory_), 64, alloc_size);
353+
if (malloc_failed) throw ::std::bad_alloc();
354+
memset(directory_, 0, alloc_size);
355+
}
356+
357+
template<typename HashFamily>
358+
SimdBlockFilterFixed<HashFamily>::~SimdBlockFilterFixed() noexcept {
359+
free(directory_);
360+
directory_ = nullptr;
361+
}
362+
363+
// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
364+
// compile with -fno-strict-aliasing.
365+
template <typename HashFamily>
366+
[[gnu::always_inline]] inline mask32bytes_t
367+
SimdBlockFilterFixed<HashFamily>::MakeMask(const uint32_t hash) noexcept {
368+
const uint32x4_t ones = {1,1,1,1};
369+
// Odd contants for hashing:
370+
const uint32x4_t rehash1 = {0x47b6137bU, 0x44974d91U, 0x8824ad5bU,
371+
0xa2b7289dU};
372+
const uint32x4_t rehash2 = {0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
373+
uint32x4_t hash_data = {hash,hash,hash,hash};
374+
// Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
375+
// odd constants, then keep the 5 most significant bits from each product.
376+
uint32x4_t part1 = vmulq_u32(hash_data,rehash1);
377+
uint32x4_t part2 = vmulq_u32(hash_data,rehash2);
378+
part1 = vshrq_n_u32(part1, 27);
379+
part2 = vshrq_n_u32(part2, 27);
380+
vshlq_u32(ones, part1);
381+
vshlq_u32(ones, part2);
382+
mask32bytes_t answer;
383+
answer.first = part1;
384+
answer.second = part2;
385+
return answer;
386+
}
387+
388+
template <typename HashFamily>
389+
[[gnu::always_inline]] inline void
390+
SimdBlockFilterFixed<HashFamily>::Add(const uint64_t key) noexcept {
391+
const auto hash = hasher_(key);
392+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
393+
const mask32bytes_t mask = MakeMask(hash);
394+
mask32bytes_t bucket = directory_[bucket_idx];
395+
bucket.first = vorrq_u32(mask.first, bucket.first);
396+
bucket.second = vorrq_u32(mask.second, bucket.second);
397+
directory_[bucket_idx] = bucket;
398+
}
399+
400+
const int blockShift = 14;
401+
const int blockLen = 1 << blockShift;
402+
403+
template<typename HashFamily>
404+
void SimdBlockFilterFixed<HashFamily>::ApplyBlock(uint64_t* tmp, int block, int len) {
405+
for (int i = 0; i < len; i += 2) {
406+
uint64_t hash = tmp[(block << blockShift) + i];
407+
uint32_t bucket_idx = tmp[(block << blockShift) + i + 1];
408+
const mask32bytes_t mask = MakeMask(hash);
409+
410+
mask32bytes_t bucket = directory_[bucket_idx];
411+
bucket.first = vorrq_u32(mask.first, bucket.first);
412+
bucket.second = vorrq_u32(mask.second, bucket.second);
413+
directory_[bucket_idx] = bucket;
414+
}
415+
}
416+
417+
template<typename HashFamily>
418+
void SimdBlockFilterFixed<HashFamily>::AddAll(
419+
const vector<uint64_t> keys, const size_t start, const size_t end) {
420+
int blocks = 1 + bucketCount / blockLen;
421+
uint64_t* tmp = new uint64_t[blocks * blockLen];
422+
int* tmpLen = new int[blocks]();
423+
for(size_t i = start; i < end; i++) {
424+
uint64_t key = keys[i];
425+
uint64_t hash = hasher_(key);
426+
uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
427+
int block = bucket_idx >> blockShift;
428+
int len = tmpLen[block];
429+
tmp[(block << blockShift) + len] = hash;
430+
tmp[(block << blockShift) + len + 1] = bucket_idx;
431+
tmpLen[block] = len + 2;
432+
if (len + 2 == blockLen) {
433+
ApplyBlock(tmp, block, len + 1);
434+
tmpLen[block] = 0;
435+
}
436+
}
437+
for (int block = 0; block < blocks; block++) {
438+
ApplyBlock(tmp, block, tmpLen[block]);
439+
}
440+
delete[] tmp;
441+
delete[] tmpLen;
442+
}
443+
444+
template <typename HashFamily>
445+
[[gnu::always_inline]] inline bool
446+
SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
447+
const auto hash = hasher_(key);
448+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
449+
const mask32bytes_t mask = MakeMask(hash);
450+
const mask32bytes_t bucket = directory_[bucket_idx];
451+
uint32x4_t an1 = vbicq_u32(bucket.first,mask.first);
452+
uint32x4_t an2 = vbicq_u32(bucket.second,mask.second);
453+
uint32x4_t an = vorrq_u32(an1, an2);
454+
uint64x2_t v64 = vreinterpretq_u64_u32(an);
455+
uint32x2_t v32 = vqmovn_u64(v64);
456+
uint64x1_t result = vreinterpret_u64_u32(v32);
457+
return vget_lane_u64(result, 0);
458+
}
459+
460+
461+
462+
#endif // __aarch64__
463+
464+
465+
///////////////////////////////////////////////////////////////////
466+
/// 16-byte version (not very good)
467+
///////////////////////////////////////////////////////////////////
468+
469+
#ifdef __SSSE3__
470+
471+
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
472+
class SimdBlockFilterFixed16 {
473+
private:
474+
// The filter is divided up into Buckets:
475+
using Bucket = __m128i;
476+
477+
const int bucketCount;
478+
479+
Bucket* directory_;
480+
481+
HashFamily hasher_;
482+
483+
public:
484+
// Consumes at most (1 << log_heap_space) bytes on the heap:
485+
explicit SimdBlockFilterFixed16(const int bits);
486+
~SimdBlockFilterFixed16() noexcept;
487+
void Add(const uint64_t key) noexcept;
488+
489+
bool Find(const uint64_t key) const noexcept;
490+
uint64_t SizeInBytes() const { return sizeof(Bucket) * bucketCount; }
491+
492+
private:
493+
static __m128i MakeMask(const uint64_t hash) noexcept;
494+
495+
496+
};
497+
498+
template<typename HashFamily>
499+
SimdBlockFilterFixed16<HashFamily>::SimdBlockFilterFixed16(const int bits)
500+
501+
: bucketCount(::std::max(1, bits / 10)),
502+
directory_(nullptr),
503+
hasher_() {
504+
const size_t alloc_size = bucketCount * sizeof(Bucket);
505+
const int malloc_failed =
506+
posix_memalign(reinterpret_cast<void**>(&directory_), 64, alloc_size);
507+
if (malloc_failed) throw ::std::bad_alloc();
508+
memset(directory_, 0, alloc_size);
509+
}
510+
511+
template<typename HashFamily>
512+
SimdBlockFilterFixed16<HashFamily>::~SimdBlockFilterFixed16() noexcept {
513+
free(directory_);
514+
directory_ = nullptr;
515+
}
516+
517+
518+
template <typename HashFamily>
519+
[[gnu::always_inline]] inline __m128i
520+
SimdBlockFilterFixed16<HashFamily>::MakeMask(const uint64_t hash) noexcept {
521+
const __m128i rehash1 = _mm_setr_epi16(0x47b5, 0x4497, 0x8823,
522+
0xa2b7, 0x7053, 0x2df1, 0x9efc, 0x5c6b);
523+
__m128i hash_data = _mm_set1_epi32(hash );
524+
__m128i h = _mm_mulhi_epi16(rehash1, hash_data);
525+
return _mm_shuffle_epi8(_mm_set_epi8(1,2,4,8,16,32,64,-128,1,2,4,8,16,32,64,-128),h);
526+
}
527+
528+
529+
530+
531+
template <typename HashFamily>
532+
[[gnu::always_inline]] inline void
533+
SimdBlockFilterFixed16<HashFamily>::Add(const uint64_t key) noexcept {
534+
const auto hash = hasher_(key);
535+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
536+
__m128i mask = MakeMask(hash);
537+
__m128i* const bucket = reinterpret_cast<__m128i*>(directory_) + bucket_idx;
538+
__m128i bucketvalue = _mm_loadu_si128(bucket);
539+
bucketvalue = _mm_or_si128(bucketvalue, mask);
540+
_mm_storeu_si128(bucket,bucketvalue);
541+
}
542+
543+
template <typename HashFamily>
544+
[[gnu::always_inline]] inline bool
545+
SimdBlockFilterFixed16<HashFamily>::Find(const uint64_t key) const noexcept {
546+
const auto hash = hasher_(key);
547+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
548+
const __m128i mask = MakeMask(hash);
549+
__m128i* const bucket = reinterpret_cast<__m128i*>(directory_) + bucket_idx;
550+
__m128i bucketvalue = _mm_loadu_si128(bucket);
551+
return _mm_testc_si128(bucketvalue,mask);
552+
}
553+
554+
#endif // #ifdef __SSSE3__

0 commit comments

Comments
 (0)