1818#include < algorithm>
1919#include < new>
2020
21- #include < immintrin .h>
21+ #include < x86intrin .h>
2222
2323#include " hashutil.h"
2424
@@ -39,6 +39,8 @@ inline uint64_t rotl64(uint64_t n, unsigned int c) {
3939 return (n << c) | ( n >> ((-c) & mask));
4040}
4141
42+ #ifdef __AVX2__
43+
4244template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
4345class SimdBlockFilterFixed {
4446 private:
@@ -184,7 +186,10 @@ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
184186 return _mm256_testc_si256 (bucket, mask);
185187}
186188
189+ // /////////////////////////////////////////////////////////////////
187190// / 64-byte version
191+ // /////////////////////////////////////////////////////////////////
192+
188193
189194struct mask64bytes {
190195 __m256i first;
@@ -270,7 +275,6 @@ SimdBlockFilterFixed64<HashFamily>::Add(const uint64_t key) noexcept {
270275 mask64bytes_t * const bucket = &reinterpret_cast <mask64bytes_t *>(directory_)[bucket_idx];
271276 bucket->first = _mm256_or_si256 (mask.first , bucket->first );
272277 bucket->second = _mm256_or_si256 (mask.second , bucket->second );
273- assert (Find (key));
274278}
275279
276280template <typename HashFamily>
@@ -282,3 +286,269 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
282286 const mask64bytes_t bucket = reinterpret_cast <mask64bytes_t *>(directory_)[bucket_idx];
283287 return _mm256_testc_si256 (bucket.first , mask.first ) & _mm256_testc_si256 (bucket.second , mask.second );
284288}
289+
290+ #endif // __AVX2__
291+
292+ // /////////////////
293+ // 32-bit version ARM
294+ // ////////////////
295+ #ifdef __aarch64__
296+
297+ struct mask32bytes {
298+ uint32x4_t first;
299+ uint32x4_t second;
300+ };
301+
302+ typedef struct mask32bytes mask32bytes_t ;
303+
304+
305+ template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
306+ class SimdBlockFilterFixed {
307+ private:
308+ // The filter is divided up into Buckets:
309+ using Bucket = mask32bytes_t ;
310+
311+ const int bucketCount;
312+
313+ Bucket* directory_;
314+
315+ HashFamily hasher_;
316+
317+ public:
318+ // Consumes at most (1 << log_heap_space) bytes on the heap:
319+ explicit SimdBlockFilterFixed (const int bits);
320+ ~SimdBlockFilterFixed () noexcept ;
321+ void Add (const uint64_t key) noexcept ;
322+
323+ // Add multiple items to the filter.
324+ void AddAll (const vector<uint64_t > data, const size_t start, const size_t end);
325+
326+ bool Find (const uint64_t key) const noexcept ;
327+ uint64_t SizeInBytes () const { return sizeof (Bucket) * bucketCount; }
328+
329+ private:
330+ // A helper function for Insert()/Find(). Turns a 32-bit hash into a 256-bit Bucket
331+ // with 1 single 1-bit set in each 32-bit lane.
332+ static mask32bytes_t MakeMask (const uint32_t hash) noexcept ;
333+
334+ void ApplyBlock (uint64_t * tmp, int block, int len);
335+
336+ };
337+
338+ template <typename HashFamily>
339+ SimdBlockFilterFixed<HashFamily>::SimdBlockFilterFixed(const int bits)
340+ // bits / 16: fpp 0.1777%, 75.1%
341+ // bits / 20: fpp 0.4384%, 63.4%
342+ // bits / 22: fpp 0.6692%, 61.1%
343+ // bits / 24: fpp 0.9765%, 59.7% <<== seems to be best (1% fpp seems important)
344+ // bits / 26: fpp 1.3769%, 59.3%
345+ // bits / 28: fpp 1.9197%, 60.3%
346+ // bits / 32: fpp 3.3280%, 63.0%
347+ : bucketCount(::std::max(1 , bits / 24 )),
348+ directory_ (nullptr ),
349+ hasher_() {
350+ const size_t alloc_size = bucketCount * sizeof (Bucket);
351+ const int malloc_failed =
352+ posix_memalign (reinterpret_cast <void **>(&directory_), 64 , alloc_size);
353+ if (malloc_failed) throw ::std::bad_alloc ();
354+ memset (directory_, 0 , alloc_size);
355+ }
356+
357+ template <typename HashFamily>
358+ SimdBlockFilterFixed<HashFamily>::~SimdBlockFilterFixed () noexcept {
359+ free (directory_);
360+ directory_ = nullptr ;
361+ }
362+
363+ // The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
364+ // compile with -fno-strict-aliasing.
365+ template <typename HashFamily>
366+ [[gnu::always_inline]] inline mask32bytes_t
367+ SimdBlockFilterFixed<HashFamily>::MakeMask(const uint32_t hash) noexcept {
368+ const uint32x4_t ones = {1 ,1 ,1 ,1 };
369+ // Odd contants for hashing:
370+ const uint32x4_t rehash1 = {0x47b6137bU , 0x44974d91U , 0x8824ad5bU ,
371+ 0xa2b7289dU };
372+ const uint32x4_t rehash2 = {0x705495c7U , 0x2df1424bU , 0x9efc4947U , 0x5c6bfb31U };
373+ uint32x4_t hash_data = {hash,hash,hash,hash};
374+ // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
375+ // odd constants, then keep the 5 most significant bits from each product.
376+ uint32x4_t part1 = vmulq_u32 (hash_data,rehash1);
377+ uint32x4_t part2 = vmulq_u32 (hash_data,rehash2);
378+ part1 = vshrq_n_u32 (part1, 27 );
379+ part2 = vshrq_n_u32 (part2, 27 );
380+ vshlq_u32 (ones, part1);
381+ vshlq_u32 (ones, part2);
382+ mask32bytes_t answer;
383+ answer.first = part1;
384+ answer.second = part2;
385+ return answer;
386+ }
387+
388+ template <typename HashFamily>
389+ [[gnu::always_inline]] inline void
390+ SimdBlockFilterFixed<HashFamily>::Add(const uint64_t key) noexcept {
391+ const auto hash = hasher_ (key);
392+ const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
393+ const mask32bytes_t mask = MakeMask (hash);
394+ mask32bytes_t bucket = directory_[bucket_idx];
395+ bucket.first = vorrq_u32 (mask.first , bucket.first );
396+ bucket.second = vorrq_u32 (mask.second , bucket.second );
397+ directory_[bucket_idx] = bucket;
398+ }
399+
400+ const int blockShift = 14 ;
401+ const int blockLen = 1 << blockShift;
402+
403+ template <typename HashFamily>
404+ void SimdBlockFilterFixed<HashFamily>::ApplyBlock(uint64_t * tmp, int block, int len) {
405+ for (int i = 0 ; i < len; i += 2 ) {
406+ uint64_t hash = tmp[(block << blockShift) + i];
407+ uint32_t bucket_idx = tmp[(block << blockShift) + i + 1 ];
408+ const mask32bytes_t mask = MakeMask (hash);
409+
410+ mask32bytes_t bucket = directory_[bucket_idx];
411+ bucket.first = vorrq_u32 (mask.first , bucket.first );
412+ bucket.second = vorrq_u32 (mask.second , bucket.second );
413+ directory_[bucket_idx] = bucket;
414+ }
415+ }
416+
417+ template <typename HashFamily>
418+ void SimdBlockFilterFixed<HashFamily>::AddAll(
419+ const vector<uint64_t > keys, const size_t start, const size_t end) {
420+ int blocks = 1 + bucketCount / blockLen;
421+ uint64_t * tmp = new uint64_t [blocks * blockLen];
422+ int * tmpLen = new int [blocks]();
423+ for (size_t i = start; i < end; i++) {
424+ uint64_t key = keys[i];
425+ uint64_t hash = hasher_ (key);
426+ uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
427+ int block = bucket_idx >> blockShift;
428+ int len = tmpLen[block];
429+ tmp[(block << blockShift) + len] = hash;
430+ tmp[(block << blockShift) + len + 1 ] = bucket_idx;
431+ tmpLen[block] = len + 2 ;
432+ if (len + 2 == blockLen) {
433+ ApplyBlock (tmp, block, len + 1 );
434+ tmpLen[block] = 0 ;
435+ }
436+ }
437+ for (int block = 0 ; block < blocks; block++) {
438+ ApplyBlock (tmp, block, tmpLen[block]);
439+ }
440+ delete[] tmp;
441+ delete[] tmpLen;
442+ }
443+
444+ template <typename HashFamily>
445+ [[gnu::always_inline]] inline bool
446+ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
447+ const auto hash = hasher_ (key);
448+ const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
449+ const mask32bytes_t mask = MakeMask (hash);
450+ const mask32bytes_t bucket = directory_[bucket_idx];
451+ uint32x4_t an1 = vbicq_u32 (bucket.first ,mask.first );
452+ uint32x4_t an2 = vbicq_u32 (bucket.second ,mask.second );
453+ uint32x4_t an = vorrq_u32 (an1, an2);
454+ uint64x2_t v64 = vreinterpretq_u64_u32 (an);
455+ uint32x2_t v32 = vqmovn_u64 (v64);
456+ uint64x1_t result = vreinterpret_u64_u32 (v32);
457+ return vget_lane_u64 (result, 0 );
458+ }
459+
460+
461+
462+ #endif // __aarch64__
463+
464+
465+ // /////////////////////////////////////////////////////////////////
466+ // / 16-byte version (not very good)
467+ // /////////////////////////////////////////////////////////////////
468+
469+ #ifdef __SSSE3__
470+
471+ template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
472+ class SimdBlockFilterFixed16 {
473+ private:
474+ // The filter is divided up into Buckets:
475+ using Bucket = __m128i;
476+
477+ const int bucketCount;
478+
479+ Bucket* directory_;
480+
481+ HashFamily hasher_;
482+
483+ public:
484+ // Consumes at most (1 << log_heap_space) bytes on the heap:
485+ explicit SimdBlockFilterFixed16 (const int bits);
486+ ~SimdBlockFilterFixed16 () noexcept ;
487+ void Add (const uint64_t key) noexcept ;
488+
489+ bool Find (const uint64_t key) const noexcept ;
490+ uint64_t SizeInBytes () const { return sizeof (Bucket) * bucketCount; }
491+
492+ private:
493+ static __m128i MakeMask (const uint64_t hash) noexcept ;
494+
495+
496+ };
497+
498+ template <typename HashFamily>
499+ SimdBlockFilterFixed16<HashFamily>::SimdBlockFilterFixed16(const int bits)
500+
501+ : bucketCount(::std::max(1 , bits / 10 )),
502+ directory_ (nullptr ),
503+ hasher_() {
504+ const size_t alloc_size = bucketCount * sizeof (Bucket);
505+ const int malloc_failed =
506+ posix_memalign (reinterpret_cast <void **>(&directory_), 64 , alloc_size);
507+ if (malloc_failed) throw ::std::bad_alloc ();
508+ memset (directory_, 0 , alloc_size);
509+ }
510+
511+ template <typename HashFamily>
512+ SimdBlockFilterFixed16<HashFamily>::~SimdBlockFilterFixed16 () noexcept {
513+ free (directory_);
514+ directory_ = nullptr ;
515+ }
516+
517+
518+ template <typename HashFamily>
519+ [[gnu::always_inline]] inline __m128i
520+ SimdBlockFilterFixed16<HashFamily>::MakeMask(const uint64_t hash) noexcept {
521+ const __m128i rehash1 = _mm_setr_epi16 (0x47b5 , 0x4497 , 0x8823 ,
522+ 0xa2b7 , 0x7053 , 0x2df1 , 0x9efc , 0x5c6b );
523+ __m128i hash_data = _mm_set1_epi32 (hash );
524+ __m128i h = _mm_mulhi_epi16 (rehash1, hash_data);
525+ return _mm_shuffle_epi8 (_mm_set_epi8 (1 ,2 ,4 ,8 ,16 ,32 ,64 ,-128 ,1 ,2 ,4 ,8 ,16 ,32 ,64 ,-128 ),h);
526+ }
527+
528+
529+
530+
531+ template <typename HashFamily>
532+ [[gnu::always_inline]] inline void
533+ SimdBlockFilterFixed16<HashFamily>::Add(const uint64_t key) noexcept {
534+ const auto hash = hasher_ (key);
535+ const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
536+ __m128i mask = MakeMask (hash);
537+ __m128i* const bucket = reinterpret_cast <__m128i*>(directory_) + bucket_idx;
538+ __m128i bucketvalue = _mm_loadu_si128 (bucket);
539+ bucketvalue = _mm_or_si128 (bucketvalue, mask);
540+ _mm_storeu_si128 (bucket,bucketvalue);
541+ }
542+
543+ template <typename HashFamily>
544+ [[gnu::always_inline]] inline bool
545+ SimdBlockFilterFixed16<HashFamily>::Find(const uint64_t key) const noexcept {
546+ const auto hash = hasher_ (key);
547+ const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
548+ const __m128i mask = MakeMask (hash);
549+ __m128i* const bucket = reinterpret_cast <__m128i*>(directory_) + bucket_idx;
550+ __m128i bucketvalue = _mm_loadu_si128 (bucket);
551+ return _mm_testc_si128 (bucketvalue,mask);
552+ }
553+
554+ #endif // #ifdef __SSSE3__
0 commit comments