@@ -290,23 +290,16 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
290290#endif // __AVX2__
291291
292292// /////////////////
293- // 32-bit version ARM
293+ // 16-byte version ARM
294294// ////////////////
295295#ifdef __aarch64__
296296#include < arm_neon.h>
297- struct mask32bytes {
298- uint32x4_t first;
299- uint32x4_t second;
300- };
301-
302- typedef struct mask32bytes mask32bytes_t ;
303-
304297
305298template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
306299class SimdBlockFilterFixed {
307300 private:
308301 // The filter is divided up into Buckets:
309- using Bucket = mask32bytes_t ;
302+ using Bucket = uint16x8_t ;
310303
311304 const int bucketCount;
312305
@@ -337,14 +330,7 @@ class SimdBlockFilterFixed {
337330
338331template <typename HashFamily>
339332SimdBlockFilterFixed<HashFamily>::SimdBlockFilterFixed(const int bits)
340- // bits / 16: fpp 0.1777%, 75.1%
341- // bits / 20: fpp 0.4384%, 63.4%
342- // bits / 22: fpp 0.6692%, 61.1%
343- // bits / 24: fpp 0.9765%, 59.7% <<== seems to be best (1% fpp seems important)
344- // bits / 26: fpp 1.3769%, 59.3%
345- // bits / 28: fpp 1.9197%, 60.3%
346- // bits / 32: fpp 3.3280%, 63.0%
347- : bucketCount(::std::max(1 , bits / 24 )),
333+ : bucketCount(::std::max(1 , bits / 10 )),
348334 directory_ (nullptr ),
349335 hasher_() {
350336 const size_t alloc_size = bucketCount * sizeof (Bucket);
@@ -363,22 +349,16 @@ SimdBlockFilterFixed<HashFamily>::~SimdBlockFilterFixed() noexcept {
363349// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
364350// compile with -fno-strict-aliasing.
template <typename HashFamily>
[[gnu::always_inline]] inline uint16x8_t
SimdBlockFilterFixed<HashFamily>::MakeMask(const uint32_t hash) noexcept {
  // Builds the 16-byte bucket mask for `hash`: eight 16-bit lanes, each with
  // exactly one bit set.
  //
  // Only the low 16 bits of `hash` take part: they are broadcast to every lane
  // and multiplied (modulo 2^16) by a distinct per-lane constant. The top four
  // bits of each 16-bit product (product >> 12, i.e. a value in [0, 15]) select
  // which bit of that lane is set.
  const uint16x8_t ones = vdupq_n_u16(1);
  const uint16x8_t rehash = {0x79d8, 0xe722, 0xf2fb, 0x21ec,
                             0x121b, 0x2302, 0x705a, 0x6e87};

  // Truncate explicitly: brace-initialising 16-bit lanes straight from a
  // uint32_t is a narrowing conversion.
  const uint16x8_t hash_data = vdupq_n_u16(static_cast<uint16_t>(hash));

  uint16x8_t answer = vmulq_u16(hash_data, rehash);
  answer = vshrq_n_u16(answer, 12);
  // vshlq_u16 takes signed per-lane shift counts; the counts here are in
  // [0, 15], so each lane becomes 1 << count.
  answer = vshlq_u16(ones, vreinterpretq_s16_u16(answer));
  return answer;
}
384364
@@ -387,68 +367,20 @@ template <typename HashFamily>
SimdBlockFilterFixed<HashFamily>::Add(const uint64_t key) noexcept {
  // Inserts `key`: hashes it once, uses reduce(rotl64(hash, 32), bucketCount)
  // to choose a bucket, and ORs the MakeMask(hash) bit pattern into that bucket.
  const auto hash = hasher_(key);
  const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
  const uint16x8_t mask = MakeMask(hash);
  const uint16x8_t bucket = directory_[bucket_idx];
  // Both operands are uint16x8_t, so use the 16-bit-lane OR; the u32 variant
  // only compiles when lax vector conversions are enabled.
  directory_[bucket_idx] = vorrq_u16(mask, bucket);
}
440374
441375template <typename HashFamily>
442376[[gnu::always_inline]] inline bool
443377SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
444378 const auto hash = hasher_ (key);
445379 const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
446- const mask32bytes_t mask = MakeMask (hash);
447- const mask32bytes_t bucket = directory_[bucket_idx];
448- uint32x4_t an1 = vbicq_u32 (mask.first , bucket.first );
449- uint32x4_t an2 = vbicq_u32 (mask.second ,bucket.second );
450- uint32x4_t an = vorrq_u32 (an1, an2);
451- uint64x2_t v64 = vreinterpretq_u64_u32 (an);
380+ const uint16x8_t mask = MakeMask (hash);
381+ const uint16x8_t bucket = directory_[bucket_idx];
382+ uint16x8_t an = vbicq_u16 (mask, bucket);
383+ uint64x2_t v64 = vreinterpretq_u64_u16 (an);
452384 uint32x2_t v32 = vqmovn_u64 (v64);
453385 uint64x1_t result = vreinterpret_u64_u32 (v32);
454386 return vget_lane_u64 (result, 0 ) == 0 ;
0 commit comments