Skip to content

Commit a25abb3

Browse files
committed
Experimenting with the ARM block bloom version.
1 parent 41f59c5 commit a25abb3

File tree

1 file changed

+18
-86
lines changed

1 file changed

+18
-86
lines changed

src/simd-block-fixed-fpp.h

Lines changed: 18 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -290,23 +290,16 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
290290
#endif //__AVX2__
291291

292292
///////////////////
293-
// 32-bit version ARM
293+
// 16-byte version ARM
294294
//////////////////
295295
#ifdef __aarch64__
296296
#include <arm_neon.h>
297-
struct mask32bytes {
298-
uint32x4_t first;
299-
uint32x4_t second;
300-
};
301-
302-
typedef struct mask32bytes mask32bytes_t;
303-
304297

305298
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
306299
class SimdBlockFilterFixed {
307300
private:
308301
// The filter is divided up into Buckets:
309-
using Bucket = mask32bytes_t;
302+
using Bucket = uint16x8_t;
310303

311304
const int bucketCount;
312305

@@ -337,14 +330,7 @@ class SimdBlockFilterFixed {
337330

338331
template<typename HashFamily>
339332
SimdBlockFilterFixed<HashFamily>::SimdBlockFilterFixed(const int bits)
340-
// bits / 16: fpp 0.1777%, 75.1%
341-
// bits / 20: fpp 0.4384%, 63.4%
342-
// bits / 22: fpp 0.6692%, 61.1%
343-
// bits / 24: fpp 0.9765%, 59.7% <<== seems to be best (1% fpp seems important)
344-
// bits / 26: fpp 1.3769%, 59.3%
345-
// bits / 28: fpp 1.9197%, 60.3%
346-
// bits / 32: fpp 3.3280%, 63.0%
347-
: bucketCount(::std::max(1, bits / 24)),
333+
: bucketCount(::std::max(1, bits / 10)),
348334
directory_(nullptr),
349335
hasher_() {
350336
const size_t alloc_size = bucketCount * sizeof(Bucket);
@@ -363,22 +349,16 @@ SimdBlockFilterFixed<HashFamily>::~SimdBlockFilterFixed() noexcept {
363349
// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
364350
// compile with -fno-strict-aliasing.
365351
// Builds the 128-bit mask used to insert or probe `hash` in one 16-byte bucket.
//
// The low 16 bits of `hash` are broadcast to all eight 16-bit lanes, each lane
// is multiplied by its own constant, and the top 4 bits of every product
// (a value in [0, 15]) select which single bit is set in that lane. The result
// therefore has exactly one bit set per 16-bit lane, i.e. 8 bits per key.
//
// @param hash  32-bit hash of the key; only its low 16 bits take part in the
//              lane arithmetic because the lanes are 16 bits wide.
// @return      a vector with one bit set in each of its eight 16-bit lanes.
template <typename HashFamily>
[[gnu::always_inline]] inline uint16x8_t
SimdBlockFilterFixed<HashFamily>::MakeMask(const uint32_t hash) noexcept {
  const uint16x8_t ones = vdupq_n_u16(1);
  // Per-lane multipliers.
  // NOTE(review): several of these constants are even; multiply-shift schemes
  // normally use odd multipliers -- confirm the values are intentional.
  const uint16x8_t rehash = {0x79d8, 0xe722, 0xf2fb, 0x21ec,
                             0x121b, 0x2302, 0x705a, 0x6e87};
  // Explicit truncation: brace-initialising 16-bit lanes from a uint32_t is a
  // narrowing conversion, so make the cast visible and broadcast with vdup.
  const uint16x8_t hash_data = vdupq_n_u16(static_cast<uint16_t>(hash));
  uint16x8_t answer = vmulq_u16(hash_data, rehash);
  // Keep the top 4 bits of each 16-bit product: a bit index in [0, 15].
  answer = vshrq_n_u16(answer, 12);
  // Turn each bit index into a one-hot 16-bit lane (1 << index).
  answer = vshlq_u16(ones, vreinterpretq_s16_u16(answer));
  return answer;
}
384364

@@ -387,68 +367,20 @@ template <typename HashFamily>
387367
SimdBlockFilterFixed<HashFamily>::Add(const uint64_t key) noexcept {
388368
const auto hash = hasher_(key);
389369
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
390-
const mask32bytes_t mask = MakeMask(hash);
391-
mask32bytes_t bucket = directory_[bucket_idx];
392-
bucket.first = vorrq_u32(mask.first, bucket.first);
393-
bucket.second = vorrq_u32(mask.second, bucket.second);
394-
directory_[bucket_idx] = bucket;
395-
}
396-
397-
const int blockShift = 14;
398-
const int blockLen = 1 << blockShift;
399-
400-
template<typename HashFamily>
401-
void SimdBlockFilterFixed<HashFamily>::ApplyBlock(uint64_t* tmp, int block, int len) {
402-
for (int i = 0; i < len; i += 2) {
403-
uint64_t hash = tmp[(block << blockShift) + i];
404-
uint32_t bucket_idx = tmp[(block << blockShift) + i + 1];
405-
const mask32bytes_t mask = MakeMask(hash);
406-
407-
mask32bytes_t bucket = directory_[bucket_idx];
408-
bucket.first = vorrq_u32(mask.first, bucket.first);
409-
bucket.second = vorrq_u32(mask.second, bucket.second);
410-
directory_[bucket_idx] = bucket;
411-
}
412-
}
413-
414-
template<typename HashFamily>
415-
void SimdBlockFilterFixed<HashFamily>::AddAll(
416-
const vector<uint64_t> keys, const size_t start, const size_t end) {
417-
int blocks = 1 + bucketCount / blockLen;
418-
uint64_t* tmp = new uint64_t[blocks * blockLen];
419-
int* tmpLen = new int[blocks]();
420-
for(size_t i = start; i < end; i++) {
421-
uint64_t key = keys[i];
422-
uint64_t hash = hasher_(key);
423-
uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
424-
int block = bucket_idx >> blockShift;
425-
int len = tmpLen[block];
426-
tmp[(block << blockShift) + len] = hash;
427-
tmp[(block << blockShift) + len + 1] = bucket_idx;
428-
tmpLen[block] = len + 2;
429-
if (len + 2 == blockLen) {
430-
ApplyBlock(tmp, block, len + 1);
431-
tmpLen[block] = 0;
432-
}
433-
}
434-
for (int block = 0; block < blocks; block++) {
435-
ApplyBlock(tmp, block, tmpLen[block]);
436-
}
437-
delete[] tmp;
438-
delete[] tmpLen;
370+
const uint16x8_t mask = MakeMask(hash);
371+
uint16x8_t bucket = directory_[bucket_idx];
372+
directory_[bucket_idx] = vorrq_u32(mask, bucket);
439373
}
440374

441375
template <typename HashFamily>
442376
[[gnu::always_inline]] inline bool
443377
SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
444378
const auto hash = hasher_(key);
445379
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
446-
const mask32bytes_t mask = MakeMask(hash);
447-
const mask32bytes_t bucket = directory_[bucket_idx];
448-
uint32x4_t an1 = vbicq_u32(mask.first, bucket.first);
449-
uint32x4_t an2 = vbicq_u32(mask.second,bucket.second);
450-
uint32x4_t an = vorrq_u32(an1, an2);
451-
uint64x2_t v64 = vreinterpretq_u64_u32(an);
380+
const uint16x8_t mask = MakeMask(hash);
381+
const uint16x8_t bucket = directory_[bucket_idx];
382+
uint16x8_t an = vbicq_u16(mask, bucket);
383+
uint64x2_t v64 = vreinterpretq_u64_u16(an);
452384
uint32x2_t v32 = vqmovn_u64(v64);
453385
uint64x1_t result = vreinterpret_u64_u32(v32);
454386
return vget_lane_u64(result, 0) == 0;

0 commit comments

Comments
 (0)