Skip to content

Commit db4baf9

Browse files
committed
Fix for aarch64 (ARM NEON) version.
1 parent ad925f6 commit db4baf9

File tree

3 files changed

+84
-17
lines changed

3 files changed

+84
-17
lines changed

benchmarks/Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,14 @@
22
OPT = -O3 -DNDEBUG
33
#OPT = -g -ggdb
44

5-
CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ $(OPT) -march=native
5+
CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ $(OPT)
6+
7+
# Machine hardware name of the build host. `uname -m` is the POSIX-specified
# option; `uname -p` is a non-portable extension that prints "unknown" on
# several Linux distributions, which would defeat the aarch64 test below.
UNAME_M := $(shell uname -m)

# Add -march=native on every host except aarch64; aarch64 builds keep
# CXXFLAGS as set above, with no -march option.
# NOTE(review): presumably -march=native is not reliably accepted by aarch64
# toolchains -- confirm against the compilers you target.
ifneq ($(UNAME_M),aarch64)
CXXFLAGS += -march=native
endif
613

714
LDFLAGS = -Wall
815

benchmarks/bulk-insert-and-query.cc

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@
3232
#include "gcs.h"
3333
#ifdef __AVX2__
3434
#include "gqf_cpp.h"
35+
#include "simd-block.h"
3536
#endif
3637
#include "random.h"
37-
#ifdef __AVX2__
38-
#include "simd-block.h"
3938
#include "simd-block-fixed-fpp.h"
40-
#endif
4139
#include "timing.h"
4240
#ifdef __linux__
4341
#include "linux-perf-events.h"
@@ -169,6 +167,30 @@ struct FilterAPI<CuckooFilterStable<ItemType, bits_per_item, TableType, HashFami
169167
}
170168
};
171169

170+
171+
#ifdef __aarch64__
172+
template <typename HashFamily>
173+
struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
174+
using Table = SimdBlockFilterFixed<HashFamily>;
175+
static Table ConstructFromAddCount(size_t add_count) {
176+
Table ans(ceil(add_count * 8.0 / CHAR_BIT));
177+
return ans;
178+
}
179+
static void Add(uint64_t key, Table* table) {
180+
table->Add(key);
181+
}
182+
static void AddAll(const vector<uint64_t> keys, const size_t start, const size_t end, Table* table) {
183+
table->AddAll(keys, start, end);
184+
}
185+
186+
CONTAIN_ATTRIBUTES
187+
static bool Contain(uint64_t key, const Table * table) {
188+
return table->Find(key);
189+
}
190+
};
191+
192+
#endif
193+
172194
#ifdef __AVX2__
173195
template <typename HashFamily>
174196
struct FilterAPI<SimdBlockFilter<HashFamily>> {
@@ -695,6 +717,18 @@ int main() {
695717
*/
696718

697719
int main(int argc, char * argv[]) {
720+
#ifdef __aarch64__
721+
std::map<int,std::string> names = {{0,"Xor8"},{1,"Xor12"},
722+
{2,"Xor16"}, {3,"Cuckoo8"}, {4,"Cuckoo12"},
723+
{5,"Cuckoo16"}, {6,"CuckooSemiSort13" }, {7,"Bloom8"},
724+
{8,"Bloom12" }, {9,"Bloom16"}, {10,"BlockedBloom"},
725+
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
726+
{14,"GCS"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
727+
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
728+
{38,"Bloom12 (addall)"},
729+
{40,"BlockedBloom (addall)"}
730+
};
731+
#elif defined( __AVX2__)
698732
std::map<int,std::string> names = {{0,"Xor8"},{1,"Xor12"},
699733
{2,"Xor16"}, {3,"Cuckoo8"}, {4,"Cuckoo12"},
700734
{5,"Cuckoo16"}, {6,"CuckooSemiSort13" }, {7,"Bloom8"},
@@ -705,6 +739,18 @@ int main(int argc, char * argv[]) {
705739
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"},
706740
{40,"BlockedBloom (addall)"}, {63,"BlockedBloom16"}, {64,"BlockedBloom64"}
707741
};
742+
#else
743+
std::map<int,std::string> names = {{0,"Xor8"},{1,"Xor12"},
744+
{2,"Xor16"}, {3,"Cuckoo8"}, {4,"Cuckoo12"},
745+
{5,"Cuckoo16"}, {6,"CuckooSemiSort13" }, {7,"Bloom8"},
746+
{8,"Bloom12" }, {9,"Bloom16"},
747+
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
748+
{14,"GCS"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
749+
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
750+
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"}
751+
};
752+
#endif
753+
708754

709755
if (argc < 2) {
710756
cout << "Usage: " << argv[0] << " <numberOfEntries> [<algorithmId> [<seed>]]" << endl;
@@ -736,6 +782,10 @@ int main(int argc, char * argv[]) {
736782
// we have a list of algos
737783
algorithmId = 9999999; // disabling
738784
parse_comma_separated(argv[2], algos);
785+
if(algos.size() == 0) {
786+
cerr<< " no algo selected " << endl;
787+
return -3;
788+
}
739789
} else {
740790
// we select just one
741791
stringstream input_string_2(argv[2]);
@@ -914,6 +964,14 @@ int main(int argc, char * argv[]) {
914964
cout << setw(NAME_WIDTH) << names[9] << cf << endl;
915965
}
916966

967+
#ifdef __aarch64__
968+
if (algorithmId == 10 || algorithmId < 0 || (algos.find(10) != algos.end())) {
969+
auto cf = FilterBenchmark<SimdBlockFilterFixed<>>(
970+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
971+
cout << setw(NAME_WIDTH) << names[10] << cf << endl;
972+
}
973+
#endif
974+
917975
#ifdef __AVX2__
918976
if (algorithmId == 10 || algorithmId < 0 || (algos.find(10) != algos.end())) {
919977
auto cf = FilterBenchmark<SimdBlockFilterFixed<>>(
@@ -1091,8 +1149,13 @@ int main(int argc, char * argv[]) {
10911149
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
10921150
}
10931151
#endif
1094-
1095-
1152+
#ifdef __aarch64__
1153+
if (algorithmId == 40 || algorithmId < 0 || (algos.find(40) != algos.end())) {
1154+
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
1155+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
1156+
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
1157+
}
1158+
#endif
10961159

10971160
// broken algorithms (don't always find all key)
10981161
/*

src/simd-block-fixed-fpp.h

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include <algorithm>
1919
#include <new>
2020

21-
#include <x86intrin.h>
2221

2322
#include "hashutil.h"
2423

@@ -40,6 +39,7 @@ inline uint64_t rotl64(uint64_t n, unsigned int c) {
4039
}
4140

4241
#ifdef __AVX2__
42+
#include <x86intrin.h>
4343

4444
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
4545
class SimdBlockFilterFixed {
@@ -293,7 +293,7 @@ SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
293293
// 32-bit version ARM
294294
//////////////////
295295
#ifdef __aarch64__
296-
296+
#include <arm_neon.h>
297297
struct mask32bytes {
298298
uint32x4_t first;
299299
uint32x4_t second;
@@ -366,19 +366,16 @@ template <typename HashFamily>
366366
[[gnu::always_inline]] inline mask32bytes_t
367367
SimdBlockFilterFixed<HashFamily>::MakeMask(const uint32_t hash) noexcept {
368368
const uint32x4_t ones = {1,1,1,1};
369-
// Odd contants for hashing:
370369
const uint32x4_t rehash1 = {0x47b6137bU, 0x44974d91U, 0x8824ad5bU,
371370
0xa2b7289dU};
372371
const uint32x4_t rehash2 = {0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U};
373372
uint32x4_t hash_data = {hash,hash,hash,hash};
374-
// Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
375-
// odd constants, then keep the 5 most significant bits from each product.
376373
uint32x4_t part1 = vmulq_u32(hash_data,rehash1);
377374
uint32x4_t part2 = vmulq_u32(hash_data,rehash2);
378375
part1 = vshrq_n_u32(part1, 27);
379376
part2 = vshrq_n_u32(part2, 27);
380-
vshlq_u32(ones, part1);
381-
vshlq_u32(ones, part2);
377+
part1 = vshlq_u32(ones, vreinterpretq_s32_u32(part1));
378+
part2 = vshlq_u32(ones, vreinterpretq_s32_u32(part2));
382379
mask32bytes_t answer;
383380
answer.first = part1;
384381
answer.second = part2;
@@ -448,13 +445,13 @@ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
448445
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
449446
const mask32bytes_t mask = MakeMask(hash);
450447
const mask32bytes_t bucket = directory_[bucket_idx];
451-
uint32x4_t an1 = vbicq_u32(bucket.first,mask.first);
452-
uint32x4_t an2 = vbicq_u32(bucket.second,mask.second);
448+
uint32x4_t an1 = vbicq_u32(mask.first, bucket.first);
449+
uint32x4_t an2 = vbicq_u32(mask.second,bucket.second);
453450
uint32x4_t an = vorrq_u32(an1, an2);
454451
uint64x2_t v64 = vreinterpretq_u64_u32(an);
455452
uint32x2_t v32 = vqmovn_u64(v64);
456453
uint64x1_t result = vreinterpret_u64_u32(v32);
457-
return vget_lane_u64(result, 0);
454+
return vget_lane_u64(result, 0) == 0;
458455
}
459456

460457

@@ -551,4 +548,4 @@ SimdBlockFilterFixed16<HashFamily>::Find(const uint64_t key) const noexcept {
551548
return _mm_testc_si128(bucketvalue,mask);
552549
}
553550

554-
#endif // #ifdef __SSSE3__
551+
#endif // #ifdef __SSSE3__

0 commit comments

Comments
 (0)