Skip to content

Commit 79aae0a

Browse files
Merge pull request #5 from lemire/master
64-byte Blocked approach has *slightly* better space usage.
2 parents dbc3007 + fedb8f1 commit 79aae0a

File tree

5 files changed

+126
-243
lines changed

5 files changed

+126
-243
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,27 @@ struct FilterAPI<SimdBlockFilter<HashFamily>> {
190190
}
191191
};
192192

193+
template <typename HashFamily>
194+
struct FilterAPI<SimdBlockFilterFixed64<HashFamily>> {
195+
using Table = SimdBlockFilterFixed64<HashFamily>;
196+
static Table ConstructFromAddCount(size_t add_count) {
197+
Table ans(ceil(add_count * 8.0 / CHAR_BIT));
198+
return ans;
199+
}
200+
static void Add(uint64_t key, Table* table) {
201+
table->Add(key);
202+
}
203+
static void AddAll(const vector<uint64_t> keys, const size_t start, const size_t end, Table* table) {
204+
throw std::runtime_error("Unsupported");
205+
}
206+
207+
CONTAIN_ATTRIBUTES
208+
static bool Contain(uint64_t key, const Table * table) {
209+
return table->Find(key);
210+
}
211+
};
212+
213+
193214
template <typename HashFamily>
194215
struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
195216
using Table = SimdBlockFilterFixed<HashFamily>;
@@ -209,7 +230,6 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
209230
return table->Find(key);
210231
}
211232
};
212-
213233
#endif
214234

215235
template <typename ItemType, typename FingerprintType>
@@ -663,7 +683,7 @@ int main(int argc, char * argv[]) {
663683
{14,"GCS"}, {15,"CQF"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
664684
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
665685
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"},
666-
{40,"BlockedBloom (addall)"}
686+
{40,"BlockedBloom (addall)"}, {64,"BlockedBloom64"}
667687
};
668688

669689
if (argc < 2) {
@@ -876,10 +896,15 @@ int main(int argc, char * argv[]) {
876896

877897
#ifdef __AVX2__
878898
if (algorithmId == 10 || algorithmId < 0 || (algos.find(10) != algos.end())) {
879-
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
899+
auto cf = FilterBenchmark<SimdBlockFilterFixed<>>(
880900
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
881901
cout << setw(NAME_WIDTH) << names[10] << cf << endl;
882902
}
903+
if (algorithmId == 64 || algorithmId < 0 || (algos.find(64) != algos.end())) {
904+
auto cf = FilterBenchmark<SimdBlockFilterFixed64<SimpleMixSplit>>(
905+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
906+
cout << setw(NAME_WIDTH) << names[64] << cf << endl;
907+
}
883908
#endif
884909

885910
if (algorithmId == 11 || algorithmId < 0 || (algos.find(11) != algos.end())) {

src/simd-block-fixed-fpp.h

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,101 @@ SimdBlockFilterFixed<HashFamily>::Find(const uint64_t key) const noexcept {
184184
return _mm256_testc_si256(bucket, mask);
185185
}
186186

187+
/// 64-byte version
188+
189+
struct mask64bytes {
190+
__m256i first;
191+
__m256i second;
192+
};
193+
194+
typedef struct mask64bytes mask64bytes_t;
195+
196+
template<typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
197+
class SimdBlockFilterFixed64 {
198+
private:
199+
// The filter is divided up into Buckets:
200+
using Bucket = mask64bytes_t;
201+
202+
const int bucketCount;
203+
204+
Bucket* directory_;
205+
206+
HashFamily hasher_;
207+
208+
public:
209+
// Consumes at most (1 << log_heap_space) bytes on the heap:
210+
explicit SimdBlockFilterFixed64(const int bits);
211+
~SimdBlockFilterFixed64() noexcept;
212+
void Add(const uint64_t key) noexcept;
213+
214+
bool Find(const uint64_t key) const noexcept;
215+
uint64_t SizeInBytes() const { return sizeof(Bucket) * bucketCount; }
216+
217+
private:
218+
static mask64bytes_t MakeMask(const uint64_t hash) noexcept;
219+
220+
221+
};
222+
223+
template<typename HashFamily>
224+
SimdBlockFilterFixed64<HashFamily>::SimdBlockFilterFixed64(const int bits)
225+
226+
: bucketCount(::std::max(1, bits / 50)),
227+
directory_(nullptr),
228+
hasher_() {
229+
if (!__builtin_cpu_supports("avx2")) {
230+
throw ::std::runtime_error("SimdBlockFilterFixed64 does not work without AVX2 instructions");
231+
}
232+
const size_t alloc_size = bucketCount * sizeof(Bucket);
233+
const int malloc_failed =
234+
posix_memalign(reinterpret_cast<void**>(&directory_), 64, alloc_size);
235+
if (malloc_failed) throw ::std::bad_alloc();
236+
memset(directory_, 0, alloc_size);
237+
}
238+
239+
template<typename HashFamily>
240+
SimdBlockFilterFixed64<HashFamily>::~SimdBlockFilterFixed64() noexcept {
241+
free(directory_);
242+
directory_ = nullptr;
243+
}
244+
245+
246+
247+
template <typename HashFamily>
248+
[[gnu::always_inline]] inline mask64bytes_t
249+
SimdBlockFilterFixed64<HashFamily>::MakeMask(const uint64_t hash) noexcept {
250+
const __m256i ones = _mm256_set1_epi64x(1);
251+
const __m256i rehash1 = _mm256_setr_epi32(0x47b6137bU, 0x44974d91U, 0x8824ad5bU,
252+
0xa2b7289dU, 0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U);
253+
mask64bytes_t answer;
254+
__m256i hash_data = _mm256_set1_epi64x(hash);
255+
__m256i h = _mm256_mullo_epi32(rehash1, hash_data);
256+
h = _mm256_srli_epi32(h, 26);
257+
answer.first = _mm256_and_si256(_mm256_set1_epi64x(0x3f),h);
258+
answer.first = _mm256_sllv_epi64(ones, answer.first);
259+
answer.second = _mm256_srli_epi64(h,32);
260+
answer.second = _mm256_sllv_epi64(ones, answer.second);
261+
return answer;
262+
}
263+
264+
template <typename HashFamily>
265+
[[gnu::always_inline]] inline void
266+
SimdBlockFilterFixed64<HashFamily>::Add(const uint64_t key) noexcept {
267+
const auto hash = hasher_(key);
268+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
269+
mask64bytes_t mask = MakeMask(hash);
270+
mask64bytes_t* const bucket = &reinterpret_cast<mask64bytes_t*>(directory_)[bucket_idx];
271+
bucket->first = _mm256_or_si256(mask.first, bucket->first);
272+
bucket->second= _mm256_or_si256(mask.second, bucket->second);
273+
assert(Find(key));
274+
}
275+
276+
template <typename HashFamily>
277+
[[gnu::always_inline]] inline bool
278+
SimdBlockFilterFixed64<HashFamily>::Find(const uint64_t key) const noexcept {
279+
const auto hash = hasher_(key);
280+
const uint32_t bucket_idx = reduce(rotl64(hash, 32), bucketCount);
281+
const mask64bytes_t mask = MakeMask(hash);
282+
const mask64bytes_t bucket = reinterpret_cast<mask64bytes_t*>(directory_)[bucket_idx];
283+
return _mm256_testc_si256(bucket.first, mask.first) & _mm256_testc_si256(bucket.second, mask.second);
284+
}

src/simd-block.h

Lines changed: 0 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -141,168 +141,3 @@ SimdBlockFilter<HashFamily>::Find(const uint64_t key) const noexcept {
141141
}
142142

143143

144-
145-
146-
/// Rest is copied and pasted to work over 64-byte blocks
147-
148-
template <typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
149-
class SimdBlockFilter64 {
150-
private:
151-
// The filter is divided up into Buckets:
152-
using Bucket = uint32_t[16];
153-
154-
// log2(number of bytes in a bucket):
155-
static constexpr int LOG_BUCKET_BYTE_SIZE = 6;
156-
157-
static_assert((1 << LOG_BUCKET_BYTE_SIZE) == sizeof(Bucket) &&
158-
sizeof(Bucket) == 2 * sizeof(__m256i),
159-
"Bucket sizing has gone awry.");
160-
161-
// log_num_buckets_ is the log (base 2) of the number of buckets in the
162-
// directory:
163-
const int log_num_buckets_;
164-
165-
// directory_mask_ is (1 << log_num_buckets_) - 1. It is precomputed in the
166-
// contructor for efficiency reasons:
167-
const uint32_t directory_mask_;
168-
169-
Bucket *directory_;
170-
171-
HashFamily hasher_;
172-
173-
public:
174-
// Consumes at most (1 << log_heap_space) bytes on the heap:
175-
explicit SimdBlockFilter64(const int log_heap_space);
176-
SimdBlockFilter64(SimdBlockFilter64 &&that)
177-
: log_num_buckets_(that.log_num_buckets_),
178-
directory_mask_(that.directory_mask_), directory_(that.directory_),
179-
hasher_(that.hasher_) {}
180-
~SimdBlockFilter64() noexcept;
181-
void Add(const uint64_t key) noexcept;
182-
bool Find(const uint64_t key) const noexcept;
183-
uint64_t SizeInBytes() const {
184-
return sizeof(Bucket) * (1ull << log_num_buckets_);
185-
}
186-
187-
private:
188-
static void MakeMask(const uint32_t hash, __m256i *out1,
189-
__m256i *out2) noexcept;
190-
191-
SimdBlockFilter64(const SimdBlockFilter64 &) = delete;
192-
void operator=(const SimdBlockFilter64 &) = delete;
193-
};
194-
195-
template <typename HashFamily>
196-
SimdBlockFilter64<HashFamily>::SimdBlockFilter64(const int log_heap_space)
197-
: // Since log_heap_space is in bytes, we need to convert it to the number
198-
// of Buckets we will use.
199-
log_num_buckets_(::std::max(1, log_heap_space - LOG_BUCKET_BYTE_SIZE)),
200-
// Don't use log_num_buckets_ if it will lead to undefined behavior by a
201-
// shift that is too large.
202-
directory_mask_((1ull << ::std::min(63, log_num_buckets_)) - 1),
203-
directory_(nullptr), hasher_() {
204-
if (!__builtin_cpu_supports("avx2")) {
205-
throw ::std::runtime_error(
206-
"SimdBlockFilter64 does not work without AVX2 instructions");
207-
}
208-
const size_t alloc_size = 1ull << (log_num_buckets_ + LOG_BUCKET_BYTE_SIZE);
209-
const int malloc_failed =
210-
posix_memalign(reinterpret_cast<void **>(&directory_), 64, alloc_size);
211-
if (malloc_failed)
212-
throw ::std::bad_alloc();
213-
memset(directory_, 0, alloc_size);
214-
}
215-
216-
template <typename HashFamily>
217-
SimdBlockFilter64<HashFamily>::~SimdBlockFilter64() noexcept {
218-
free(directory_);
219-
directory_ = nullptr;
220-
}
221-
222-
// with AVX-512, this becomes a single instruction
223-
static inline __m256i hacked_mm256_mullo_epi64(__m256i x, __m256i ml,
224-
__m256i mh) {
225-
__m256i xl = x;
226-
// _mm256_and_si256(x, _mm256_set1_epi64x(UINT64_C(0x00000000ffffffff)));
227-
// __m256i xh = _mm256_srli_epi64(x, 32);
228-
//__m256i hl = _mm256_slli_epi64(_mm256_mul_epu32(xh, ml), 32);
229-
__m256i lh = _mm256_slli_epi64(_mm256_mul_epu32(xl, mh), 32);
230-
__m256i ll = _mm256_mul_epu32(xl, ml);
231-
// return _mm256_add_epi64(ll, _mm256_add_epi64(hl, lh));
232-
return _mm256_add_epi64(lh, ll);
233-
}
234-
235-
// The SIMD reinterpret_casts technically violate C++'s strict aliasing rules.
236-
// However, we compile with -fno-strict-aliasing.
237-
template <typename HashFamily>
238-
[[gnu::always_inline]] inline void
239-
SimdBlockFilter64<HashFamily>::MakeMask(const uint32_t hash, __m256i *out1,
240-
__m256i *out2) noexcept {
241-
const __m256i ones = _mm256_set1_epi64x(1);
242-
// Odd contants for hashing:
243-
const __m256i rehash1_l = _mm256_setr_epi64x(
244-
0x53214365047b6137 & 0xffffffff, 0x2c5635344974d91 & 0xffffffff,
245-
0x7fe299d78824ad5b & 0xffffffff, 0xc01ac48e4d29f115 & 0xffffffff);
246-
const __m256i rehash1_h = _mm256_setr_epi64x(
247-
UINT64_C(0x53214365047b6137) >> 32, UINT64_C(0x2c5635344974d91) >> 32,
248-
UINT64_C(0x7fe299d78824ad5b) >> 32, UINT64_C(0xc01ac48e4d29f115) >> 32);
249-
250-
const __m256i rehash2_l = _mm256_setr_epi64x(
251-
0x7bdeb6734f95e2e3 & 0xffffffff, 0x2ec75a90a4e6ad3d & 0xffffffff,
252-
0x3d485cae00ae48fd & 0xffffffff, 0xe7d0f0c09b59d29b & 0xffffffff);
253-
const __m256i rehash2_h = _mm256_setr_epi64x(
254-
UINT64_C(0x7bdeb6734f95e2e3) >> 32, UINT64_C(0x2ec75a90a4e6ad3d) >> 32,
255-
UINT64_C(0x3d485cae00ae48fd) >> 32, UINT64_C(0xe7d0f0c09b59d29b) >> 32);
256-
257-
__m256i hash_data = _mm256_set1_epi64x(hash);
258-
259-
// Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight
260-
// different odd constants, then keep the 6 most significant bits from each
261-
// product.
262-
__m256i hash_data1 = hacked_mm256_mullo_epi64(
263-
hash_data, rehash1_l,
264-
rehash1_h); //_mm256_mullo_epi64(rehash1, hash_data);
265-
__m256i hash_data2 = hacked_mm256_mullo_epi64(
266-
hash_data, rehash2_l,
267-
rehash2_h); //_mm256_mullo_epi64(rehash2, hash_data);
268-
269-
hash_data1 = _mm256_and_si256(_mm256_srli_epi64(hash_data1, 32),
270-
_mm256_set1_epi64x(63));
271-
hash_data2 = _mm256_and_si256(_mm256_srli_epi64(hash_data2, 32),
272-
_mm256_set1_epi64x(63));
273-
// Use these 6 bits to shift a single bit to a location in each 32-bit lane
274-
*out1 = _mm256_sllv_epi64(ones, hash_data1);
275-
*out2 = _mm256_sllv_epi64(ones, hash_data2);
276-
}
277-
278-
279-
template <typename HashFamily>
280-
[[gnu::always_inline]] inline void
281-
SimdBlockFilter64<HashFamily>::Add(const uint64_t key) noexcept {
282-
const auto hash = hasher_(key);
283-
const uint32_t bucket_idx = hash & directory_mask_;
284-
__m256i mask1, mask2;
285-
MakeMask(hash >> log_num_buckets_, &mask1, &mask2);
286-
__m256i *const bucket1 =
287-
&reinterpret_cast<__m256i *>(directory_)[2 * bucket_idx];
288-
__m256i *const bucket2 =
289-
&reinterpret_cast<__m256i *>(directory_)[2 * bucket_idx + 1];
290-
291-
_mm256_store_si256(bucket1, _mm256_or_si256(*bucket1, mask1));
292-
_mm256_store_si256(bucket2, _mm256_or_si256(*bucket2, mask2));
293-
}
294-
295-
template <typename HashFamily>
296-
[[gnu::always_inline]] inline bool
297-
SimdBlockFilter64<HashFamily>::Find(const uint64_t key) const noexcept {
298-
const auto hash = hasher_(key);
299-
const uint32_t bucket_idx = hash & directory_mask_;
300-
__m256i mask1, mask2;
301-
MakeMask(hash >> log_num_buckets_, &mask1, &mask2);
302-
const __m256i bucket1 =
303-
reinterpret_cast<__m256i *>(directory_)[2 * bucket_idx];
304-
const __m256i bucket2 =
305-
reinterpret_cast<__m256i *>(directory_)[2 * bucket_idx + 1];
306-
return _mm256_testc_si256(bucket1, mask1) &
307-
_mm256_testc_si256(bucket2, mask2);
308-
}

src/xorfilter.h

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -242,42 +242,6 @@ Status XorFilter<ItemType, FingerprintType, HashFamily>::AddAll(
242242
delete[] tmpc;
243243
delete[] alone;
244244

245-
/*
246-
247-
int* alone = new int[arrayLength];
248-
int alonePos = 0;
249-
for (size_t i = 0; i < arrayLength; i++) {
250-
if (t2vals[i].t2count == 1) {
251-
alone[alonePos++] = i;
252-
}
253-
}
254-
reverseOrderPos = 0;
255-
while (alonePos > 0 && reverseOrderPos < size) {
256-
int i = alone[--alonePos];
257-
if (t2vals[i].t2count == 0) {
258-
continue;
259-
}
260-
long hash = t2vals[i].t2;
261-
uint8_t found = -1;
262-
for (int hi = 0; hi < 3; hi++) {
263-
int h = getHashFromHash(hash, hi, blockLength);
264-
int newCount = --t2vals[h].t2count;
265-
if (newCount == 0) {
266-
found = (uint8_t) hi;
267-
} else {
268-
if (newCount == 1) {
269-
alone[alonePos++] = h;
270-
}
271-
t2vals[h].t2 ^= hash;
272-
}
273-
}
274-
reverseOrder[reverseOrderPos] = hash;
275-
reverseH[reverseOrderPos] = found;
276-
reverseOrderPos++;
277-
}
278-
delete [] alone;
279-
*/
280-
281245
if (reverseOrderPos == size) {
282246
break;
283247
}

0 commit comments

Comments
 (0)