@@ -192,6 +192,8 @@ BloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::Info() const {
192192 return ss.str ();
193193}
194194
195+
196+
/* **************
 * Simple block filter (naive implementation)
 ***************/
@@ -200,82 +202,81 @@ template <size_t blocksize, int k,
200202 typename HashFamily = ::hashing::TwoIndependentMultiplyShift>
201203class SimpleBlockFilter {
202204private:
203- // The filter is divided up into Buckets:
204- using Bucket = uint64_t [blocksize];
205-
206- const int bucketCount;
207-
208- Bucket *directory_;
209-
205+ const size_t arrayLength;
206+ uint64_t * data;
210207 HashFamily hasher_;
211-
212208public:
213209 // Consumes at most (1 << log_heap_space) bytes on the heap:
214210 explicit SimpleBlockFilter (const int bits);
215211 ~SimpleBlockFilter () noexcept ;
216212 void Add (const uint64_t key) noexcept ;
217-
218213 bool Find (const uint64_t key) const noexcept ;
219- uint64_t SizeInBytes () const { return sizeof (Bucket) * bucketCount; }
214+ uint64_t SizeInBytes () const {
215+ return arrayLength * 8 ;
216+ }
220217};
221218
222219template <size_t blocksize, int k, typename HashFamily>
223220SimpleBlockFilter<blocksize, k, HashFamily>::SimpleBlockFilter(
224221 const int capacity)
225- : bucketCount( capacity * k / (blocksize * 47 )), directory_( nullptr ),
222+ : arrayLength(( capacity * 10 ) / 64 + 8 ),
226223 hasher_ () {
227- const size_t alloc_size = bucketCount * sizeof (Bucket);
228- const int malloc_failed =
229- posix_memalign (reinterpret_cast <void **>(&directory_), 64 , alloc_size);
230- if (malloc_failed)
231- throw ::std::bad_alloc ();
232- memset (directory_, 0 , alloc_size);
224+ data = new uint64_t [arrayLength]();
233225}
234226
235227template <size_t blocksize, int k, typename HashFamily>
236228SimpleBlockFilter<blocksize, k, HashFamily>::~SimpleBlockFilter () noexcept {
237- free (directory_ );
238- directory_ = nullptr ;
229+ free (data );
230+ data = nullptr ;
239231}
240232
// Rotates the 64-bit value `n` left by `c` bit positions.
//
// The rotation count is reduced modulo 64, so any `c` is accepted (a count of
// 0 or 64 returns `n` unchanged). Both shift amounts are masked, which keeps
// each individual shift strictly below the operand width.
static inline uint64_t rotl64(uint64_t n, unsigned int c) {
  // Relies on the bit width of `n` being a power of two.
  const unsigned int mask = (CHAR_BIT * sizeof(n) - 1);
  c &= mask;
  // (0 - c) & mask equals (64 - c) % 64 in unsigned arithmetic.
  return (n << c) | (n >> ((0u - c) & mask));
}
248239
249- char setbit64 (uint64_t *t, uint64_t bit) { return *t |= (1L << (bit & 63 )); }
250-
251240template <size_t blocksize, int k, typename HashFamily>
252241inline void
253242SimpleBlockFilter<blocksize, k, HashFamily>::Add(const uint64_t key) noexcept {
254243 const auto hash = hasher_ (key);
255- const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
256- Bucket *bucket = directory_ + bucket_idx;
257- uint32_t a = (uint32_t )(hash >> 32 );
258- uint32_t b = (uint32_t )hash;
259- for (int i = 0 ; i < k; i++) {
260- setbit64 ((uint64_t *)bucket + (a % blocksize), a / blocksize);
261- a += b;
262- }
244+ const uint32_t idx = reduce (hash, arrayLength);
245+ uint64_t *bucket = data + idx;
246+ // uint32_t a = (uint32_t)(hash ^ (hash >> 32));
247+
248+ // *bucket++ |= (uint64_t) ((1L << (a & 63)) | (1L << ((a >> 6) & 63)));
249+ // *bucket |= (uint64_t) ((1L << ((a >> 12) & 63)) | (1L << ((a >> 18) & 63)));
250+
251+ // *bucket++ |= (uint64_t) (a & (a >> 1));
252+ uint64_t m1 = 1L << hash;
253+ uint64_t m2 = 1L << (hash >> 8 );
254+ uint64_t m = m1 | m2;
255+ *bucket |= m;
256+
263257}
264258template <size_t blocksize, int k, typename HashFamily>
265259inline bool
266260SimpleBlockFilter<blocksize, k, HashFamily>::Find(const uint64_t key) const
267261 noexcept {
268262 const auto hash = hasher_ (key);
269- const uint32_t bucket_idx = reduce (rotl64 (hash, 32 ), bucketCount);
270- const Bucket *bucket = directory_ + bucket_idx;
271- uint32_t a = (uint32_t )(hash >> 32 );
272- uint32_t b = (uint32_t )hash;
273- char ok = 1 ;
274- for (int i = 0 ; i < k; i++) {
275- ok &= bittest64 (((const uint64_t *)bucket) + (a % blocksize), a / blocksize);
276- a += b;
277- }
278- return ok;
263+ const uint32_t idx = reduce (hash, arrayLength);
264+ uint64_t *bucket = data + idx;
265+ // uint32_t a = (uint32_t)(hash ^ (hash >> 32));
266+ // uint64_t m1 = (uint64_t) ((1L << (a & 63)) | (1L << ((a >> 6) & 63)));
267+ // uint64_t m2 = (uint64_t) ((1L << ((a >> 12) & 63)) | (1L << ((a >> 18) & 63)));
268+ uint64_t m1 = 1L << hash;
269+ uint64_t m2 = 1L << (hash >> 8 );
270+ uint64_t m = m1 | m2;
271+ return !((m & *bucket) - m);
272+
273+ /*
274+ uint64_t x = *bucket++;
275+ // a += b;
276+ // x = *bucket++;
277+ // y &= (x >> (a & 63)) & (x >> ((a >> 8) & 63));
278+ return y & 1;
279+ */
279280}
280281
281282} // namespace bloomfilter
0 commit comments