Skip to content

Commit f9c32cf

Browse files
committed
Fixing issue #8
1 parent 64bee75 commit f9c32cf

File tree

1 file changed

+74
-13
lines changed

1 file changed

+74
-13
lines changed

src/bloom.h

Lines changed: 74 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,67 @@ inline uint32_t reduce(uint32_t hash, uint32_t n) {
2323
return (uint32_t)(((uint64_t)hash * n) >> 32);
2424
}
2525

26+
/**
 * Maps a 32-bit value "word" onto the range [0,p) without using division.
 *
 * The result is the upper 32 bits of the 64-bit product word * p, so it is
 * driven mostly by the high bits of "word". When "word" sweeps over every
 * 32-bit value, the outputs cover [0,p) as evenly as is possible.
 *
 * @param word value to reduce (typically a hash)
 * @param p    exclusive upper bound of the result; p == 0 always yields 0
 * @return     an integer in [0,p)
 */
static inline uint32_t fastrange32(uint32_t word, uint32_t p) {
  // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
  const uint64_t product = (uint64_t)word * (uint64_t)p;
  return (uint32_t)(product >> 32);
}
36+
37+
#if defined(_MSC_VER) && defined (_WIN64)
38+
#include <intrin.h>// should be part of all recent Visual Studio
39+
#pragma intrinsic(_umul128)
40+
#endif // defined(_MSC_VER) && defined (_WIN64)
41+
42+
43+
/**
 * Returns the upper 64 bits of the 128-bit product a * b using only 64-bit
 * arithmetic (schoolbook multiplication on 32-bit halves). Used when the
 * compiler offers neither a 128-bit integer type nor a multiply-high
 * intrinsic.
 */
static inline uint64_t fastrange64_mulhi_portable(uint64_t a, uint64_t b) {
  const uint64_t aLo = a & 0xffffffffu;
  const uint64_t aHi = a >> 32;
  const uint64_t bLo = b & 0xffffffffu;
  const uint64_t bHi = b >> 32;

  const uint64_t loLo = aLo * bLo;
  const uint64_t hiLo = aHi * bLo;
  const uint64_t loHi = aLo * bHi;
  const uint64_t hiHi = aHi * bHi;

  // Each term is below 2^32, so the sum cannot overflow 64 bits; its upper
  // half is the carry out of bits 32..63 of the full product.
  const uint64_t carry =
      ((loLo >> 32) + (hiLo & 0xffffffffu) + (loHi & 0xffffffffu)) >> 32;

  return hiHi + (hiLo >> 32) + (loHi >> 32) + carry;
}

/**
 * Maps a 64-bit value "word" onto the range [0,p) without using division.
 *
 * The result is the upper 64 bits of the 128-bit product word * p, so it is
 * driven mostly by the high bits of "word". When "word" sweeps over every
 * 64-bit value, the outputs cover [0,p) as evenly as is possible.
 *
 * Every branch below computes the same value, so results do not depend on
 * the compiler in use.
 *
 * @param word value to reduce (typically a hash)
 * @param p    exclusive upper bound of the result; p == 0 always yields 0
 * @return     an integer in [0,p)
 */
static inline uint64_t fastrange64(uint64_t word, uint64_t p) {
  // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
#ifdef __SIZEOF_INT128__ // then we know we have a 128-bit int
  return (uint64_t)(((__uint128_t)word * (__uint128_t)p) >> 64);
#elif defined(_MSC_VER) && defined(_WIN64)
  // supported in Visual Studio 2005 and better
  uint64_t highProduct;
  _umul128(word, p, &highProduct); // the returned low half is not needed
  return highProduct;
#else
  // No 128-bit support: compute the same high product by hand rather than
  // falling back to "word % p", which divides and faults when p == 0.
  return fastrange64_mulhi_portable(word, p);
#endif // __SIZEOF_INT128__
}
67+
68+
69+
#ifndef UINT32_MAX
70+
#define UINT32_MAX (0xffffffff)
71+
#endif // UINT32_MAX
72+
73+
/**
74+
* Given a value "word", produces an integer in [0,p) without division.
75+
* The function is as fair as possible in the sense that if you iterate
76+
* through all possible values of "word", then you will generate all
77+
* possible outputs as uniformly as possible.
78+
*/
79+
static inline size_t fastrangesize(uint64_t word, size_t p) {
80+
#if (SIZE_MAX == UINT32_MAX)
81+
return (size_t)fastrange32(word, p);
82+
#else // assume 64-bit
83+
return (size_t)fastrange64(word, p);
84+
#endif // SIZE_MAX == UINT32_MAX
85+
}
86+
2687
/**
 * Chooses the number of probes (k) for a Bloom filter from its bits-per-item
 * budget: bitsPerItem * ln(2), rounded to the nearest integer and never less
 * than 1.
 *
 * @param bitsPerItem number of filter bits allotted to each stored item
 * @return            the probe count, at least 1
 */
static size_t getBestK(size_t bitsPerItem) {
  const int rounded = (int)round((double)bitsPerItem * log(2));
  // Clamp inline instead of relying on an unqualified max() being in scope.
  return (size_t)(rounded < 1 ? 1 : rounded);
}
@@ -85,14 +146,14 @@ template <typename ItemType, size_t bits_per_item, bool branchless,
85146
Status BloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::Add(
86147
const ItemType &key) {
87148
uint64_t hash = hasher(key);
88-
uint32_t a = (uint32_t)(hash >> 32);
89-
uint32_t b = (uint32_t)hash;
149+
uint64_t a = (hash >> 32) | (hash << 32);
150+
uint64_t b = hash;
90151
for (int i = 0; i < k; i++) {
91152
// int index = reduce(a, this->bitCount);
92153
// data[index >> 6] |= getBit(index);
93154
// reworked to avoid overflows
94155
// use the fact that reduce is not very sensitive to lower bits of a
95-
data[reduce(a, this->arrayLength)] |= getBit(a);
156+
data[fastrangesize(a, this->arrayLength)] |= getBit(a);
96157
a += b;
97158
}
98159
return Ok;
@@ -118,10 +179,10 @@ Status BloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::AddAll(
118179
for (size_t i = start; i < end; i++) {
119180
uint64_t key = keys[i];
120181
uint64_t hash = hasher(key);
121-
uint32_t a = (uint32_t)(hash >> 32);
122-
uint32_t b = (uint32_t)hash;
182+
uint64_t a = (hash >> 32) | (hash << 32);;
183+
uint64_t b = hash;
123184
for (int j = 0; j < k; j++) {
124-
int index = reduce(a, this->arrayLength);
185+
int index = fastrangesize(a, this->arrayLength);
125186
int block = index >> blockShift;
126187
int len = tmpLen[block];
127188
tmp[(block << blockShift) + len] = (index << 6) + (a & 63);
@@ -149,27 +210,27 @@ template <typename ItemType, size_t bits_per_item, bool branchless,
149210
Status BloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::Contain(
150211
const ItemType &key) const {
151212
uint64_t hash = hasher(key);
152-
uint32_t a = (uint32_t)(hash >> 32);
153-
uint32_t b = (uint32_t)hash;
213+
uint64_t a = (hash >> 32) | (hash << 32);;
214+
uint64_t b = hash;
154215
if (branchless && k >= 3) {
155-
int b0 = data[reduce(a, this->arrayLength)] >> (a & 63);
216+
int b0 = data[fastrangesize(a, this->arrayLength)] >> (a & 63);
156217
a += b;
157-
int b1 = data[reduce(a, this->arrayLength)] >> (a & 63);
218+
int b1 = data[fastrangesize(a, this->arrayLength)] >> (a & 63);
158219
a += b;
159-
int b2 = data[reduce(a, this->arrayLength)] >> (a & 63);
220+
int b2 = data[fastrangesize(a, this->arrayLength)] >> (a & 63);
160221
if ((b0 & b1 & b2 & 1) == 0) {
161222
return NotFound;
162223
}
163224
for (int i = 3; i < k; i++) {
164225
a += b;
165-
if (((data[reduce(a, this->arrayLength)] >> (a & 63)) & 1) == 0) {
226+
if (((data[fastrangesize(a, this->arrayLength)] >> (a & 63)) & 1) == 0) {
166227
return NotFound;
167228
}
168229
}
169230
return Ok;
170231
}
171232
for (int i = 0; i < k; i++) {
172-
if ((data[reduce(a, this->arrayLength)] & getBit(a)) == 0) {
233+
if ((data[fastrangesize(a, this->arrayLength)] & getBit(a)) == 0) {
173234
return NotFound;
174235
}
175236
a += b;

0 commit comments

Comments
 (0)