Skip to content

Commit 1ba514e

Browse files
committed
Updating to upstream fix.
1 parent ea53455 commit 1ba514e

File tree

4 files changed

+75
-11
lines changed

4 files changed

+75
-11
lines changed

src/build_filter.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
#include <stdlib.h>
1010
#include <string.h>
1111

12-
#include "bloom.h"
12+
#include "bloom/bloom.h"
1313
#include "hexutil.h"
14-
#include "xorfilter.h"
14+
#include "xorfilter/xorfilter.h"
1515
#include "xor_singleheader/include/xorfilter.h"
1616
#include "mappeablebloomfilter.h"
1717

@@ -136,10 +136,7 @@ int main(int argc, char **argv) {
136136
printusage(argv[0]);
137137
return 0;
138138
}
139-
if (optind >= argc) {
140-
printusage(argv[0]);
141-
return -1;
142-
}
139+
143140
size_t array_size;
144141
uint64_t * array;
145142
if(synthetic) {
@@ -149,6 +146,10 @@ int main(int argc, char **argv) {
149146
array[i] = i;
150147
}
151148
} else {
149+
if (optind >= argc) {
150+
printusage(argv[0]);
151+
return -1;
152+
}
152153
const char *filename = argv[optind];
153154
array = read_data(filename, array_size, maxline, printall);
154155
if(array == nullptr) {

src/mappeablebloomfilter.h

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,69 @@ static inline size_t getBestK(size_t bitsPerItem) {
1111
}
1212
}
1313

14+
15+
16+
/**
17+
* Given a value "word", produces an integer in [0,p) without division.
18+
* The function is as fair as possible in the sense that if you iterate
19+
* through all possible values of "word", then you will generate all
20+
* possible outputs as uniformly as possible.
*
* The result is the upper 32 bits of the 64-bit product word * p, so
* p == 0 always yields 0.
21+
*/
22+
static inline uint32_t fastrange32(uint32_t word, uint32_t p) {
23+
// http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
24+
// Both operands are widened to 64 bits before multiplying so the full
// product is available; the shift then keeps only its upper half.
return (uint32_t)(((uint64_t)word * (uint64_t)p) >> 32);
25+
}
26+
27+
#if defined(_MSC_VER) && defined (_WIN64)
28+
#include <intrin.h>// should be part of all recent Visual Studio
29+
#pragma intrinsic(_umul128)
30+
#endif // defined(_MSC_VER) && defined (_WIN64)
31+
32+
33+
/**
 * Maps "word" to an integer in [0,p) without a division or modulo.
 *
 * The result is the high 64 bits of the 128-bit product word * p, that is
 * floor(word * p / 2^64). If you iterate through all possible values of
 * "word", the outputs cover [0,p) as uniformly as possible.
 *
 * Every preprocessor branch below computes the same value, so the mapping
 * does not depend on the compiler used to build the code.
 *
 * @param word value to reduce (typically a hash)
 * @param p    exclusive upper bound of the result; p == 0 yields 0
 * @return     floor(word * p / 2^64)
 */
static inline uint64_t fastrange64(uint64_t word, uint64_t p) {
  // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
#ifdef __SIZEOF_INT128__ // then we know we have a 128-bit int
  return (uint64_t)(((__uint128_t)word * (__uint128_t)p) >> 64);
#elif defined(_MSC_VER) && defined(_WIN64)
  // supported in Visual Studio 2005 and better
  uint64_t highProduct;
  // _umul128 returns the low half of the product, which is not needed here;
  // only the high half written through the pointer is used.
  _umul128(word, p, &highProduct);
  return highProduct;
#else
  // Portable fallback: build the high 64 bits of the 128-bit product from
  // four 32x32->64 partial products. A plain "word % p" would return values
  // that differ from the branches above and would divide by zero for p == 0.
  const uint64_t wordLow = word & 0xffffffff;
  const uint64_t wordHigh = word >> 32;
  const uint64_t pLow = p & 0xffffffff;
  const uint64_t pHigh = p >> 32;
  const uint64_t lowLow = wordLow * pLow;
  const uint64_t highLow = wordHigh * pLow;
  const uint64_t lowHigh = wordLow * pHigh;
  const uint64_t highHigh = wordHigh * pHigh;
  // Cannot overflow: (2^32-1) + (2^32-1) + (2^32-1)^2 == 2^64 - 1.
  const uint64_t middle = (lowLow >> 32) + (highLow & 0xffffffff) + lowHigh;
  return highHigh + (highLow >> 32) + (middle >> 32);
#endif // __SIZEOF_INT128__
}
57+
58+
59+
#ifndef UINT32_MAX
60+
#define UINT32_MAX (0xffffffff)
61+
#endif // UINT32_MAX
62+
63+
/**
64+
* Given a value "word", produces an integer in [0,p) without division.
65+
* The function is as fair as possible in the sense that if you iterate
66+
* through all possible values of "word", then you will generate all
67+
* possible outputs as uniformly as possible.
*
* Dispatches on the width of size_t: when SIZE_MAX equals UINT32_MAX the
* reduction is done by fastrange32, otherwise by fastrange64.
68+
*/
69+
static inline size_t fastrangesize(uint64_t word, size_t p) {
70+
#if (SIZE_MAX == UINT32_MAX)
71+
// fastrange32 takes uint32_t arguments, so "word" is implicitly narrowed
// to its low 32 bits on this path.
return (size_t)fastrange32(word, p);
72+
#else // assume 64-bit
73+
return (size_t)fastrange64(word, p);
74+
#endif // SIZE_MAX == UINT32_MAX
75+
}
76+
1477
// Returns a 64-bit mask with only bit (index mod 64) set.
// The constant is widened to uint64_t before shifting: a plain 1L is a signed
// long, which is only 32 bits wide on LLP64 targets such as 64-bit Windows,
// so shifting it by 32..63 would be undefined and produce a wrong mask.
static inline uint64_t getBit(uint32_t index) {
  return (uint64_t)1 << (index & 63);
}
1578

1679
template <int bitsPerItem> class MappeableBloomFilter {
@@ -27,10 +90,10 @@ template <int bitsPerItem> class MappeableBloomFilter {
2790
// Report if the item is inserted, with false positive rate.
2891
bool Contain(const uint64_t key) const {
2992
uint64_t hash = hasher(key);
30-
uint32_t a = (uint32_t)(hash >> 32);
31-
uint32_t b = (uint32_t)hash;
93+
uint64_t a = (hash >> 32) | (hash << 32);;
94+
uint64_t b = hash;
3295
for (int i = 0; i < k; i++) {
33-
if ((data[reduce(a, this->arrayLength)] & getBit(a)) == 0) {
96+
if ((data[fastrangesize(a, this->arrayLength)] & getBit(a)) == 0) {
3497
return false;
3598
}
3699
a += b;

0 commit comments

Comments
 (0)