Skip to content

Commit 341aeb8

Browse files
committed
Fuse filter (first version)
1 parent 64e5eae commit 341aeb8

File tree

1 file changed

+23
-17
lines changed

1 file changed

+23
-17
lines changed

src/xorfilter/xor_fuse_filter.h

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@ inline uint32_t reduce(uint32_t hash, uint32_t n) {
3131
return (uint32_t) (((uint64_t) hash * n) >> 32);
3232
}
3333

34-
size_t getHashFromHash(uint64_t hash, int index, int blockLength) {
35-
uint32_t r = rotl64(hash, index * 21);
36-
return (size_t) reduce(r, blockLength) + index * blockLength;
34+
// Geometry of the fuse filter: the fingerprint array is divided into
// segments of 2^segmentLengthBits slots each.
const size_t segmentLengthBits = 13;
// Shift a size_t (not an int) so the constant is computed at full width.
const size_t segmentLength = static_cast<size_t>(1) << segmentLengthBits;

/**
 * Maps a 64-bit hash to the array slot used by hash function number `index`.
 *
 * The start segment is the high 64 bits of `hash * segmentCount`, which lies
 * in [0, segmentCount) for a positive segmentCount. Hash function `index`
 * then uses segment `start + index`, and the slot inside that segment comes
 * from the `index`-th group of segmentLengthBits bits of the hash.
 *
 * @param hash          64-bit hash of the key
 * @param index         which hash function to evaluate (callers use 0, 1, 2)
 * @param segmentCount  number of possible start segments
 * @return the slot index, i.e. (start + index) * segmentLength + offset
 *
 * All arithmetic is done in size_t. Storing the segment or the final slot in
 * an `int` truncates the result as soon as it reaches 2^31, which yields a
 * wrong (possibly out-of-range) index for very large filters.
 */
inline size_t getHashFromHash(uint64_t hash, int index, int segmentCount) {
    // Multiply-high range reduction: take the upper 64 bits of the
    // 128-bit product.
    const __uint128_t product =
        static_cast<__uint128_t>(hash) * static_cast<__uint128_t>(segmentCount);
    const size_t startSegment =
        static_cast<size_t>(static_cast<uint64_t>(product >> 64));
    // Each hash function reads its own segmentLengthBits-wide slice of the hash.
    const size_t offset = static_cast<size_t>(
        (hash >> (index * segmentLengthBits)) & (segmentLength - 1));
    return (startSegment + index) * segmentLength + offset;
}
3843

3944
template <typename ItemType, typename FingerprintType,
@@ -43,7 +48,7 @@ class XorFuseFilter {
4348

4449
size_t size;
4550
size_t arrayLength;
46-
size_t blockLength;
51+
size_t segmentCount;
4752
FingerprintType *fingerprints;
4853

4954
HashFamily* hasher;
@@ -55,8 +60,11 @@ class XorFuseFilter {
5560
explicit XorFuseFilter(const size_t size) {
5661
hasher = new HashFamily();
5762
this->size = size;
58-
this->arrayLength = 32 + 1.23 * size;
59-
this->blockLength = arrayLength / 3;
63+
size_t capacity = size / 0.879;
64+
capacity = (capacity + 3 - 1) / 3 * 3;
65+
capacity = (capacity + segmentLength - 1) / segmentLength * segmentLength;
66+
this->segmentCount = capacity / segmentLength;
67+
this->arrayLength = (segmentCount + 2) * segmentLength;
6068
fingerprints = new FingerprintType[arrayLength]();
6169
std::fill_n(fingerprints, arrayLength, 0);
6270
}
@@ -134,14 +142,14 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
134142
t2val_t * t2vals = new t2val_t[m];
135143
while (true) {
136144
memset(t2vals, 0, sizeof(t2val_t[m]));
137-
int blocks = 1 + ((3 * blockLength) >> blockShift);
145+
int blocks = 1 + (arrayLength >> blockShift);
138146
uint64_t* tmp = new uint64_t[blocks << blockShift];
139147
int* tmpc = new int[blocks]();
140148
for(size_t i = start; i < end; i++) {
141149
uint64_t k = keys[i];
142150
uint64_t hash = (*hasher)(k);
143151
for (int hi = 0; hi < 3; hi++) {
144-
int index = getHashFromHash(hash, hi, blockLength);
152+
int index = getHashFromHash(hash, hi, segmentCount);
145153
int b = index >> blockShift;
146154
int i2 = tmpc[b];
147155
tmp[(b << blockShift) + i2] = hash;
@@ -222,7 +230,7 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
222230
}
223231
long hash = t2vals[i].t2;
224232
for (int hi = 0; hi < 3; hi++) {
225-
int h = getHashFromHash(hash, hi, blockLength);
233+
int h = getHashFromHash(hash, hi, segmentCount);
226234
if (h == i) {
227235
found = (uint8_t) hi;
228236
t2vals[i].t2count = 0;
@@ -253,7 +261,6 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
253261
std::cout << "WARNING: hashIndex " << hashIndex << "\n";
254262
if (hashIndex >= 0) {
255263
std::cout << (end - start) << " keys; arrayLength " << arrayLength
256-
<< " blockLength " << blockLength
257264
<< " reverseOrderPos " << reverseOrderPos << "\n";
258265
}
259266

@@ -275,7 +282,7 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
275282
// unless the other two entries are already occupied
276283
FingerprintType xor2 = fingerprint(hash);
277284
for (int hi = 0; hi < 3; hi++) {
278-
size_t h = getHashFromHash(hash, hi, blockLength);
285+
size_t h = getHashFromHash(hash, hi, segmentCount);
279286
if (found == hi) {
280287
change = h;
281288
} else {
@@ -299,12 +306,11 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::Contain(
299306
const ItemType &key) const {
300307
uint64_t hash = (*hasher)(key);
301308
FingerprintType f = fingerprint(hash);
302-
uint32_t r0 = (uint32_t) hash;
303-
uint32_t r1 = (uint32_t) rotl64(hash, 21);
304-
uint32_t r2 = (uint32_t) rotl64(hash, 42);
305-
uint32_t h0 = reduce(r0, blockLength);
306-
uint32_t h1 = reduce(r1, blockLength) + blockLength;
307-
uint32_t h2 = reduce(r2, blockLength) + 2 * blockLength;
309+
__uint128_t x = (__uint128_t) hash * (__uint128_t) segmentCount;
310+
int seg = (uint64_t)(x >> 64);
311+
int h0 = (seg + 0) * segmentLength + (size_t)((hash >> (0 * segmentLengthBits)) & (segmentLength - 1));
312+
int h1 = (seg + 1) * segmentLength + (size_t)((hash >> (1 * segmentLengthBits)) & (segmentLength - 1));
313+
int h2 = (seg + 2) * segmentLength + (size_t)((hash >> (2 * segmentLengthBits)) & (segmentLength - 1));
308314
f ^= fingerprints[h0] ^ fingerprints[h1] ^ fingerprints[h2];
309315
return f == 0 ? Ok : NotFound;
310316
}

0 commit comments

Comments
 (0)