@@ -31,9 +31,14 @@ inline uint32_t reduce(uint32_t hash, uint32_t n) {
3131 return (uint32_t ) (((uint64_t ) hash * n) >> 32 );
3232}
3333
// Number of hash bits consumed to choose a slot inside one segment.
const size_t segmentLengthBits = 13;
// Slots per segment (2^segmentLengthBits); built from a size_t one so the
// constant never passes through a plain int.
const size_t segmentLength = static_cast<size_t>(1) << segmentLengthBits;

/**
 * Maps a 64-bit hash to the array position probed by hash function `index`.
 *
 * The starting segment is floor(hash * segmentCount / 2^64), i.e. a value in
 * [0, segmentCount) for a positive segmentCount. Hash function `index` then
 * lands in segment (start + index), at the offset given by the `index`-th
 * group of segmentLengthBits bits of the hash.
 *
 * @param hash          64-bit hash of the key.
 * @param index         Which hash function to evaluate; callers in this file
 *                      pass 0, 1 or 2.
 * @param segmentCount  Number of possible starting segments.
 * @return Position (start + index) * segmentLength + offset.
 *
 * All arithmetic is done in size_t: keeping the position in an `int` would
 * truncate it once the array holds 2^31 or more slots.
 */
inline size_t getHashFromHash(uint64_t hash, int index, int segmentCount) {
  // Multiply-shift range reduction: the high 64 bits of the 128-bit product.
  const __uint128_t product =
      static_cast<__uint128_t>(hash) * static_cast<__uint128_t>(segmentCount);
  const size_t firstSegment =
      static_cast<size_t>(static_cast<uint64_t>(product >> 64));
  // Each hash function reads its own group of segmentLengthBits bits.
  const size_t offset =
      static_cast<size_t>(hash >> (index * segmentLengthBits)) &
      (segmentLength - 1);
  return (firstSegment + index) * segmentLength + offset;
}
3843
3944template <typename ItemType, typename FingerprintType,
@@ -43,7 +48,7 @@ class XorFuseFilter {
4348
4449 size_t size;
4550 size_t arrayLength;
46- size_t blockLength ;
51+ size_t segmentCount ;
4752 FingerprintType *fingerprints;
4853
4954 HashFamily* hasher;
@@ -55,8 +60,11 @@ class XorFuseFilter {
5560 explicit XorFuseFilter (const size_t size) {
5661 hasher = new HashFamily ();
5762 this ->size = size;
58- this ->arrayLength = 32 + 1.23 * size;
59- this ->blockLength = arrayLength / 3 ;
63+ size_t capacity = size / 0.879 ;
64+ capacity = (capacity + 3 - 1 ) / 3 * 3 ;
65+ capacity = (capacity + segmentLength - 1 ) / segmentLength * segmentLength;
66+ this ->segmentCount = capacity / segmentLength;
67+ this ->arrayLength = (segmentCount + 2 ) * segmentLength;
6068 fingerprints = new FingerprintType[arrayLength]();
6169 std::fill_n (fingerprints, arrayLength, 0 );
6270 }
@@ -134,14 +142,14 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
134142 t2val_t * t2vals = new t2val_t [m];
135143 while (true ) {
136144 memset (t2vals, 0 , sizeof (t2val_t [m]));
137- int blocks = 1 + (( 3 * blockLength) >> blockShift);
145+ int blocks = 1 + (arrayLength >> blockShift);
138146 uint64_t * tmp = new uint64_t [blocks << blockShift];
139147 int * tmpc = new int [blocks]();
140148 for (size_t i = start; i < end; i++) {
141149 uint64_t k = keys[i];
142150 uint64_t hash = (*hasher)(k);
143151 for (int hi = 0 ; hi < 3 ; hi++) {
144- int index = getHashFromHash (hash, hi, blockLength );
152+ int index = getHashFromHash (hash, hi, segmentCount );
145153 int b = index >> blockShift;
146154 int i2 = tmpc[b];
147155 tmp[(b << blockShift) + i2] = hash;
@@ -222,7 +230,7 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
222230 }
223231 long hash = t2vals[i].t2 ;
224232 for (int hi = 0 ; hi < 3 ; hi++) {
225- int h = getHashFromHash (hash, hi, blockLength );
233+ int h = getHashFromHash (hash, hi, segmentCount );
226234 if (h == i) {
227235 found = (uint8_t ) hi;
228236 t2vals[i].t2count = 0 ;
@@ -253,7 +261,6 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
253261 std::cout << " WARNING: hashIndex " << hashIndex << " \n " ;
254262 if (hashIndex >= 0 ) {
255263 std::cout << (end - start) << " keys; arrayLength " << arrayLength
256- << " blockLength " << blockLength
257264 << " reverseOrderPos " << reverseOrderPos << " \n " ;
258265 }
259266
@@ -275,7 +282,7 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::AddAll(
275282 // unless the other two entries are already occupied
276283 FingerprintType xor2 = fingerprint (hash);
277284 for (int hi = 0 ; hi < 3 ; hi++) {
278- size_t h = getHashFromHash (hash, hi, blockLength );
285+ size_t h = getHashFromHash (hash, hi, segmentCount );
279286 if (found == hi) {
280287 change = h;
281288 } else {
@@ -299,12 +306,11 @@ Status XorFuseFilter<ItemType, FingerprintType, HashFamily>::Contain(
299306 const ItemType &key) const {
300307 uint64_t hash = (*hasher)(key);
301308 FingerprintType f = fingerprint (hash);
302- uint32_t r0 = (uint32_t ) hash;
303- uint32_t r1 = (uint32_t ) rotl64 (hash, 21 );
304- uint32_t r2 = (uint32_t ) rotl64 (hash, 42 );
305- uint32_t h0 = reduce (r0, blockLength);
306- uint32_t h1 = reduce (r1, blockLength) + blockLength;
307- uint32_t h2 = reduce (r2, blockLength) + 2 * blockLength;
309+ __uint128_t x = (__uint128_t ) hash * (__uint128_t ) segmentCount;
310+ int seg = (uint64_t )(x >> 64 );
311+ int h0 = (seg + 0 ) * segmentLength + (size_t )((hash >> (0 * segmentLengthBits)) & (segmentLength - 1 ));
312+ int h1 = (seg + 1 ) * segmentLength + (size_t )((hash >> (1 * segmentLengthBits)) & (segmentLength - 1 ));
313+ int h2 = (seg + 2 ) * segmentLength + (size_t )((hash >> (2 * segmentLengthBits)) & (segmentLength - 1 ));
308314 f ^= fingerprints[h0] ^ fingerprints[h1] ^ fingerprints[h2];
309315 return f == 0 ? Ok : NotFound;
310316}
0 commit comments