|
13 | 13 | // highly unlikely |
14 | 14 | #endif |
15 | 15 |
|
/**
 * qsort() comparator ordering two uint64_t keys in ascending order.
 *
 * The result is derived from comparisons rather than from a subtraction:
 * the difference of two 64-bit values does not fit in an int, so converting
 * it would yield a wrong sign (or zero) for keys that are far apart.
 *
 * @param a pointer to the first uint64_t key
 * @param b pointer to the second uint64_t key
 * @return -1 if *a < *b, 0 if both are equal, 1 if *a > *b
 */
static int binary_fuse_cmpfunc(const void *a, const void *b) {
  const uint64_t lhs = *(const uint64_t *)a;
  const uint64_t rhs = *(const uint64_t *)b;
  return (lhs > rhs) - (lhs < rhs);
}

/**
 * Sorts keys[0..length) in ascending order and removes duplicates in place.
 *
 * On return, the distinct keys occupy the front of the array in ascending
 * order; the content of the remaining slots is unspecified.
 *
 * @param keys   array of keys; it is reordered and partially overwritten
 * @param length number of entries in keys
 * @return the number of distinct keys (0 when length is 0)
 */
static size_t binary_fuse_sort_and_remove_dup(uint64_t *keys, size_t length) {
  if (length == 0) {
    return 0;
  }
  qsort(keys, length, sizeof(uint64_t), binary_fuse_cmpfunc);
  // keys[0] is always kept, so the write position starts right after it.
  size_t unique = 1;
  for (size_t i = 1; i < length; i++) {
    // Compare against the last key kept; after sorting, equal keys are
    // adjacent, so this skips every repeat.
    if (keys[i] != keys[unique - 1]) {
      keys[unique] = keys[i];
      unique++;
    }
  }
  return unique;
}
| 31 | + |
16 | 32 | /** |
17 | 33 | * We start with a few utilities. |
18 | 34 | ***/ |
@@ -60,13 +76,73 @@ typedef struct binary_fuse8_s { |
60 | 76 | uint8_t *Fingerprints; |
61 | 77 | } binary_fuse8_t; |
62 | 78 |
|
63 | | -#ifdef _MSC_VER |
64 | | -// Windows programmers who target 32-bit platform may need help: |
65 | | -static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { return __umulh(a, b); } |
66 | | -#else |
// binary_fuse_mulhi(a, b) returns the high 64 bits of the 128-bit product
// a * b. The fastest implementation the compiler offers is selected below.
// #ifdefs adapted from:
// https://stackoverflow.com/a/50958815
#ifdef __SIZEOF_INT128__ // compilers supporting __uint128_t, e.g., gcc, clang
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  return (uint64_t)(((__uint128_t)a * b) >> 64);
}
#elif defined(_M_X64) || defined(_M_ARM64) // MSVC
// NOTE(review): __umulh is an MSVC intrinsic; this assumes <intrin.h> is
// included elsewhere in this file -- TODO confirm.
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  return __umulh(a, b);
}
#elif defined(_M_IA64) // also MSVC
// NOTE(review): confirm that _umul128 is actually available when targeting
// IA64; this branch is kept unchanged.
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  unsigned __int64 hi;
  (void) _umul128(a, b, &hi); // the returned low half is not needed
  return hi;
}
#else // portable implementation using uint64_t
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  // Adapted from:
  // https://stackoverflow.com/a/51587262

  /*
        This is implementing schoolbook multiplication on 32-bit halves
        (a = a1:a0, b = b1:b0; "10" stands for the product a1 * b0, etc.):

                a1 a0
        X       b1 b0
        -------------
                   00  LOW PART
        -------------
                00
             10 10     MIDDLE PART
        +       01
        -------------
             01
        + 11 11        HIGH PART
        -------------
  */

  const uint64_t a0 = (uint32_t) a;
  const uint64_t a1 = a >> 32;
  const uint64_t b0 = (uint32_t) b;
  const uint64_t b1 = b >> 32;
  const uint64_t p11 = a1 * b1;
  const uint64_t p01 = a0 * b1;
  const uint64_t p10 = a1 * b0;
  const uint64_t p00 = a0 * b0;

  // 64-bit product + two 32-bit values
  const uint64_t middle = p10 + (p00 >> 32) + (uint32_t) p01;

  /*
    Proof that 64-bit products can accumulate two more 32-bit values
    without overflowing:

    Max 32-bit value is 2^32 - 1.
    PSum = (2^32-1) * (2^32-1) + (2^32-1) + (2^32-1)
         = 2^64 - 2^32 - 2^32 + 1 + 2^32 - 1 + 2^32 - 1
         = 2^64 - 1
    Therefore the high half below cannot overflow regardless of input.
  */

  // high half
  return p11 + (middle >> 32) + (p01 >> 32);

  // low half (which we don't care about, but here it is)
  // (middle << 32) | (uint32_t) p00;
}
#endif
71 | 147 |
|
72 | 148 | typedef struct binary_hashes_s { |
@@ -151,7 +227,7 @@ static inline bool binary_fuse8_allocate(uint32_t size, |
151 | 227 | filter->SegmentLength = 262144; |
152 | 228 | } |
153 | 229 | filter->SegmentLengthMask = filter->SegmentLength - 1; |
154 | | - double sizeFactor = binary_fuse_calculate_size_factor(arity, size); |
| 230 | + double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size); |
155 | 231 | uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor)); |
156 | 232 | uint32_t initSegmentCount = |
157 | 233 | (capacity + filter->SegmentLength - 1) / filter->SegmentLength - |
@@ -197,7 +273,7 @@ static inline uint8_t binary_fuse_mod3(uint8_t x) { |
197 | 273 | // The caller is responsible for calling binary_fuse8_allocate(size,filter) |
198 | 274 | // before. For best performance, the caller should ensure that there are not too |
199 | 275 | // many duplicated keys. |
200 | | -static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, |
| 276 | +static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, |
201 | 277 | binary_fuse8_t *filter) { |
202 | 278 | uint64_t rng_counter = 0x726b2b9d438b9d4d; |
203 | 279 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
@@ -230,17 +306,15 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, |
230 | 306 | for (int loop = 0; true; ++loop) { |
231 | 307 | if (loop + 1 > XOR_MAX_ITERATIONS) { |
232 | 308 | // The probability of this happening is lower than |
233 | | - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
234 | | - // but if it happens, we just fill the fingerprint with ones which |
235 | | - // will flag all possible keys as 'possible', ensuring a correct result. |
| 309 | + // the cosmic-ray probability (i.e., a cosmic ray corrupts your system) |
236 | 310 | memset(filter->Fingerprints, ~0, filter->ArrayLength); |
237 | 311 | free(alone); |
238 | 312 | free(t2count); |
239 | 313 | free(reverseH); |
240 | 314 | free(t2hash); |
241 | 315 | free(reverseOrder); |
242 | 316 | free(startPos); |
243 | | - return true; |
| 317 | + return false; |
244 | 318 | } |
245 | 319 |
|
246 | 320 | for (uint32_t i = 0; i < block; i++) { |
@@ -295,9 +369,9 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, |
295 | 369 | error = (t2count[h2] < 4) ? 1 : error; |
296 | 370 | } |
297 | 371 | if(error) { |
298 | | - memset(reverseOrder, 0, sizeof(uint64_t[size])); |
299 | | - memset(t2count, 0, sizeof(uint8_t[capacity])); |
300 | | - memset(t2hash, 0, sizeof(uint64_t[capacity])); |
| 372 | + memset(reverseOrder, 0, sizeof(uint64_t) * size); |
| 373 | + memset(t2count, 0, sizeof(uint8_t) * capacity); |
| 374 | + memset(t2hash, 0, sizeof(uint64_t) * capacity); |
301 | 375 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
302 | 376 | continue; |
303 | 377 | } |
@@ -345,10 +419,12 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, |
345 | 419 | // success |
346 | 420 | size = stacksize; |
347 | 421 | break; |
| 422 | + } else if(duplicates > 0) { |
| 423 | + size = binary_fuse_sort_and_remove_dup(keys, size); |
348 | 424 | } |
349 | | - memset(reverseOrder, 0, sizeof(uint64_t[size])); |
350 | | - memset(t2count, 0, sizeof(uint8_t[capacity])); |
351 | | - memset(t2hash, 0, sizeof(uint64_t[capacity])); |
| 425 | + memset(reverseOrder, 0, sizeof(uint64_t) * size); |
| 426 | + memset(t2count, 0, sizeof(uint8_t) * capacity); |
| 427 | + memset(t2hash, 0, sizeof(uint64_t) * capacity); |
352 | 428 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
353 | 429 | } |
354 | 430 |
|
@@ -439,7 +515,7 @@ static inline bool binary_fuse16_allocate(uint32_t size, |
439 | 515 | } |
440 | 516 | filter->SegmentLengthMask = filter->SegmentLength - 1; |
441 | 517 | double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size); |
442 | | - uint32_t capacity = (uint32_t)(round((double)size * sizeFactor)); |
| 518 | + uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor)); |
443 | 519 | uint32_t initSegmentCount = |
444 | 520 | (capacity + filter->SegmentLength - 1) / filter->SegmentLength - |
445 | 521 | (arity - 1); |
@@ -481,7 +557,7 @@ static inline void binary_fuse16_free(binary_fuse16_t *filter) { |
481 | 557 | // The caller is responsible for calling binary_fuse16_allocate(size,filter) |
482 | 558 | // before. For best performance, the caller should ensure that there are not too |
483 | 559 | // many duplicated keys. |
484 | | -static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, |
| 560 | +static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, |
485 | 561 | binary_fuse16_t *filter) { |
486 | 562 | uint64_t rng_counter = 0x726b2b9d438b9d4d; |
487 | 563 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
@@ -514,17 +590,14 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, |
514 | 590 | for (int loop = 0; true; ++loop) { |
515 | 591 | if (loop + 1 > XOR_MAX_ITERATIONS) { |
516 | 592 | // The probability of this happening is lower than |
517 | | - // the cosmic-ray probability (i.e., a cosmic ray corrupts your system), |
518 | | - // but if it happens, we just fill the fingerprint with ones which |
519 | | - // will flag all possible keys as 'possible', ensuring a correct result. |
520 | | - memset(filter->Fingerprints, ~0, filter->ArrayLength * sizeof(uint16_t)); |
| 593 | + // the cosmic-ray probability (i.e., a cosmic ray corrupts your system). |
521 | 594 | free(alone); |
522 | 595 | free(t2count); |
523 | 596 | free(reverseH); |
524 | 597 | free(t2hash); |
525 | 598 | free(reverseOrder); |
526 | 599 | free(startPos); |
527 | | - return true; |
| 600 | + return false; |
528 | 601 | } |
529 | 602 |
|
530 | 603 | for (uint32_t i = 0; i < block; i++) { |
@@ -579,9 +652,9 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, |
579 | 652 | error = (t2count[h2] < 4) ? 1 : error; |
580 | 653 | } |
581 | 654 | if(error) { |
582 | | - memset(reverseOrder, 0, sizeof(uint64_t[size])); |
583 | | - memset(t2count, 0, sizeof(uint8_t[capacity])); |
584 | | - memset(t2hash, 0, sizeof(uint64_t[capacity])); |
| 655 | + memset(reverseOrder, 0, sizeof(uint64_t) * size); |
| 656 | + memset(t2count, 0, sizeof(uint8_t) * capacity); |
| 657 | + memset(t2hash, 0, sizeof(uint64_t) * capacity); |
585 | 658 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
586 | 659 | continue; |
587 | 660 | } |
@@ -629,10 +702,12 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, |
629 | 702 | // success |
630 | 703 | size = stacksize; |
631 | 704 | break; |
| 705 | + } else if(duplicates > 0) { |
| 706 | + size = binary_fuse_sort_and_remove_dup(keys, size); |
632 | 707 | } |
633 | | - memset(reverseOrder, 0, sizeof(uint64_t[size])); |
634 | | - memset(t2count, 0, sizeof(uint8_t[capacity])); |
635 | | - memset(t2hash, 0, sizeof(uint64_t[capacity])); |
| 708 | + memset(reverseOrder, 0, sizeof(uint64_t) * size); |
| 709 | + memset(t2count, 0, sizeof(uint8_t) * capacity); |
| 710 | + memset(t2hash, 0, sizeof(uint64_t) * capacity); |
636 | 711 | filter->Seed = binary_fuse_rng_splitmix64(&rng_counter); |
637 | 712 | } |
638 | 713 |
|
|
0 commit comments