@@ -65,6 +65,8 @@ inline uint32_t reduce(uint32_t hash, uint32_t n) {
6565 return (uint32_t )(((uint64_t )hash * n) >> 32 );
6666}
6767
68+ // CountingBloomFilter --------------------------------------------------------------------------------------
69+
6870template <typename ItemType, size_t bits_per_item, bool branchless,
6971 typename HashFamily = TwoIndependentMultiplyShift,
7072 int k = (int )((double )bits_per_item * 0.693147180559945 + 0.5 )>
@@ -183,7 +185,7 @@ Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
183185 return Ok;
184186}
185187
186- // --------------------------------------------------------------------------------------
188+ // SuccinctCountingBloomFilter --------------------------------------------------------------------------------------
187189
188190// #define VERIFY_COUNT
189191
@@ -467,5 +469,283 @@ Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFami
467469 return Ok;
468470}
469471
472+ // SuccinctCountingBlockedBloomFilter --------------------------------------------------------------------------------------
473+
474+
475+ // #define VERIFY_COUNT
476+
/**
 * Blocked Bloom filter that also keeps a per-bit counter so keys can be
 * removed again.
 *
 * Storage layout, as used by the member functions defined for this class:
 *  - `data`     : the bitmap; every bucket is 8 consecutive 64-bit words.
 *  - `counts`   : one 64-bit word per `data` word. While its top bit is clear
 *                 it holds the packed counters of that word's set bits; when
 *                 the top bit is set it holds a running total in bits 32+ and
 *                 an index into `overflow` in the low bits.
 *  - `overflow` : pool of 8-word blocks, each storing 64 one-byte counters.
 *                 Unused blocks are chained through their first word, with
 *                 `nextFreeOverflow` as the head of that chain.
 *
 * @tparam ItemType       not referenced by the members visible here.
 * @tparam bits_per_item  bit budget per key; sizes the bitmap.
 * @tparam HashFamily     callable producing the hash used to derive probes.
 * @tparam k              number of probes per key (defaults to
 *                        round(bits_per_item * ln 2)).
 */
template <typename ItemType, size_t bits_per_item, typename HashFamily,
          int k = (int)((double)bits_per_item * 0.693147180559945 + 0.5)>
class SuccinctCountingBlockedBloomFilter {
 private:
  const int bucketCount;    // number of 512-bit buckets in `data`
  HashFamily hasher;
  uint64_t *data;           // bitmap, 8 words per bucket
  uint64_t *counts;         // packed counters / overflow reference per word
  uint64_t *overflow;       // pool of 8-word blocks of byte-wide counters
  size_t overflowLength;    // length of `overflow` in 64-bit words
  size_t nextFreeOverflow;  // head of the free-block chain in `overflow`
#ifdef VERIFY_COUNT
  uint8_t *realCount;       // plain per-bit counters used for self-checking
#endif

  void Increment(size_t group, int bit);
  void Decrement(size_t group, int bit);
  int ReadCount(size_t group, int bit);

 public:
  explicit SuccinctCountingBlockedBloomFilter(const int capacity);
  ~SuccinctCountingBlockedBloomFilter() noexcept;
  void Add(const uint64_t key) noexcept;
  void Remove(const uint64_t key) noexcept;
  bool Contain(const uint64_t key) const noexcept;

  /**
   * @return bytes used by `data` and `counts` (64 bytes per bucket each) plus
   *         the overflow pool (8 bytes per word).
   */
  uint64_t SizeInBytes() const {
    // Widen before multiplying: 2 * 64 * bucketCount overflows `int` once the
    // bitmap grows past 1 GiB.
    return static_cast<uint64_t>(bucketCount) * 2 * 64 +
           static_cast<uint64_t>(overflowLength) * 8;
  }
};
506+
507+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
508+ SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
509+ SuccinctCountingBlockedBloomFilter (const int capacity)
510+ : bucketCount(capacity * bits_per_item / 512 ), hasher() {
511+ const size_t alloc_size = bucketCount * (512 / 8 );
512+ const int malloc_failed =
513+ posix_memalign (reinterpret_cast <void **>(&data), 64 , alloc_size);
514+ if (malloc_failed)
515+ throw ::std::bad_alloc ();
516+ memset (data, 0 , alloc_size);
517+ size_t arrayLength = bucketCount * 8 ;
518+ overflowLength = 100 + arrayLength / 100 * 36 ;
519+ counts = new uint64_t [arrayLength]();
520+ overflow = new uint64_t [overflowLength]();
521+ #ifdef VERIFY_COUNT
522+ realCount = new uint8_t [arrayLength * 64 ]();
523+ #endif
524+ nextFreeOverflow = 0 ;
525+ for (size_t i = 0 ; i < overflowLength; i += 8 ) {
526+ overflow[i] = i + 8 ;
527+ }
528+ }
529+
530+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
531+ SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
532+ ~SuccinctCountingBlockedBloomFilter () noexcept {
533+ free (data);
534+ delete[] counts;
535+ delete[] overflow;
536+ }
537+
/**
 * Rotates a 64-bit value to the left.
 *
 * @param n  value to rotate.
 * @param c  rotation distance; only its low 6 bits are used, so distances of
 *           64 or more wrap around.
 * @return `n` rotated left by `c mod 64` bit positions.
 */
static inline uint64_t rotl64(uint64_t n, unsigned int c) {
  // Relies on the bit width of n being a power of two.
  const unsigned int width_mask = CHAR_BIT * sizeof(n) - 1;
  const unsigned int left = c & width_mask;
  // Complementary distance, reduced the same way so a zero rotation never
  // produces a shift by the full width.
  const unsigned int right = (0u - left) & width_mask;
  return (n << left) | (n >> right);
}
545+
546+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
547+ void SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
548+ Add (const uint64_t key) noexcept {
549+ const auto hash = hasher (key);
550+ const uint32_t bucket_start = reduce (rotl64 (hash, 32 ), bucketCount) * 8 ;
551+ uint32_t a = (uint32_t )hash;
552+ if (k >= 3 ) {
553+ Increment (bucket_start + ((a >> 0 ) & 7 ), (a >> 3 ) & 0x3f );
554+ Increment (bucket_start + ((a >> 9 ) & 7 ), (a >> 12 ) & 0x3f );
555+ Increment (bucket_start + ((a >> 18 ) & 7 ), (a >> 21 ) & 0x3f );
556+ // data[bucket_start + ((a >> 0) & 7)] |= 1ULL << ((a >> 3) & 0x3f);
557+ // data[bucket_start + ((a >> 9) & 7)] |= 1ULL << ((a >> 12) & 0x3f);
558+ // data[bucket_start + ((a >> 18) & 7)] |= 1ULL << ((a >> 21) & 0x3f);
559+ }
560+ uint32_t b = (uint32_t )(hash >> 32 );
561+ for (int i = 3 ; i < k; i++) {
562+ a += b;
563+ Increment (bucket_start + (a & 7 ), (a >> 3 ) & 0x3f );
564+ // data[bucket_start + (a & 7)] |= 1ULL << ((a >> 3) & 0x3f);
565+ }
566+ }
567+
568+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
569+ void SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
570+ Increment (size_t group, int bit) {
571+ #ifdef VERIFY_COUNT
572+ realCount[(group << 6 ) + bit]++;
573+ #endif
574+ uint64_t m = data[group];
575+ uint64_t c = counts[group];
576+ if ((c & 0xc000000000000000ULL ) != 0 ) {
577+ // an overflow entry, or overflowing now
578+ size_t index;
579+ if ((c & 0x8000000000000000ULL ) == 0 ) {
580+ // convert to an overflow entry
581+ // allocate overflow
582+ index = nextFreeOverflow;
583+ if (index >= overflowLength) {
584+ ::std::cout << " ERROR: overflow too small\n " ;
585+ data[group] |= 1ULL << bit;
586+ return ;
587+ }
588+ nextFreeOverflow = (size_t ) overflow[index];
589+ for (int i = 0 ; i < 8 ; i++) {
590+ overflow[index + i] = 0 ;
591+ }
592+ // convert to a pointer
593+ for (int i = 0 ; i < 64 ; i++) {
594+ int n = ReadCount (group, i);
595+ overflow[index + i / 8 ] += n * (1ULL << (i * 8 ));
596+ }
597+ uint64_t count = 64 ;
598+ c = 0x8000000000000000ULL | (count << 32 ) | index;
599+ counts[group] = c;
600+ } else {
601+ // already
602+ index = (size_t ) (c & 0x0fffffffULL );
603+ c += 1ULL << 32 ;
604+ counts[group] = c;
605+ }
606+ overflow[index + bit / 8 ] += (1ULL << (bit * 8 ));
607+ data[group] |= 1ULL << bit;
608+ } else {
609+ data[group] |= 1ULL << bit;
610+ int bitsBefore = bitCount64 (m & (0xffffffffffffffffULL >> (63 - bit)));
611+ int before = select64 ((c << 1 ) | 1 , bitsBefore);
612+ int d = (m >> bit) & 1 ;
613+ int insertAt = before - d;
614+ uint64_t mask = (1ULL << insertAt) - 1 ;
615+ uint64_t left = c & ~mask;
616+ uint64_t right = c & mask;
617+ c = (left << 1 ) | ((1ULL ^ d) << insertAt) | right;
618+ counts[group] = c;
619+ }
620+ #ifdef VERIFY_COUNT
621+ for (int b = 0 ; b < 64 ; b++) {
622+ if (realCount[(group << 6 ) + b] != ReadCount (group, b)) {
623+ ::std::cout << " group " << group << " /" << b << " of " << bit << " \n " ;
624+ }
625+ }
626+ #endif
627+ }
628+
629+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
630+ int SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
631+ ReadCount (size_t group, int bit) {
632+ uint64_t m = data[group];
633+ uint64_t d = (m >> bit) & 1 ;
634+ if (d == 0 ) {
635+ return 0 ;
636+ }
637+ uint64_t c = counts[group];
638+ if ((c & 0x8000000000000000ULL ) != 0 ) {
639+ size_t index = (size_t ) (c & 0x0fffffffULL );
640+ uint64_t n = overflow[index + bit / 8 ];
641+ n >>= 8 * (bit & 0xff );
642+ return (int ) (n & 0xff );
643+ }
644+ int bitsBefore = bitCount64 (m & (0xffffffffffffffffULL >> (63 - bit)));
645+ int bitPos = select64 (c, bitsBefore - 1 );
646+ uint64_t y = ((c << (63 - bitPos)) << 1 ) | (1ULL << (63 - bitPos));
647+ return numberOfLeadingZeros64 (y) + 1 ;
648+ }
649+
650+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
651+ void SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
652+ Remove (const uint64_t key) noexcept {
653+ const auto hash = hasher (key);
654+ const uint32_t bucket_start = reduce (rotl64 (hash, 32 ), bucketCount) * 8 ;
655+ uint32_t a = (uint32_t )hash;
656+ if (k >= 3 ) {
657+ Decrement (bucket_start + ((a >> 0 ) & 7 ), (a >> 3 ) & 0x3f );
658+ Decrement (bucket_start + ((a >> 9 ) & 7 ), (a >> 12 ) & 0x3f );
659+ Decrement (bucket_start + ((a >> 18 ) & 7 ), (a >> 21 ) & 0x3f );
660+ }
661+ uint32_t b = (uint32_t )(hash >> 32 );
662+ for (int i = 3 ; i < k; i++) {
663+ a += b;
664+ Decrement (bucket_start + (a & 7 ), (a >> 3 ) & 0x3f );
665+ }
666+ }
667+
668+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
669+ void SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
670+ Decrement (size_t group, int bit) {
671+ #ifdef VERIFY_COUNT
672+ realCount[(group << 6 ) + bit]--;
673+ #endif
674+ uint64_t m = data[group];
675+ uint64_t c = counts[group];
676+ if ((c & 0x8000000000000000ULL ) != 0 ) {
677+ // an overflow entry
678+ size_t index = (size_t ) (c & 0x0fffffffULL );
679+ size_t count = (size_t ) (c >> 32 ) & 0x0fffffffULL ;
680+ c -= 1ULL << 32 ;
681+ counts[group] = c;
682+ uint64_t n = overflow[index + bit / 8 ];
683+ overflow[index + bit / 8 ] = n - (1ULL << (bit * 8 ));
684+ n >>= 8 * (bit & 0xf );
685+ if ((n & 0xff ) == 1 ) {
686+ data[group] &= ~(1ULL << bit);
687+ }
688+ if (count < 64 ) {
689+ // convert back to an inline entry, and free up the overflow entry
690+ uint64_t c2 = 0 ;
691+ for (int j = 63 ; j >= 0 ; j--) {
692+ int cj = (int ) ((overflow[index + j / 8 ] >> (8 * j)) & 0xff );
693+ if (cj > 0 ) {
694+ c2 = ((c2 << 1 ) | 1 ) << (cj - 1 );
695+ }
696+ }
697+ counts[group] = c2;
698+ // free overflow
699+ overflow[index] = nextFreeOverflow;
700+ nextFreeOverflow = index;
701+ }
702+ } else {
703+ int bitsBefore = bitCount64 (m & (0xffffffffffffffffULL >> (63 - bit)));
704+ int before = select64 ((c << 1 ) | 1 , bitsBefore) - 1 ;
705+ int removeAt = max (0 , before - 1 );
706+ // remove the bit from the counter
707+ uint64_t mask = (1ULL << removeAt) - 1 ;
708+ uint64_t left = (c >> 1 ) & ~mask;
709+ uint64_t right= c & mask;
710+ counts[group] = left | right;
711+ uint64_t removed = (c >> removeAt) & 1 ;
712+ // possibly reset the data bit
713+ data[group] = m & ~(removed << bit);
714+ }
715+ #ifdef VERIFY_COUNT
716+ for (int b = 0 ; b < 64 ; b++) {
717+ if (realCount[(group << 6 ) + b] != ReadCount (group, b)) {
718+ ::std::cout << " group- " << group << " /" << b << " of " << bit << " \n " ;
719+ }
720+ }
721+ #endif
722+ }
723+
724+ template <typename ItemType, size_t bits_per_item, typename HashFamily, int k>
725+ bool SuccinctCountingBlockedBloomFilter<ItemType, bits_per_item, HashFamily, k>::
726+ Contain (const uint64_t key) const noexcept {
727+ const auto hash = hasher (key);
728+ const uint32_t bucket_start = reduce (rotl64 (hash, 32 ), bucketCount) * 8 ;
729+ uint32_t a = (uint32_t )hash;
730+ char ok = 1 ;
731+ if (k >= 3 ) {
732+ ok &= data[bucket_start + ((a >> 0 ) & 7 )] >> ((a >> 3 ) & 0x3f );
733+ ok &= data[bucket_start + ((a >> 9 ) & 7 )] >> ((a >> 12 ) & 0x3f );
734+ ok &= data[bucket_start + ((a >> 18 ) & 7 )] >> ((a >> 21 ) & 0x3f );
735+ }
736+ if (!ok) {
737+ return ok;
738+ }
739+ uint32_t b = (uint32_t )(hash >> 32 );
740+ for (int i = 3 ; i < k; i++) {
741+ a += b;
742+ ok &= data[bucket_start + (a & 7 )] >> ((a >> 3 ) & 63 );
743+ if (!ok) {
744+ return ok;
745+ }
746+ }
747+ return ok;
748+ }
749+
470750}
471751#endif
0 commit comments