@@ -73,20 +73,21 @@ class CountingBloomFilter {
7373 uint64_t *data;
7474 size_t arrayLength;
7575 HashFamily hasher;
76+ const int blockShift = 16 ;
77+ const int blockLen = 1 << blockShift;
78+
79+ void AddBlock (uint32_t *tmp, int block, int len);
7680
7781public:
7882 explicit CountingBloomFilter (const size_t n) : hasher() {
7983 size_t bitCount = 4 * n * bits_per_item;
8084 this ->arrayLength = (bitCount + 63 ) / 64 ;
8185 data = new uint64_t [arrayLength]();
8286 }
83-
8487 ~CountingBloomFilter () { delete[] data; }
85-
8688 Status Add (const ItemType &item);
87-
89+ Status AddAll ( const vector<ItemType> data, const size_t start, const size_t end);
8890 Status Contain (const ItemType &item) const ;
89-
9091 size_t SizeInBytes () const { return arrayLength * 8 ; }
9192};
9293
@@ -105,6 +106,49 @@ Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
105106 return Ok;
106107}
107108
109+ template <typename ItemType, size_t bits_per_item, bool branchless,
110+ typename HashFamily, int k>
111+ void CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
112+ AddBlock (uint32_t *tmp, int block, int len) {
113+ for (int i = 0 ; i < len; i++) {
114+ int index = tmp[(block << blockShift) + i];
115+ data[index >> 6 ] += 1ULL << ((index << 2 ) & 63 );
116+ }
117+ }
118+
119+ template <typename ItemType, size_t bits_per_item, bool branchless,
120+ typename HashFamily, int k>
121+ Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
122+ AddAll (const vector<ItemType> keys, const size_t start, const size_t end) {
123+ int blocks = 1 + arrayLength / blockLen;
124+ uint32_t *tmp = new uint32_t [blocks * blockLen];
125+ int *tmpLen = new int [blocks]();
126+ for (size_t i = start; i < end; i++) {
127+ uint64_t key = keys[i];
128+ uint64_t hash = hasher (key);
129+ uint32_t a = (uint32_t )(hash >> 32 );
130+ uint32_t b = (uint32_t )hash;
131+ for (int j = 0 ; j < k; j++) {
132+ int index = reduce (a, this ->arrayLength );
133+ int block = index >> blockShift;
134+ int len = tmpLen[block];
135+ tmp[(block << blockShift) + len] = (index << 6 ) + (a & 63 );
136+ tmpLen[block] = len + 1 ;
137+ if (len + 1 == blockLen) {
138+ AddBlock (tmp, block, len + 1 );
139+ tmpLen[block] = 0 ;
140+ }
141+ a += b;
142+ }
143+ }
144+ for (int block = 0 ; block < blocks; block++) {
145+ AddBlock (tmp, block, tmpLen[block]);
146+ }
147+ delete[] tmp;
148+ delete[] tmpLen;
149+ return Ok;
150+ }
151+
108152template <typename ItemType, size_t bits_per_item, bool branchless,
109153 typename HashFamily, int k>
110154Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
@@ -137,9 +181,12 @@ class SuccinctCountingBloomFilter {
137181 size_t overflowLength;
138182 size_t nextFreeOverflow;
139183 HashFamily hasher;
184+ const int blockShift = 13 ;
185+ const int blockLen = 1 << blockShift;
140186
141187 void Increment (size_t group, int bit);
142188 int ReadCount (size_t group, int bit);
189+ void AddBlock (uint32_t *tmp, int block, int len);
143190
144191public:
145192 explicit SuccinctCountingBloomFilter (const size_t n) : hasher() {
@@ -155,13 +202,10 @@ class SuccinctCountingBloomFilter {
155202 overflow[i] = i + 4 ;
156203 }
157204 }
158-
159205 ~SuccinctCountingBloomFilter () { delete[] data; delete[] counts; delete[] overflow; }
160-
161206 Status Add (const ItemType &item);
162-
207+ Status AddAll ( const vector<ItemType> data, const size_t start, const size_t end);
163208 Status Contain (const ItemType &item) const ;
164-
165209 size_t SizeInBytes () const { return arrayLength * 8 * 2 + overflowLength * 8 ; }
166210};
167211
@@ -180,6 +224,50 @@ Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFami
180224 return Ok;
181225}
182226
227+ template <typename ItemType, size_t bits_per_item, bool branchless,
228+ typename HashFamily, int k>
229+ void SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
230+ AddBlock (uint32_t *tmp, int block, int len) {
231+ for (int i = 0 ; i < len; i++) {
232+ uint32_t index = tmp[(block << blockShift) + i];
233+ uint32_t group = index >> 6 ;
234+ Increment (group, index & 63 );
235+ }
236+ }
237+
238+ template <typename ItemType, size_t bits_per_item, bool branchless,
239+ typename HashFamily, int k>
240+ Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
241+ AddAll (const vector<ItemType> keys, const size_t start, const size_t end) {
242+ int blocks = 1 + arrayLength / blockLen;
243+ uint32_t *tmp = new uint32_t [blocks * blockLen];
244+ int *tmpLen = new int [blocks]();
245+ for (size_t i = start; i < end; i++) {
246+ uint64_t key = keys[i];
247+ uint64_t hash = hasher (key);
248+ uint32_t a = (uint32_t )(hash >> 32 );
249+ uint32_t b = (uint32_t )hash;
250+ for (int j = 0 ; j < k; j++) {
251+ int index = reduce (a, this ->arrayLength );
252+ int block = index >> blockShift;
253+ int len = tmpLen[block];
254+ tmp[(block << blockShift) + len] = (index << 6 ) + (a & 63 );
255+ tmpLen[block] = len + 1 ;
256+ if (len + 1 == blockLen) {
257+ AddBlock (tmp, block, len + 1 );
258+ tmpLen[block] = 0 ;
259+ }
260+ a += b;
261+ }
262+ }
263+ for (int block = 0 ; block < blocks; block++) {
264+ AddBlock (tmp, block, tmpLen[block]);
265+ }
266+ delete[] tmp;
267+ delete[] tmpLen;
268+ return Ok;
269+ }
270+
183271template <typename ItemType, size_t bits_per_item, bool branchless,
184272 typename HashFamily, int k>
185273void SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
0 commit comments