Skip to content

Commit 29dc59b

Browse files
committed
Counting Bloom filter: AddAll
1 parent e4aca7b commit 29dc59b

File tree

2 files changed

+103
-15
lines changed

2 files changed

+103
-15
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ struct FilterAPI<SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless
511511
table->Add(key);
512512
}
513513
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
514-
throw std::runtime_error("Unsupported");
514+
table->AddAll(keys, start, end);
515515
}
516516
CONTAIN_ATTRIBUTES
517517
static bool Contain(uint64_t key, const Table * table) {
@@ -748,7 +748,7 @@ int main(int argc, char * argv[]) {
748748
{41,"Branchless Bloom12 (addall)"},
749749
{42,"Branchless Bloom16 (addall)"},
750750
{43,"Branchless Bloom8 (addall)"},
751-
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
751+
{44,"Counting Bloom10 (addall)"}, {45,"Succ Counting Bloom10 (addall)"},
752752
{70,"SimpleBlockedBloom"}
753753
};
754754
#elif defined( __AVX2__)
@@ -764,7 +764,7 @@ int main(int argc, char * argv[]) {
764764
{41,"Branchless Bloom12 (addall)"},
765765
{42,"Branchless Bloom16 (addall)"},
766766
{43,"Branchless Bloom8 (addall)"},
767-
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
767+
{44,"Counting Bloom10 (addall)"}, {45,"Succ Counting Bloom10 (addall)"},
768768
{63,"BlockedBloom16"}, {64,"BlockedBloom64"},
769769
{70,"SimpleBlockedBloom"}
770770
};
@@ -780,7 +780,7 @@ int main(int argc, char * argv[]) {
780780
{41,"Branchless Bloom12 (addall)"},
781781
{42,"Branchless Bloom16 (addall)"},
782782
{43,"Branchless Bloom8 (addall)"},
783-
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
783+
{44,"Counting Bloom10 (addall)"}, {45,"Succ Counting Bloom10 (addall)"},
784784
{70,"SimpleBlockedBloom"}
785785
};
786786
#endif
@@ -1217,15 +1217,15 @@ int main(int argc, char * argv[]) {
12171217

12181218
if (algorithmId == 44 || algorithmId < 0 || (algos.find(44) != algos.end())) {
12191219
auto cf = FilterBenchmark<
1220-
CountingBloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
1220+
CountingBloomFilter<uint64_t, 10, true, SimpleMixSplit>>(
12211221
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
12221222
cout << setw(NAME_WIDTH) << names[44] << cf << endl;
12231223
}
12241224

12251225
if (algorithmId == 45 || algorithmId < 0 || (algos.find(45) != algos.end())) {
12261226
auto cf = FilterBenchmark<
1227-
SuccinctCountingBloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
1228-
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
1227+
SuccinctCountingBloomFilter<uint64_t, 10, true, SimpleMixSplit>>(
1228+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
12291229
cout << setw(NAME_WIDTH) << names[45] << cf << endl;
12301230
}
12311231

src/counting_bloom.h

Lines changed: 96 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,20 +73,21 @@ class CountingBloomFilter {
7373
uint64_t *data;
7474
size_t arrayLength;
7575
HashFamily hasher;
76+
const int blockShift = 16;
77+
const int blockLen = 1 << blockShift;
78+
79+
void AddBlock(uint32_t *tmp, int block, int len);
7680

7781
public:
7882
explicit CountingBloomFilter(const size_t n) : hasher() {
7983
size_t bitCount = 4 * n * bits_per_item;
8084
this->arrayLength = (bitCount + 63) / 64;
8185
data = new uint64_t[arrayLength]();
8286
}
83-
8487
~CountingBloomFilter() { delete[] data; }
85-
8688
Status Add(const ItemType &item);
87-
89+
Status AddAll(const vector<ItemType> data, const size_t start, const size_t end);
8890
Status Contain(const ItemType &item) const;
89-
9091
size_t SizeInBytes() const { return arrayLength * 8; }
9192
};
9293

@@ -105,6 +106,49 @@ Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
105106
return Ok;
106107
}
107108

109+
template <typename ItemType, size_t bits_per_item, bool branchless,
110+
typename HashFamily, int k>
111+
void CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
112+
AddBlock(uint32_t *tmp, int block, int len) {
113+
for (int i = 0; i < len; i++) {
114+
int index = tmp[(block << blockShift) + i];
115+
data[index >> 6] += 1ULL << ((index << 2) & 63);
116+
}
117+
}
118+
119+
template <typename ItemType, size_t bits_per_item, bool branchless,
120+
typename HashFamily, int k>
121+
Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
122+
AddAll(const vector<ItemType> keys, const size_t start, const size_t end) {
123+
int blocks = 1 + arrayLength / blockLen;
124+
uint32_t *tmp = new uint32_t[blocks * blockLen];
125+
int *tmpLen = new int[blocks]();
126+
for (size_t i = start; i < end; i++) {
127+
uint64_t key = keys[i];
128+
uint64_t hash = hasher(key);
129+
uint32_t a = (uint32_t)(hash >> 32);
130+
uint32_t b = (uint32_t)hash;
131+
for (int j = 0; j < k; j++) {
132+
int index = reduce(a, this->arrayLength);
133+
int block = index >> blockShift;
134+
int len = tmpLen[block];
135+
tmp[(block << blockShift) + len] = (index << 6) + (a & 63);
136+
tmpLen[block] = len + 1;
137+
if (len + 1 == blockLen) {
138+
AddBlock(tmp, block, len + 1);
139+
tmpLen[block] = 0;
140+
}
141+
a += b;
142+
}
143+
}
144+
for (int block = 0; block < blocks; block++) {
145+
AddBlock(tmp, block, tmpLen[block]);
146+
}
147+
delete[] tmp;
148+
delete[] tmpLen;
149+
return Ok;
150+
}
151+
108152
template <typename ItemType, size_t bits_per_item, bool branchless,
109153
typename HashFamily, int k>
110154
Status CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
@@ -137,9 +181,12 @@ class SuccinctCountingBloomFilter {
137181
size_t overflowLength;
138182
size_t nextFreeOverflow;
139183
HashFamily hasher;
184+
const int blockShift = 13;
185+
const int blockLen = 1 << blockShift;
140186

141187
void Increment(size_t group, int bit);
142188
int ReadCount(size_t group, int bit);
189+
void AddBlock(uint32_t *tmp, int block, int len);
143190

144191
public:
145192
explicit SuccinctCountingBloomFilter(const size_t n) : hasher() {
@@ -155,13 +202,10 @@ class SuccinctCountingBloomFilter {
155202
overflow[i] = i + 4;
156203
}
157204
}
158-
159205
~SuccinctCountingBloomFilter() { delete[] data; delete[] counts; delete[] overflow; }
160-
161206
Status Add(const ItemType &item);
162-
207+
Status AddAll(const vector<ItemType> data, const size_t start, const size_t end);
163208
Status Contain(const ItemType &item) const;
164-
165209
size_t SizeInBytes() const { return arrayLength * 8 * 2 + overflowLength * 8; }
166210
};
167211

@@ -180,6 +224,50 @@ Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFami
180224
return Ok;
181225
}
182226

227+
template <typename ItemType, size_t bits_per_item, bool branchless,
228+
typename HashFamily, int k>
229+
void SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
230+
AddBlock(uint32_t *tmp, int block, int len) {
231+
for (int i = 0; i < len; i++) {
232+
uint32_t index = tmp[(block << blockShift) + i];
233+
uint32_t group = index >> 6;
234+
Increment(group, index & 63);
235+
}
236+
}
237+
238+
template <typename ItemType, size_t bits_per_item, bool branchless,
239+
typename HashFamily, int k>
240+
Status SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::
241+
AddAll(const vector<ItemType> keys, const size_t start, const size_t end) {
242+
int blocks = 1 + arrayLength / blockLen;
243+
uint32_t *tmp = new uint32_t[blocks * blockLen];
244+
int *tmpLen = new int[blocks]();
245+
for (size_t i = start; i < end; i++) {
246+
uint64_t key = keys[i];
247+
uint64_t hash = hasher(key);
248+
uint32_t a = (uint32_t)(hash >> 32);
249+
uint32_t b = (uint32_t)hash;
250+
for (int j = 0; j < k; j++) {
251+
int index = reduce(a, this->arrayLength);
252+
int block = index >> blockShift;
253+
int len = tmpLen[block];
254+
tmp[(block << blockShift) + len] = (index << 6) + (a & 63);
255+
tmpLen[block] = len + 1;
256+
if (len + 1 == blockLen) {
257+
AddBlock(tmp, block, len + 1);
258+
tmpLen[block] = 0;
259+
}
260+
a += b;
261+
}
262+
}
263+
for (int block = 0; block < blocks; block++) {
264+
AddBlock(tmp, block, tmpLen[block]);
265+
}
266+
delete[] tmp;
267+
delete[] tmpLen;
268+
return Ok;
269+
}
270+
183271
template <typename ItemType, size_t bits_per_item, bool branchless,
184272
typename HashFamily, int k>
185273
void SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily, k>::

0 commit comments

Comments
 (0)