Skip to content

Commit 603adbc

Browse files
committed
New counting Bloom filter and succinct counting Bloom filter
1 parent 41f59c5 commit 603adbc

File tree

2 files changed

+355
-31
lines changed

2 files changed

+355
-31
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 83 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "xorfilter_2n.h"
3030
#include "xorfilter_plus.h"
3131
#include "bloom.h"
32+
#include "counting_bloom.h"
3233
#include "gcs.h"
3334
#ifdef __AVX2__
3435
#include "gqf_cpp.h"
@@ -49,6 +50,7 @@ using namespace xorfilter2;
4950
using namespace xorfilter2n;
5051
using namespace xorfilter_plus;
5152
using namespace bloomfilter;
53+
using namespace counting_bloomfilter;
5254
using namespace gcsfilter;
5355
#ifdef __AVX2__
5456
using namespace gqfilter;
@@ -485,6 +487,37 @@ struct FilterAPI<BloomFilter<ItemType, bits_per_item, branchless, HashFamily>> {
485487
}
486488
};
487489

490+
// FilterAPI specialization that exposes CountingBloomFilter through the
// static interface used by this benchmark (ConstructFromAddCount / Add /
// AddAll / Contain). Every member simply forwards to the underlying Table.
template <typename ItemType, size_t bits_per_item, bool branchless, typename HashFamily>
491+
struct FilterAPI<CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily>> {
492+
using Table = CountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily>;
493+
// Builds a filter from the number of keys that are going to be added.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
494+
// Inserts a single key by forwarding to Table::Add.
static void Add(uint64_t key, Table* table) {
495+
table->Add(key);
496+
}
497+
// Bulk insertion is not provided for this filter: the call always throws
// std::runtime_error("Unsupported") and none of the arguments are used.
// NOTE(review): `keys` is taken by value, so the vector is copied before the
// throw; a const reference would avoid that — confirm against the other
// FilterAPI specializations before changing the signature.
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
498+
throw std::runtime_error("Unsupported");
499+
}
500+
CONTAIN_ATTRIBUTES
501+
// Returns true when Table::Contain(key) compares equal to 0.
// Presumably 0 is the filter's "found"/Ok status code — TODO confirm
// against counting_bloom.h.
static bool Contain(uint64_t key, const Table * table) {
502+
return (0 == table->Contain(key));
503+
}
504+
};
505+
506+
// FilterAPI specialization that exposes SuccinctCountingBloomFilter through
// the static interface used by this benchmark (ConstructFromAddCount / Add /
// AddAll / Contain). Every member simply forwards to the underlying Table.
template <typename ItemType, size_t bits_per_item, bool branchless, typename HashFamily>
507+
struct FilterAPI<SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily>> {
508+
using Table = SuccinctCountingBloomFilter<ItemType, bits_per_item, branchless, HashFamily>;
509+
// Builds a filter from the number of keys that are going to be added.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
510+
// Inserts a single key by forwarding to Table::Add.
static void Add(uint64_t key, Table* table) {
511+
table->Add(key);
512+
}
513+
// Bulk insertion is not provided for this filter: the call always throws
// std::runtime_error("Unsupported") and none of the arguments are used.
// NOTE(review): `keys` is taken by value, so the vector is copied before the
// throw; a const reference would avoid that — confirm against the other
// FilterAPI specializations before changing the signature.
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
514+
throw std::runtime_error("Unsupported");
515+
}
516+
CONTAIN_ATTRIBUTES
517+
// Returns true when Table::Contain(key) compares equal to 0.
// Presumably 0 is the filter's "found"/Ok status code — TODO confirm
// against counting_bloom.h.
static bool Contain(uint64_t key, const Table * table) {
518+
return (0 == table->Contain(key));
519+
}
520+
};
488521

489522
// assuming that first1,last1 and first2, last2 are sorted,
490523
// this tries to find out how many of first1,last1 can be
@@ -709,10 +742,13 @@ int main(int argc, char * argv[]) {
709742
{8,"Bloom12" }, {9,"Bloom16"}, {10,"BlockedBloom"},
710743
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
711744
{14,"GCS"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
712-
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
713-
{38,"Bloom12 (addall)"}, {43,"Branchless Bloom8 (addall)"},
714-
{41,"Branchless Bloom12 (addall)"},{42,"Branchless Bloom16 (addall)"},
745+
{25, "Xor10"},{26, "Xor10.666"},
746+
{37,"Bloom8 (addall)"}, {38,"Bloom12 (addall)"},
715747
{40,"BlockedBloom (addall)"},
748+
{41,"Branchless Bloom12 (addall)"},
749+
{42,"Branchless Bloom16 (addall)"},
750+
{43,"Branchless Bloom8 (addall)"},
751+
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
716752
{70,"SimpleBlockedBloom"}
717753
};
718754
#elif defined( __AVX2__)
@@ -722,10 +758,14 @@ int main(int argc, char * argv[]) {
722758
{8,"Bloom12" }, {9,"Bloom16"}, {10,"BlockedBloom"},
723759
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
724760
{14,"GCS"}, {15,"CQF"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
725-
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
726-
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"}, {43,"Branchless Bloom8 (addall)"},
727-
{41,"Branchless Bloom12 (addall)"},{42,"Branchless Bloom16 (addall)"},
728-
{40,"BlockedBloom (addall)"}, {63,"BlockedBloom16"}, {64,"BlockedBloom64"},
761+
{25, "Xor10"},{26, "Xor10.666"},
762+
{37,"Bloom8 (addall)"}, {38,"Bloom12 (addall)"}, {39,"Bloom16 (addall)"},
763+
{40,"BlockedBloom (addall)"},
764+
{41,"Branchless Bloom12 (addall)"},
765+
{42,"Branchless Bloom16 (addall)"},
766+
{43,"Branchless Bloom8 (addall)"},
767+
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
768+
{63,"BlockedBloom16"}, {64,"BlockedBloom64"},
729769
{70,"SimpleBlockedBloom"}
730770
};
731771
#else
@@ -735,9 +775,12 @@ int main(int argc, char * argv[]) {
735775
{8,"Bloom12" }, {9,"Bloom16"},
736776
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
737777
{14,"GCS"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
738-
{25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
739-
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"}, {43,"Branchless Bloom8 (addall)"},
740-
{41,"Branchless Bloom12 (addall)"},{42,"Branchless Bloom16 (addall)"},
778+
{25, "Xor10"},{26, "Xor10.666"},
779+
{37,"Bloom8 (addall)"}, {38,"Bloom12 (addall)"}, {39,"Bloom16 (addall)"},
780+
{41,"Branchless Bloom12 (addall)"},
781+
{42,"Branchless Bloom16 (addall)"},
782+
{43,"Branchless Bloom8 (addall)"},
783+
{44,"Counting Bloom8"}, {45,"Succinct Counting Bloom8"},
741784
{70,"SimpleBlockedBloom"}
742785
};
743786
#endif
@@ -962,7 +1005,6 @@ int main(int argc, char * argv[]) {
9621005
cout << setw(NAME_WIDTH) << names[10] << cf << endl;
9631006
}
9641007
#endif
965-
9661008
#ifdef __AVX2__
9671009
if (algorithmId == 10 || algorithmId < 0 || (algos.find(10) != algos.end())) {
9681010
auto cf = FilterBenchmark<SimdBlockFilterFixed<>>(
@@ -974,10 +1016,8 @@ int main(int argc, char * argv[]) {
9741016
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
9751017
cout << setw(NAME_WIDTH) << names[64] << cf << endl;
9761018
}
977-
9781019
#endif
9791020
#ifdef __SSSE3__
980-
9811021
if (algorithmId == 63 || algorithmId < 0 || (algos.find(63) != algos.end())) {
9821022
auto cf = FilterBenchmark<SimdBlockFilterFixed16<SimpleMixSplit>>(
9831023
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
@@ -1075,7 +1115,6 @@ int main(int argc, char * argv[]) {
10751115
cout << setw(NAME_WIDTH) << names[22] << cf << endl;
10761116
}
10771117

1078-
10791118
if (algorithmId == 23 || (algos.find(23) != algos.end())) {
10801119
auto cf = FilterBenchmark<
10811120
XorFilter2<uint64_t, uint16_t, NBitArray<uint16_t, 14>, SimpleMixSplit>>(
@@ -1140,14 +1179,20 @@ int main(int argc, char * argv[]) {
11401179
cout << setw(NAME_WIDTH) << names[39] << cf << endl;
11411180
}
11421181

1143-
1144-
1145-
if (algorithmId == 43 || algorithmId < 0 || (algos.find(43) != algos.end())) {
1146-
auto cf = FilterBenchmark<
1147-
BloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
1182+
#ifdef __AVX2__
1183+
if (algorithmId == 40 || algorithmId < 0 || (algos.find(40) != algos.end())) {
1184+
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
11481185
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
1149-
cout << setw(NAME_WIDTH) << names[43] << cf << endl;
1186+
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
1187+
}
1188+
#endif
1189+
#ifdef __aarch64__
1190+
if (algorithmId == 40 || algorithmId < 0 || (algos.find(40) != algos.end())) {
1191+
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
1192+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
1193+
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
11501194
}
1195+
#endif
11511196

11521197
if (algorithmId == 41 || algorithmId < 0 || (algos.find(41) != algos.end())) {
11531198
auto cf = FilterBenchmark<
@@ -1162,20 +1207,27 @@ int main(int argc, char * argv[]) {
11621207
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
11631208
cout << setw(NAME_WIDTH) << names[42] << cf << endl;
11641209
}
1165-
#ifdef __AVX2__
1166-
if (algorithmId == 40 || algorithmId < 0 || (algos.find(40) != algos.end())) {
1167-
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
1210+
1211+
if (algorithmId == 43 || algorithmId < 0 || (algos.find(43) != algos.end())) {
1212+
auto cf = FilterBenchmark<
1213+
BloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
11681214
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
1169-
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
1215+
cout << setw(NAME_WIDTH) << names[43] << cf << endl;
11701216
}
1171-
#endif
1172-
#ifdef __aarch64__
1173-
if (algorithmId == 40 || algorithmId < 0 || (algos.find(40) != algos.end())) {
1174-
auto cf = FilterBenchmark<SimdBlockFilterFixed<SimpleMixSplit>>(
1175-
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
1176-
cout << setw(NAME_WIDTH) << names[40] << cf << endl;
1217+
1218+
if (algorithmId == 44 || algorithmId < 0 || (algos.find(44) != algos.end())) {
1219+
auto cf = FilterBenchmark<
1220+
CountingBloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
1221+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
1222+
cout << setw(NAME_WIDTH) << names[44] << cf << endl;
1223+
}
1224+
1225+
if (algorithmId == 45 || algorithmId < 0 || (algos.find(45) != algos.end())) {
1226+
auto cf = FilterBenchmark<
1227+
SuccinctCountingBloomFilter<uint64_t, 8, true, SimpleMixSplit>>(
1228+
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed);
1229+
cout << setw(NAME_WIDTH) << names[45] << cf << endl;
11771230
}
1178-
#endif
11791231

11801232
// broken algorithms (don't always find all keys)
11811233
/*

0 commit comments

Comments
 (0)