Skip to content

Commit 011006e

Browse files
Merge pull request #2 from lemire/master
Tweaking the code so that it is safer and easier to add support for bulk additions
2 parents e9b4222 + 27077f5 commit 011006e

File tree

1 file changed

+32
-16
lines changed

1 file changed

+32
-16
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ struct FilterAPI<CuckooFilter<ItemType, bits_per_item, TableType, HashFamily>> {
140140
}
141141
}
142142
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<ItemType>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
144145

145146
CONTAIN_ATTRIBUTES
@@ -158,6 +159,7 @@ struct FilterAPI<CuckooFilterStable<ItemType, bits_per_item, TableType, HashFami
158159
}
159160
}
160161
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<ItemType>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
162164
static bool Contain(uint64_t key, const Table * table) {
163165
return (0 == table->Contain(key));
@@ -176,6 +178,7 @@ struct FilterAPI<SimdBlockFilter<HashFamily>> {
176178
table->Add(key);
177179
}
178180
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
180183

181184
CONTAIN_ATTRIBUTES
@@ -195,6 +198,7 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
195198
table->Add(key);
196199
}
197200
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
199203

200204
CONTAIN_ATTRIBUTES
@@ -210,6 +214,7 @@ struct FilterAPI<XorFilter<ItemType, FingerprintType>> {
210214
using Table = XorFilter<ItemType, FingerprintType>;
211215
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
212216
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
214219
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
215220
table->AddAll(keys, start, end);
@@ -226,6 +231,7 @@ struct FilterAPI<XorFilter<ItemType, FingerprintType, HashFamily>> {
226231
using Table = XorFilter<ItemType, FingerprintType, HashFamily>;
227232
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
228233
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
230236
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
231237
table->AddAll(keys, start, end);
@@ -242,6 +248,7 @@ struct FilterAPI<XorFilter2<ItemType, FingerprintType, FingerprintStorageType, H
242248
using Table = XorFilter2<ItemType, FingerprintType, FingerprintStorageType, HashFamily>;
243249
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
244250
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
246253
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
247254
table->AddAll(keys, start, end);
@@ -258,6 +265,7 @@ struct FilterAPI<XorFilter2n<ItemType, FingerprintType, FingerprintStorageType,
258265
using Table = XorFilter2n<ItemType, FingerprintType, FingerprintStorageType, HashFamily>;
259266
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
260267
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
262270
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
263271
table->AddAll(keys, start, end);
@@ -274,6 +282,7 @@ struct FilterAPI<XorFilterPlus<ItemType, FingerprintType, HashFamily>> {
274282
using Table = XorFilterPlus<ItemType, FingerprintType, HashFamily>;
275283
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
276284
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
278287
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
279288
table->AddAll(keys, start, end);
@@ -290,6 +299,7 @@ struct FilterAPI<GcsFilter<ItemType, bits_per_item, HashFamily>> {
290299
using Table = GcsFilter<ItemType, bits_per_item, HashFamily>;
291300
// Creates the filter by passing add_count straight to the Table constructor.
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
292301
// Inserting one key at a time is not available for this FilterAPI
// specialization: the function always throws std::runtime_error carrying the
// message "Unsupported" and reads neither argument.
static void Add(uint64_t key, Table* table) {
  static_cast<void>(key);    // intentionally unused
  static_cast<void>(table);  // intentionally unused
  throw std::runtime_error{"Unsupported"};
}
294304
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
295305
table->AddAll(keys, start, end);
@@ -310,6 +320,7 @@ struct FilterAPI<GQFilter<ItemType, bits_per_item, HashFamily>> {
310320
table->Add(key);
311321
}
312322
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<ItemType>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
314325

315326
CONTAIN_ATTRIBUTES
@@ -327,6 +338,7 @@ struct FilterAPI<BloomFilter<ItemType, bits_per_item, HashFamily>> {
327338
table->Add(key);
328339
}
329340
// Bulk insertion is not implemented for this FilterAPI specialization: every
// call throws std::runtime_error("Unsupported") and never touches `table`.
// `keys` is bound by const reference so that reaching this stub does not copy
// the caller's whole key vector before throwing.
static void AddAll(const vector<ItemType>& keys, const size_t start, const size_t end, Table* table) {
  throw std::runtime_error("Unsupported");
}
331343

332344
CONTAIN_ATTRIBUTES
@@ -389,7 +401,7 @@ bool has_duplicates(vector<uint64_t> a) {
389401

390402
template <typename Table>
391403
Statistics FilterBenchmark(
392-
size_t add_count, const vector<uint64_t>& to_add, const vector<uint64_t>& to_lookup, int seed) {
404+
size_t add_count, const vector<uint64_t>& to_add, const vector<uint64_t>& to_lookup, int seed, bool batchedadd = false) {
393405
if (add_count > to_add.size()) {
394406
throw out_of_range("to_add must contain at least add_count values");
395407
}
@@ -424,12 +436,16 @@ Statistics FilterBenchmark(
424436

425437
// Add values until failure or until we run out of values to add:
426438
auto start_time = NowNanos();
427-
428-
for (size_t added = 0; added < add_count; ++added) {
429-
FilterAPI<Table>::Add(to_add[added], &filter);
439+
if(batchedadd) {
440+
// for the XorFilter
441+
FilterAPI<Table>::AddAll(to_add, 0, add_count, &filter);
442+
} else {
443+
for (size_t added = 0; added < add_count; ++added) {
444+
FilterAPI<Table>::Add(to_add[added], &filter);
445+
}
430446
}
431-
// for the XorFilter
432-
FilterAPI<Table>::AddAll(to_add, 0, add_count, &filter);
447+
448+
433449
// sanity check:
434450
for (size_t added = 0; added < add_count; ++added) {
435451
assert(FilterAPI<Table>::Contain(to_add[added], &filter) == 1);
@@ -611,21 +627,21 @@ int main(int argc, char * argv[]) {
611627
if (algorithmId == 0 || algorithmId < 0) {
612628
auto cf = FilterBenchmark<
613629
XorFilter<uint64_t, uint8_t, SimpleMixSplit>>(
614-
add_count, to_add, to_lookup, seed);
630+
add_count, to_add, to_lookup, seed, true);
615631
cout << setw(NAME_WIDTH) << "Xor8" << cf << endl;
616632
}
617633

618634
if (algorithmId == 1 || algorithmId < 0) {
619635
auto cf = FilterBenchmark<
620636
XorFilter2<uint64_t, uint32_t, UInt12Array, SimpleMixSplit>>(
621-
add_count, to_add, to_lookup, seed);
637+
add_count, to_add, to_lookup, seed, true);
622638
cout << setw(NAME_WIDTH) << "Xor12" << cf << endl;
623639
}
624640

625641
if (algorithmId == 2 || algorithmId < 0) {
626642
auto cf = FilterBenchmark<
627643
XorFilter<uint64_t, uint16_t, SimpleMixSplit>>(
628-
add_count, to_add, to_lookup, seed);
644+
add_count, to_add, to_lookup, seed, true);
629645
cout << setw(NAME_WIDTH) << "Xor16" << cf << endl;
630646
}
631647

@@ -696,21 +712,21 @@ int main(int argc, char * argv[]) {
696712
if (algorithmId == 12 || algorithmId < 0) {
697713
auto cf = FilterBenchmark<
698714
XorFilterPlus<uint64_t, uint8_t, SimpleMixSplit>>(
699-
add_count, to_add, to_lookup, seed);
715+
add_count, to_add, to_lookup, seed, true);
700716
cout << setw(NAME_WIDTH) << "Xor+8" << cf << endl;
701717
}
702718

703719
if (algorithmId == 13 || algorithmId < 0) {
704720
auto cf = FilterBenchmark<
705721
XorFilterPlus<uint64_t, uint16_t, SimpleMixSplit>>(
706-
add_count, to_add, to_lookup, seed);
722+
add_count, to_add, to_lookup, seed, true);
707723
cout << setw(NAME_WIDTH) << "Xor+16" << cf << endl;
708724
}
709725

710726
if (algorithmId == 14 || algorithmId < 0) {
711727
auto cf = FilterBenchmark<
712728
GcsFilter<uint64_t, 8, SimpleMixSplit>>(
713-
add_count, to_add, to_lookup, seed);
729+
add_count, to_add, to_lookup, seed, true);
714730
cout << setw(NAME_WIDTH) << "GCS" << cf << endl;
715731
}
716732

@@ -765,28 +781,28 @@ int main(int argc, char * argv[]) {
765781
if (algorithmId == 21) {
766782
auto cf = FilterBenchmark<
767783
XorFilter2n<uint64_t, uint8_t, UIntArray<uint8_t>, SimpleMixSplit>>(
768-
add_count, to_add, to_lookup, seed);
784+
add_count, to_add, to_lookup, seed, true);
769785
cout << setw(NAME_WIDTH) << "Xor8-2^n" << cf << endl;
770786
}
771787

772788
if (algorithmId == 22) {
773789
auto cf = FilterBenchmark<
774790
XorFilter2<uint64_t, uint16_t, NBitArray<uint16_t, 10>, SimpleMixSplit>>(
775-
add_count, to_add, to_lookup, seed);
791+
add_count, to_add, to_lookup, seed, true);
776792
cout << setw(NAME_WIDTH) << "Xor10" << cf << endl;
777793
}
778794

779795
if (algorithmId == 23) {
780796
auto cf = FilterBenchmark<
781797
XorFilter2<uint64_t, uint16_t, NBitArray<uint16_t, 14>, SimpleMixSplit>>(
782-
add_count, to_add, to_lookup, seed);
798+
add_count, to_add, to_lookup, seed, true);
783799
cout << setw(NAME_WIDTH) << "Xor14" << cf << endl;
784800
}
785801

786802
if (algorithmId == 24) {
787803
auto cf = FilterBenchmark<
788804
XorFilter2<uint64_t, uint32_t, UInt10Array, SimpleMixSplit>>(
789-
add_count, to_add, to_lookup, seed);
805+
add_count, to_add, to_lookup, seed, true);
790806
cout << setw(NAME_WIDTH) << "Xor10.x" << cf << endl;
791807
}
792808

0 commit comments

Comments
 (0)