Skip to content

Commit bd72df2

Browse files
committed
Adding "named" support for the new schemes + making it easier/cleaner to run smaller tests.
1 parent 992c083 commit bd72df2

File tree

1 file changed

+24
-19
lines changed

1 file changed

+24
-19
lines changed

benchmarks/bulk-insert-and-query.cc

Lines changed: 24 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ using namespace gqfilter;
5757
#endif
5858

5959
// The number of items sampled when determining the lookup performance
60-
const size_t SAMPLE_SIZE = 10 * 1000 * 1000;
60+
const size_t MAX_SAMPLE_SIZE = 10 * 1000 * 1000;
6161

6262
// The statistics gathered for each table type:
6363
struct Statistics {
@@ -652,10 +652,12 @@ int main(int argc, char * argv[]) {
652652
{5,"Cuckoo16"}, {6,"CuckooSemiSort13" }, {7,"Bloom8"},
653653
{8,"Bloom12" }, {9,"Bloom16"}, {10,"BlockedBloom"},
654654
{11,"sort"}, {12,"Xor+8"}, {13,"Xor+16"},
655-
{14,"GCS"}, {15,"CQF"}, {25, "Xor10"}, {37,"Bloom8 (addall)"},
655+
{14,"GCS"}, {15,"CQF"}, {22, "Xor10 (NBitArray)"}, {23, "Xor14 (NBitArray)"},
656+
{24, "Xor10.x"}, {25, "Xor10"},{26, "Xor10.666"}, {37,"Bloom8 (addall)"},
656657
{38,"Bloom12 (addall)"},{39,"Bloom16 (addall)"},
657658
{40,"BlockedBloom (addall)"}
658659
};
660+
659661
if (argc < 2) {
660662
cout << "Usage: " << argv[0] << " <numberOfEntries> [<algorithmId> [<seed>]]" << endl;
661663
cout << " numberOfEntries: number of keys, we recommend at least 100000000" << endl;
@@ -699,12 +701,16 @@ int main(int argc, char * argv[]) {
699701
return 2;
700702
}
701703
}
704+
size_t actual_sample_size = MAX_SAMPLE_SIZE;
705+
if (actual_sample_size > add_count) {
706+
actual_sample_size = add_count;
707+
}
702708
vector<uint64_t> to_add = seed == -1 ?
703-
GenerateRandom64Fast(add_count, rand()) :
704-
GenerateRandom64Fast(add_count, seed);
709+
GenerateRandom64Fast(actual_sample_size, rand()) :
710+
GenerateRandom64Fast(actual_sample_size, seed);
705711
vector<uint64_t> to_lookup = seed == -1 ?
706-
GenerateRandom64Fast(SAMPLE_SIZE, rand()) :
707-
GenerateRandom64Fast(SAMPLE_SIZE, seed + add_count);
712+
GenerateRandom64Fast(actual_sample_size, rand()) :
713+
GenerateRandom64Fast(actual_sample_size, seed + add_count);
708714

709715
if (seed >= 0 && seed < 64) {
710716
// seeds 0-63 (i.e. 0 <= seed < 64) are special seeds
@@ -728,7 +734,7 @@ int main(int argc, char * argv[]) {
728734
}
729735
}
730736

731-
assert(to_lookup.size() == SAMPLE_SIZE);
737+
assert(to_lookup.size() == actual_sample_size);
732738
size_t distinct_lookup;
733739
size_t distinct_add;
734740
std::cout << "checking match size... " << std::flush;
@@ -749,14 +755,11 @@ int main(int argc, char * argv[]) {
749755
cout << "WARNING: Filter contains "<< (to_add.size() - distinct_add) << " duplicates." << endl;
750756
hasduplicates = true;
751757
}
752-
size_t actual_sample_size = SAMPLE_SIZE;
753-
if (actual_sample_size > add_count) {
754-
cout << "WARNING: Your set contains only " << add_count << ". We can't very well support a sample size of " << SAMPLE_SIZE << endl;
755-
actual_sample_size = add_count;
756-
}
758+
757759

758760
if (actual_sample_size > to_lookup.size()) {
759-
throw out_of_range("to_lookup must contain at least SAMPLE_SIZE values");
761+
std::cerr << "actual_sample_size = "<< actual_sample_size << std::endl;
762+
throw out_of_range("to_lookup must contain at least actual_sample_size values");
760763
}
761764

762765
std::vector<samples_t> mixed_sets;
@@ -952,42 +955,44 @@ int main(int argc, char * argv[]) {
952955
auto cf = FilterBenchmark<
953956
XorFilter2<uint64_t, uint16_t, NBitArray<uint16_t, 10>, SimpleMixSplit>>(
954957
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
955-
cout << setw(NAME_WIDTH) << "Xor10" << cf << endl;
958+
cout << setw(NAME_WIDTH) << names[22] << cf << endl;
956959
}
957960

961+
958962
if (algorithmId == 23 || (algos.find(23) != algos.end())) {
959963
auto cf = FilterBenchmark<
960964
XorFilter2<uint64_t, uint16_t, NBitArray<uint16_t, 14>, SimpleMixSplit>>(
961965
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
962-
cout << setw(NAME_WIDTH) << "Xor14" << cf << endl;
966+
cout << setw(NAME_WIDTH) << names[23] << cf << endl;
963967
}
964968

969+
965970
if (algorithmId == 24 || (algos.find(24) != algos.end())) {
966971
auto cf = FilterBenchmark<
967972
XorFilter2<uint64_t, uint32_t, UInt10Array, SimpleMixSplit>>(
968973
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
969-
cout << setw(NAME_WIDTH) << "Xor10.x" << cf << endl;
974+
cout << setw(NAME_WIDTH) << names[24] << cf << endl;
970975
}
971976

972977
if (algorithmId == 25 || (algos.find(25) != algos.end())) {
973978
auto cf = FilterBenchmark<
974979
XorFilter10<uint64_t, SimpleMixSplit>>(
975980
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
976-
cout << setw(NAME_WIDTH) << "Xor10" << cf << endl;
981+
cout << setw(NAME_WIDTH) << names[25] << cf << endl;
977982
}
978983

979984
if (algorithmId == 26 || (algos.find(26) != algos.end())) {
980985
auto cf = FilterBenchmark<
981986
XorFilter10_666<uint64_t, SimpleMixSplit>>(
982987
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
983-
cout << setw(NAME_WIDTH) << "Xor10.666" << cf << endl;
988+
cout << setw(NAME_WIDTH) << names[26] << cf << endl;
984989
}
985990

986991
if (algorithmId == 27 || (algos.find(27) != algos.end())) {
987992
auto cf = FilterBenchmark<
988993
XorFilter13<uint64_t, SimpleMixSplit>>(
989994
add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
990-
cout << setw(NAME_WIDTH) << "Xor13" << cf << endl;
995+
cout << setw(NAME_WIDTH) << names[27] << cf << endl;
991996
}
992997

993998
if (algorithmId == 37 || algorithmId < 0 || (algos.find(37) != algos.end())) {

0 commit comments

Comments
 (0)