@@ -57,7 +57,7 @@ using namespace gqfilter;
5757#endif
5858
5959// The maximum number of items sampled when determining the lookup performance (capped at the number of keys added)
60- const size_t SAMPLE_SIZE = 10 * 1000 * 1000 ;
60+ const size_t MAX_SAMPLE_SIZE = 10 * 1000 * 1000 ;
6161
6262// The statistics gathered for each table type:
6363struct Statistics {
@@ -652,10 +652,12 @@ int main(int argc, char * argv[]) {
652652 {5 ," Cuckoo16" }, {6 ," CuckooSemiSort13" }, {7 ," Bloom8" },
653653 {8 ," Bloom12" }, {9 ," Bloom16" }, {10 ," BlockedBloom" },
654654 {11 ," sort" }, {12 ," Xor+8" }, {13 ," Xor+16" },
655- {14 ," GCS" }, {15 ," CQF" }, {25 , " Xor10" }, {37 ," Bloom8 (addall)" },
655+ {14 ," GCS" }, {15 ," CQF" }, {22 , " Xor10 (NBitArray)" }, {23 , " Xor14 (NBitArray)" },
656+ {24 , " Xor10.x" }, {25 , " Xor10" },{26 , " Xor10.666" }, {37 ," Bloom8 (addall)" },
656657 {38 ," Bloom12 (addall)" },{39 ," Bloom16 (addall)" },
657658 {40 ," BlockedBloom (addall)" }
658659 };
660+
659661 if (argc < 2 ) {
660662 cout << " Usage: " << argv[0 ] << " <numberOfEntries> [<algorithmId> [<seed>]]" << endl;
661663 cout << " numberOfEntries: number of keys, we recommend at least 100000000" << endl;
@@ -699,12 +701,16 @@ int main(int argc, char * argv[]) {
699701 return 2 ;
700702 }
701703 }
704+ size_t actual_sample_size = MAX_SAMPLE_SIZE;
705+ if (actual_sample_size > add_count) {
706+ actual_sample_size = add_count;
707+ }
702708 vector<uint64_t > to_add = seed == -1 ?
703- GenerateRandom64Fast (add_count , rand ()) :
704- GenerateRandom64Fast (add_count , seed);
709+ GenerateRandom64Fast (actual_sample_size , rand ()) :
710+ GenerateRandom64Fast (actual_sample_size , seed);
705711 vector<uint64_t > to_lookup = seed == -1 ?
706- GenerateRandom64Fast (SAMPLE_SIZE , rand ()) :
707- GenerateRandom64Fast (SAMPLE_SIZE , seed + add_count);
712+ GenerateRandom64Fast (actual_sample_size , rand ()) :
713+ GenerateRandom64Fast (actual_sample_size , seed + add_count);
708714
709715 if (seed >= 0 && seed < 64 ) {
710716 // 0-64 are special seeds
@@ -728,7 +734,7 @@ int main(int argc, char * argv[]) {
728734 }
729735 }
730736
731- assert (to_lookup.size () == SAMPLE_SIZE );
737+ assert (to_lookup.size () == actual_sample_size );
732738 size_t distinct_lookup;
733739 size_t distinct_add;
734740 std::cout << " checking match size... " << std::flush;
@@ -749,14 +755,11 @@ int main(int argc, char * argv[]) {
749755 cout << " WARNING: Filter contains " << (to_add.size () - distinct_add) << " duplicates." << endl;
750756 hasduplicates = true ;
751757 }
752- size_t actual_sample_size = SAMPLE_SIZE;
753- if (actual_sample_size > add_count) {
754- cout << " WARNING: Your set contains only " << add_count << " . We can't very well support a sample size of " << SAMPLE_SIZE << endl;
755- actual_sample_size = add_count;
756- }
758+
757759
758760 if (actual_sample_size > to_lookup.size ()) {
759- throw out_of_range (" to_lookup must contain at least SAMPLE_SIZE values" );
761+ std::cerr << " actual_sample_size = " << actual_sample_size << std::endl;
762+ throw out_of_range (" to_lookup must contain at least actual_sample_size values" );
760763 }
761764
762765 std::vector<samples_t > mixed_sets;
@@ -952,42 +955,44 @@ int main(int argc, char * argv[]) {
952955 auto cf = FilterBenchmark<
953956 XorFilter2<uint64_t , uint16_t , NBitArray<uint16_t , 10 >, SimpleMixSplit>>(
954957 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
955- cout << setw (NAME_WIDTH) << " Xor10 " << cf << endl;
958+ cout << setw (NAME_WIDTH) << names[ 22 ] << cf << endl;
956959 }
957960
961+
958962 if (algorithmId == 23 || (algos.find (23 ) != algos.end ())) {
959963 auto cf = FilterBenchmark<
960964 XorFilter2<uint64_t , uint16_t , NBitArray<uint16_t , 14 >, SimpleMixSplit>>(
961965 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
962- cout << setw (NAME_WIDTH) << " Xor14 " << cf << endl;
966+ cout << setw (NAME_WIDTH) << names[ 23 ] << cf << endl;
963967 }
964968
969+
965970 if (algorithmId == 24 || (algos.find (24 ) != algos.end ())) {
966971 auto cf = FilterBenchmark<
967972 XorFilter2<uint64_t , uint32_t , UInt10Array, SimpleMixSplit>>(
968973 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
969- cout << setw (NAME_WIDTH) << " Xor10.x " << cf << endl;
974+ cout << setw (NAME_WIDTH) << names[ 24 ] << cf << endl;
970975 }
971976
972977 if (algorithmId == 25 || (algos.find (25 ) != algos.end ())) {
973978 auto cf = FilterBenchmark<
974979 XorFilter10<uint64_t , SimpleMixSplit>>(
975980 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
976- cout << setw (NAME_WIDTH) << " Xor10 " << cf << endl;
981+ cout << setw (NAME_WIDTH) << names[ 25 ] << cf << endl;
977982 }
978983
979984 if (algorithmId == 26 || (algos.find (26 ) != algos.end ())) {
980985 auto cf = FilterBenchmark<
981986 XorFilter10_666<uint64_t , SimpleMixSplit>>(
982987 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
983- cout << setw (NAME_WIDTH) << " Xor10.666 " << cf << endl;
988+ cout << setw (NAME_WIDTH) << names[ 26 ] << cf << endl;
984989 }
985990
986991 if (algorithmId == 27 || (algos.find (27 ) != algos.end ())) {
987992 auto cf = FilterBenchmark<
988993 XorFilter13<uint64_t , SimpleMixSplit>>(
989994 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
990- cout << setw (NAME_WIDTH) << " Xor13 " << cf << endl;
995+ cout << setw (NAME_WIDTH) << names[ 27 ] << cf << endl;
991996 }
992997
993998 if (algorithmId == 37 || algorithmId < 0 || (algos.find (37 ) != algos.end ())) {
0 commit comments