@@ -57,7 +57,7 @@ using namespace gqfilter;
5757#endif
5858
5959// Upper bound on the number of items sampled when determining the lookup performance
60- const size_t SAMPLE_SIZE = 10 * 1000 * 1000 ;
60+ const size_t MAX_SAMPLE_SIZE = 10 * 1000 * 1000 ;
6161
6262// The statistics gathered for each table type:
6363struct Statistics {
@@ -509,9 +509,13 @@ Statistics FilterBenchmark(
509509
510510#ifdef __linux__
511511 unified.end (results);
512- printf (" cycles: %10.zu (%10.3f per key) instructions: %10.zu (%10.3f per key, %10.3f per cycle) cache misses: %10.zu (%10.3f per key) branch misses: %10.zu (%10.3f per key)\n " ,
513- (size_t )results[0 ], results[0 ]*1.0 /add_count, (size_t )results[1 ], results[1 ]*1.0 /add_count , results[1 ]*1.0 /results[0 ], (size_t )results[2 ], results[2 ]*1.0 /add_count,
514- (size_t )results[3 ], results[3 ] * 1.0 /add_count);
512+ printf (" adds " );
513+ printf (" cycles: %4.1f/key, instructions: (%4.1f/key, %4.1f/cycle) cache misses: %4.2f/key branch misses: %4.2f/key\n " ,
514+ results[0 ]*1.0 /add_count,
515+ results[1 ]*1.0 /add_count ,
516+ results[1 ]*1.0 /results[0 ],
517+ results[2 ]*1.0 /add_count,
518+ results[3 ] * 1.0 /add_count);
515519#else
516520 std::cout << " ." << std::flush;
517521#endif
@@ -539,9 +543,13 @@ Statistics FilterBenchmark(
539543 const auto lookup_time = NowNanos () - start_time;
540544#ifdef __linux__
541545 unified.end (results);
542- printf (" cycles: %10.zu (%10.3f per key) instructions: %10.zu (%10.3f per key, %10.3f per cycle) cache misses: %10.zu (%10.3f per key) branch misses: %10.zu (%10.3f per key)\n " ,
543- (size_t )results[0 ], results[0 ]*1.0 /to_lookup_mixed.size (), (size_t )results[1 ], results[1 ]*1.0 /to_lookup_mixed.size () , results[1 ]*1.0 /results[0 ], (size_t )results[2 ], results[2 ]*1.0 /to_lookup_mixed.size (),
544- (size_t )results[3 ], results[3 ] * 1.0 /to_lookup_mixed.size ());
546+ printf (" %3.2f%% " ,found_probability);
547+ printf (" cycles: %4.1f/key, instructions: (%4.1f/key, %4.1f/cycle) cache misses: %4.2f/key branch misses: %4.1f/key\n " ,
548+ results[0 ]*1.0 /to_lookup_mixed.size (),
549+ results[1 ]*1.0 /to_lookup_mixed.size (),
550+ results[1 ]*1.0 /results[0 ],
551+ results[2 ]*1.0 /to_lookup_mixed.size (),
552+ results[3 ] * 1.0 /to_lookup_mixed.size ());
545553#else
546554 std::cout << " ." << std::flush;
547555#endif
@@ -652,18 +660,21 @@ int main(int argc, char * argv[]) {
652660 {5 ," Cuckoo16" }, {6 ," CuckooSemiSort13" }, {7 ," Bloom8" },
653661 {8 ," Bloom12" }, {9 ," Bloom16" }, {10 ," BlockedBloom" },
654662 {11 ," sort" }, {12 ," Xor+8" }, {13 ," Xor+16" },
655- {14 ," GCS" }, {15 ," CQF" }, {25 , " Xor10" }, {37 ," Bloom8 (addall)" },
663+ {14 ," GCS" }, {15 ," CQF" }, {22 , " Xor10 (NBitArray)" }, {23 , " Xor14 (NBitArray)" },
664+ {25 , " Xor10" },{26 , " Xor10.666" }, {37 ," Bloom8 (addall)" },
656665 {38 ," Bloom12 (addall)" },{39 ," Bloom16 (addall)" },
657666 {40 ," BlockedBloom (addall)" }
658667 };
668+
659669 if (argc < 2 ) {
660670 cout << " Usage: " << argv[0 ] << " <numberOfEntries> [<algorithmId> [<seed>]]" << endl;
661671 cout << " numberOfEntries: number of keys, we recommend at least 100000000" << endl;
662- cout << " algorithmId: -1 for all ( default) , or 0..n to only run this algorithm" << endl;
672+ cout << " algorithmId: -1 for all default algos , or 0..n to only run this algorithm" << endl;
663673 cout << " algorithmId: can also be a comma-separated list of non-negative integers" << endl;
664674 for (auto i : names) {
665675 cout << " " << i.first << " : " << i.second << endl;
666676 }
677+ cout << " algorithmId: can also be set to the string 'all' if you want to run them all, including some that are excluded by default" << endl;
667678 cout << " seed: seed for the PRNG; -1 for random seed (default)" << endl;
668679 return 1 ;
669680 }
@@ -674,14 +685,19 @@ int main(int argc, char * argv[]) {
674685 cerr << " Invalid number: " << argv[1 ];
675686 return 2 ;
676687 }
677- int algorithmId = -1 ;
688+ int algorithmId = -1 ; // -1 is just the default
678689 std::set<int > algos;
679690 if (argc > 2 ) {
680- if (strstr (argv[2 ]," ," ) != NULL ) {
691+ if (strcmp (argv[2 ]," all" ) == 0 ) {
692+ for (auto i : names) {// we add all the named algos.
693+ algos.insert (i.first );
694+ }
695+ } else if (strstr (argv[2 ]," ," ) != NULL ) {
681696 // we have a list of algos
682697 algorithmId = 9999999 ; // disabling
683698 parse_comma_separated (argv[2 ], algos);
684699 } else {
700+ // we select just one
685701 stringstream input_string_2 (argv[2 ]);
686702 input_string_2 >> algorithmId;
687703 if (input_string_2.fail ()) {
@@ -699,12 +715,16 @@ int main(int argc, char * argv[]) {
699715 return 2 ;
700716 }
701717 }
718+ size_t actual_sample_size = MAX_SAMPLE_SIZE;
719+ if (actual_sample_size > add_count) {
720+ actual_sample_size = add_count;
721+ }
702722 vector<uint64_t > to_add = seed == -1 ?
703723 GenerateRandom64Fast (add_count, rand ()) :
704724 GenerateRandom64Fast (add_count, seed);
705725 vector<uint64_t > to_lookup = seed == -1 ?
706- GenerateRandom64Fast (SAMPLE_SIZE , rand ()) :
707- GenerateRandom64Fast (SAMPLE_SIZE , seed + add_count);
726+ GenerateRandom64Fast (actual_sample_size , rand ()) :
727+ GenerateRandom64Fast (actual_sample_size , seed + add_count);
708728
709729 if (seed >= 0 && seed < 64 ) {
710730 // 0-64 are special seeds
@@ -728,7 +748,7 @@ int main(int argc, char * argv[]) {
728748 }
729749 }
730750
731- assert (to_lookup.size () == SAMPLE_SIZE );
751+ assert (to_lookup.size () == actual_sample_size );
732752 size_t distinct_lookup;
733753 size_t distinct_add;
734754 std::cout << " checking match size... " << std::flush;
@@ -749,14 +769,11 @@ int main(int argc, char * argv[]) {
749769 cout << " WARNING: Filter contains " << (to_add.size () - distinct_add) << " duplicates." << endl;
750770 hasduplicates = true ;
751771 }
752- size_t actual_sample_size = SAMPLE_SIZE;
753- if (actual_sample_size > add_count) {
754- cout << " WARNING: Your set contains only " << add_count << " . We can't very well support a sample size of " << SAMPLE_SIZE << endl;
755- actual_sample_size = add_count;
756- }
772+
757773
758774 if (actual_sample_size > to_lookup.size ()) {
759- throw out_of_range (" to_lookup must contain at least SAMPLE_SIZE values" );
775+ std::cerr << " actual_sample_size = " << actual_sample_size << std::endl;
776+ throw out_of_range (" to_lookup must contain at least actual_sample_size values" );
760777 }
761778
762779 std::vector<samples_t > mixed_sets;
@@ -952,42 +969,44 @@ int main(int argc, char * argv[]) {
952969 auto cf = FilterBenchmark<
953970 XorFilter2<uint64_t , uint16_t , NBitArray<uint16_t , 10 >, SimpleMixSplit>>(
954971 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
955- cout << setw (NAME_WIDTH) << " Xor10 " << cf << endl;
972+ cout << setw (NAME_WIDTH) << names[ 22 ] << cf << endl;
956973 }
957974
975+
958976 if (algorithmId == 23 || (algos.find (23 ) != algos.end ())) {
959977 auto cf = FilterBenchmark<
960978 XorFilter2<uint64_t , uint16_t , NBitArray<uint16_t , 14 >, SimpleMixSplit>>(
961979 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
962- cout << setw (NAME_WIDTH) << " Xor14 " << cf << endl;
980+ cout << setw (NAME_WIDTH) << names[ 23 ] << cf << endl;
963981 }
964982
965- if (algorithmId == 24 || (algos.find (24 ) != algos.end ())) {
983+ // this algo overflows and crashes
984+ /* if (algorithmId == 24 || (algos.find(24) != algos.end())) {
966985 auto cf = FilterBenchmark<
967986 XorFilter2<uint64_t, uint32_t, UInt10Array, SimpleMixSplit>>(
968987 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
969- cout << setw (NAME_WIDTH) << " Xor10.x " << cf << endl;
970- }
988+ cout << setw(NAME_WIDTH) << names[24] << cf << endl;
989+ }*/
971990
972991 if (algorithmId == 25 || (algos.find (25 ) != algos.end ())) {
973992 auto cf = FilterBenchmark<
974993 XorFilter10<uint64_t , SimpleMixSplit>>(
975994 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
976- cout << setw (NAME_WIDTH) << " Xor10 " << cf << endl;
995+ cout << setw (NAME_WIDTH) << names[ 25 ] << cf << endl;
977996 }
978997
979998 if (algorithmId == 26 || (algos.find (26 ) != algos.end ())) {
980999 auto cf = FilterBenchmark<
9811000 XorFilter10_666<uint64_t , SimpleMixSplit>>(
9821001 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
983- cout << setw (NAME_WIDTH) << " Xor10.666 " << cf << endl;
1002+ cout << setw (NAME_WIDTH) << names[ 26 ] << cf << endl;
9841003 }
9851004
9861005 if (algorithmId == 27 || (algos.find (27 ) != algos.end ())) {
9871006 auto cf = FilterBenchmark<
9881007 XorFilter13<uint64_t , SimpleMixSplit>>(
9891008 add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true );
990- cout << setw (NAME_WIDTH) << " Xor13 " << cf << endl;
1009+ cout << setw (NAME_WIDTH) << names[ 27 ] << cf << endl;
9911010 }
9921011
9931012 if (algorithmId == 37 || algorithmId < 0 || (algos.find (37 ) != algos.end ())) {
0 commit comments