@@ -418,6 +418,20 @@ Statistics FilterBenchmark(
418418
419419 Table filter = FilterAPI<Table>::ConstructFromAddCount (add_count);
420420 Statistics result;
421+ #ifdef __linux__
422+ vector<int > evts;
423+ evts.push_back (PERF_COUNT_HW_CPU_CYCLES);
424+ evts.push_back (PERF_COUNT_HW_INSTRUCTIONS);
425+ evts.push_back (PERF_COUNT_HW_CACHE_MISSES);
426+ evts.push_back (PERF_COUNT_HW_BRANCH_MISSES);
427+ LinuxEvents<PERF_TYPE_HARDWARE> unified (evts);
428+ vector<unsigned long long > results;
429+ results.resize (evts.size ());
430+ cout << endl;
431+ unified.start ();
432+ #else
433+ std::cout << " -" << std::flush;
434+ #endif
421435
422436 // Add values until failure or until we run out of values to add:
423437 auto start_time = NowNanos ();
@@ -433,54 +447,51 @@ Statistics FilterBenchmark(
433447 }
434448 std::cout << " \r \r " << std::flush;
435449
436-
437450 // sanity check:
438451 for (size_t added = 0 ; added < add_count; ++added) {
439452 assert (FilterAPI<Table>::Contain (to_add[added], &filter) == 1 );
440453 }
441454 auto time = NowNanos () - start_time;
455+
456+ #ifdef __linux__
457+ unified.end (results);
458+ printf (" cycles: %10.zu (%10.3f per key) instructions: %10.zu (%10.3f per key, %10.3f per cycle) cache misses: %10.zu (%10.3f per key) branch misses: %10.zu (%10.3f per key)\n " ,
459+ (size_t )results[0 ], results[0 ]*1.0 /to_lookup_mixed.size (), (size_t )results[1 ], results[1 ]*1.0 /to_lookup_mixed.size () , results[1 ]*1.0 /results[0 ], (size_t )results[2 ], results[2 ]*1.0 /to_lookup_mixed.size (),
460+ (size_t )results[3 ], results[3 ] * 1.0 /to_lookup_mixed.size ());
461+ #else
462+ std::cout << " ." << std::flush;
463+ #endif
464+
442465 result.add_count = add_count;
443466 result.nanos_per_add = static_cast <double >(time) / add_count;
444467 result.bits_per_item = static_cast <double >(CHAR_BIT * filter.SizeInBytes ()) / add_count;
445468 size_t found_count = 0 ;
446- #ifdef __linux__
447- vector<int > evts;
448- evts.push_back (PERF_COUNT_HW_CPU_CYCLES);
449- evts.push_back (PERF_COUNT_HW_INSTRUCTIONS);
450- evts.push_back (PERF_COUNT_HW_CACHE_MISSES);
451- evts.push_back (PERF_COUNT_HW_BRANCH_MISSES);
452- LinuxEvents<PERF_TYPE_HARDWARE> unified (evts);
453- vector<unsigned long long > results;
454- results.resize (evts.size ());
455- cout << endl;
456- #endif
457469
458470 for (auto t : mixed_sets) {
459471 const double found_probability = t.found_probability ;
460472 const auto to_lookup_mixed = t.to_lookup_mixed ;
461473 size_t true_match = t.true_match ;
462474
463- const auto start_time = NowNanos ();
464- found_count = 0 ;
465475#ifdef __linux__
466476 unified.start ();
467477#else
468478 std::cout << " -" << std::flush;
469479#endif
480+ const auto start_time = NowNanos ();
481+ found_count = 0 ;
470482 for (const auto v : to_lookup_mixed) {
471483 found_count += FilterAPI<Table>::Contain (v, &filter);
472484 }
485+ const auto lookup_time = NowNanos () - start_time;
473486#ifdef __linux__
474487 unified.end (results);
475- printf (" cycles = %10.zu (cycles per key %10.3f) instructions = %10.zu (ins/key %10.3f,ins/cycles %10.3f) cache misses = %10.zu (misses per keys %10.3f) branch misses = %10.zu (misses per keys %10.3f) \n " ,
488+ printf (" cycles: %10.zu (%10.3f per key ) instructions: %10.zu (%10.3f per key, %10.3f per cycle ) cache misses: %10.zu (%10.3f per key ) branch misses: %10.zu (%10.3f per key) \n " ,
476489 (size_t )results[0 ], results[0 ]*1.0 /to_lookup_mixed.size (), (size_t )results[1 ], results[1 ]*1.0 /to_lookup_mixed.size () , results[1 ]*1.0 /results[0 ], (size_t )results[2 ], results[2 ]*1.0 /to_lookup_mixed.size (),
477490 (size_t )results[3 ], results[3 ] * 1.0 /to_lookup_mixed.size ());
478491#else
479492 std::cout << " ." << std::flush;
480493#endif
481494
482- const auto lookup_time = NowNanos () - start_time;
483-
484495 if (found_count < true_match) {
485496 cerr << " ERROR: Expected to find at least " << true_match << " found " << found_count << endl;
486497 cerr << " ERROR: This is a potential bug!" << endl;
0 commit comments