|
8 | 8 | #include <assert.h>
|
9 | 9 | #include <iostream>
|
10 | 10 | #include <iomanip>
|
| 11 | +#include <algorithm> |
| 12 | +#include <regex> |
| 13 | +#include <numeric> |
11 | 14 |
|
12 |
| -benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { |
13 |
| - static std::map<std::string, benchmark::BenchFunction> benchmarks_map; |
14 |
| - return benchmarks_map; |
| 15 | +void benchmark::ConsolePrinter::header() |
| 16 | +{ |
| 17 | + std::cout << "# Benchmark, evals, iterations, total, min, max, median" << std::endl; |
15 | 18 | }
|
16 | 19 |
|
17 |
| -benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) |
| 20 | +void benchmark::ConsolePrinter::result(const State& state) |
18 | 21 | {
|
19 |
| - benchmarks().insert(std::make_pair(name, func)); |
| 22 | + auto results = state.m_elapsed_results; |
| 23 | + std::sort(results.begin(), results.end()); |
| 24 | + |
| 25 | + double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0); |
| 26 | + |
| 27 | + double front = 0; |
| 28 | + double back = 0; |
| 29 | + double median = 0; |
| 30 | + |
| 31 | + if (!results.empty()) { |
| 32 | + front = results.front(); |
| 33 | + back = results.back(); |
| 34 | + |
| 35 | + size_t mid = results.size() / 2; |
| 36 | + median = results[mid]; |
| 37 | + if (0 == results.size() % 2) { |
| 38 | + median = (results[mid] + results[mid + 1]) / 2; |
| 39 | + } |
| 40 | + } |
| 41 | + |
| 42 | + std::cout << std::setprecision(6); |
| 43 | + std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl; |
20 | 44 | }
|
21 | 45 |
|
22 |
| -void |
23 |
| -benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) |
| 46 | +void benchmark::ConsolePrinter::footer() {} |
| 47 | +benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height) |
| 48 | + : m_plotly_url(plotly_url), m_width(width), m_height(height) |
24 | 49 | {
|
25 |
| - perf_init(); |
26 |
| - if (std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { |
27 |
| - std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; |
28 |
| - } |
29 |
| - std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << "," |
30 |
| - << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; |
| 50 | +} |
31 | 51 |
|
32 |
| - for (const auto &p: benchmarks()) { |
33 |
| - State state(p.first, elapsedTimeForOne); |
34 |
| - p.second(state); |
35 |
| - } |
36 |
| - perf_fini(); |
| 52 | +void benchmark::PlotlyPrinter::header() |
| 53 | +{ |
| 54 | + std::cout << "<html><head>" |
| 55 | + << "<script src=\"" << m_plotly_url << "\"></script>" |
| 56 | + << "</head><body><div id=\"myDiv\" style=\"width:" << m_width << "px; height:" << m_height << "px\"></div>" |
| 57 | + << "<script> var data = [" |
| 58 | + << std::endl; |
37 | 59 | }
|
38 | 60 |
|
39 |
| -bool benchmark::State::KeepRunning() |
| 61 | +void benchmark::PlotlyPrinter::result(const State& state) |
40 | 62 | {
|
41 |
| - if (count & countMask) { |
42 |
| - ++count; |
43 |
| - return true; |
| 63 | + std::cout << "{ " << std::endl |
| 64 | + << " name: '" << state.m_name << "', " << std::endl |
| 65 | + << " y: ["; |
| 66 | + |
| 67 | + const char* prefix = ""; |
| 68 | + for (const auto& e : state.m_elapsed_results) { |
| 69 | + std::cout << prefix << std::setprecision(6) << e; |
| 70 | + prefix = ", "; |
44 | 71 | }
|
45 |
| - time_point now; |
| 72 | + std::cout << "]," << std::endl |
| 73 | + << " boxpoints: 'all', jitter: 0.3, pointpos: 0, type: 'box'," |
| 74 | + << std::endl |
| 75 | + << "}," << std::endl; |
| 76 | +} |
| 77 | + |
| 78 | +void benchmark::PlotlyPrinter::footer() |
| 79 | +{ |
| 80 | + std::cout << "]; var layout = { showlegend: false, yaxis: { rangemode: 'tozero', autorange: true } };" |
| 81 | + << "Plotly.newPlot('myDiv', data, layout);" |
| 82 | + << "</script></body></html>"; |
| 83 | +} |
46 | 84 |
|
47 |
| - uint64_t nowCycles; |
48 |
| - if (count == 0) { |
49 |
| - lastTime = beginTime = now = clock::now(); |
50 |
| - lastCycles = beginCycles = nowCycles = perf_cpucycles(); |
| 85 | + |
| 86 | +benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks() |
| 87 | +{ |
| 88 | + static std::map<std::string, Bench> benchmarks_map; |
| 89 | + return benchmarks_map; |
| 90 | +} |
| 91 | + |
| 92 | +benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second) |
| 93 | +{ |
| 94 | + benchmarks().insert(std::make_pair(name, Bench{func, num_iters_for_one_second})); |
| 95 | +} |
| 96 | + |
| 97 | +void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only) |
| 98 | +{ |
| 99 | + perf_init(); |
| 100 | + if (!std::ratio_less_equal<benchmark::clock::period, std::micro>::value) { |
| 101 | + std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n"; |
51 | 102 | }
|
52 |
| - else { |
53 |
| - now = clock::now(); |
54 |
| - auto elapsed = now - lastTime; |
55 |
| - auto elapsedOne = elapsed / (countMask + 1); |
56 |
| - if (elapsedOne < minTime) minTime = elapsedOne; |
57 |
| - if (elapsedOne > maxTime) maxTime = elapsedOne; |
58 |
| - |
59 |
| - // We only use relative values, so don't have to handle 64-bit wrap-around specially |
60 |
| - nowCycles = perf_cpucycles(); |
61 |
| - uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); |
62 |
| - if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; |
63 |
| - if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; |
64 |
| - |
65 |
| - if (elapsed*128 < maxElapsed) { |
66 |
| - // If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing. |
67 |
| - // The restart avoids including the overhead of this code in the measurement. |
68 |
| - countMask = ((countMask<<3)|7) & ((1LL<<60)-1); |
69 |
| - count = 0; |
70 |
| - minTime = duration::max(); |
71 |
| - maxTime = duration::zero(); |
72 |
| - minCycles = std::numeric_limits<uint64_t>::max(); |
73 |
| - maxCycles = std::numeric_limits<uint64_t>::min(); |
74 |
| - return true; |
| 103 | + |
| 104 | + std::regex reFilter(filter); |
| 105 | + std::smatch baseMatch; |
| 106 | + |
| 107 | + printer.header(); |
| 108 | + |
| 109 | + for (const auto& p : benchmarks()) { |
| 110 | + if (!std::regex_match(p.first, baseMatch, reFilter)) { |
| 111 | + continue; |
| 112 | + } |
| 113 | + |
| 114 | + uint64_t num_iters = static_cast<uint64_t>(p.second.num_iters_for_one_second * scaling); |
| 115 | + if (0 == num_iters) { |
| 116 | + num_iters = 1; |
75 | 117 | }
|
76 |
| - if (elapsed*16 < maxElapsed) { |
77 |
| - uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1); |
78 |
| - if ((count & newCountMask)==0) { |
79 |
| - countMask = newCountMask; |
80 |
| - } |
| 118 | + State state(p.first, num_evals, num_iters, printer); |
| 119 | + if (!is_list_only) { |
| 120 | + p.second.func(state); |
81 | 121 | }
|
| 122 | + printer.result(state); |
82 | 123 | }
|
83 |
| - lastTime = now; |
84 |
| - lastCycles = nowCycles; |
85 |
| - ++count; |
86 | 124 |
|
87 |
| - if (now - beginTime < maxElapsed) return true; // Keep going |
| 125 | + printer.footer(); |
88 | 126 |
|
89 |
| - --count; |
| 127 | + perf_fini(); |
| 128 | +} |
90 | 129 |
|
91 |
| - assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above"); |
| 130 | +bool benchmark::State::UpdateTimer(const benchmark::time_point current_time) |
| 131 | +{ |
| 132 | + if (m_start_time != time_point()) { |
| 133 | + std::chrono::duration<double> diff = current_time - m_start_time; |
| 134 | + m_elapsed_results.push_back(diff.count() / m_num_iters); |
92 | 135 |
|
93 |
| - // Output results |
94 |
| - // Duration casts are only necessary here because hardware with sub-nanosecond clocks |
95 |
| - // will lose precision. |
96 |
| - int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count(); |
97 |
| - int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count(); |
98 |
| - int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count(); |
99 |
| - int64_t averageCycles = (nowCycles-beginCycles)/count; |
100 |
| - std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << "," |
101 |
| - << minCycles << "," << maxCycles << "," << averageCycles << "\n"; |
102 |
| - std::cout.copyfmt(std::ios(nullptr)); |
| 136 | + if (m_elapsed_results.size() == m_num_evals) { |
| 137 | + return false; |
| 138 | + } |
| 139 | + } |
103 | 140 |
|
104 |
| - return false; |
| 141 | + m_num_iters_left = m_num_iters - 1; |
| 142 | + return true; |
105 | 143 | }
|
0 commit comments