Skip to content

Commit 4e798d5

Browse files
committed
more accurate benchmarks.
1 parent 59a6cf1 commit 4e798d5

File tree

4 files changed

+272
-446
lines changed

4 files changed

+272
-446
lines changed

benchmarks/benchmark.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,9 @@ void evaluateProperties(const std::vector<T> &lines,
8686
}
8787

8888
struct diy_float_t {
89-
uint64_t significand;
89+
diy_float_t(uint64_t significand, int exponent, bool is_negative)
90+
: significand(significand), exponent(exponent), is_negative(is_negative) {}
91+
uint64_t significand;
9092
int exponent;
9193
bool is_negative;
9294
};

benchmarks/benchutil.h

Lines changed: 48 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1,136 +1,72 @@
11
#ifndef BENCHUTIL_H
22
#define BENCHUTIL_H
33

4+
#include "counters/event_counter.h"
45
#include <cfloat>
56
#include <cstdio>
67

7-
#if defined(__linux__) || (__APPLE__ && __aarch64__)
8-
#define USING_COUNTERS
9-
#include "counters/event_counter.h"
10-
#else
11-
#include <chrono>
12-
#endif
8+
#include <atomic>
9+
event_collector collector;
1310

14-
#ifdef USING_COUNTERS
15-
template <class T, class Func>
16-
std::vector<event_count> time_it_ns(const std::vector<T> &lines,
17-
Func&& function, size_t repeat) {
18-
std::vector<event_count> aggregate;
19-
event_collector collector;
20-
bool printed_bug = false;
21-
for (size_t i = 0; i < repeat; i++) {
11+
template <class function_type>
12+
event_aggregate bench(const function_type &&function, size_t min_repeat = 10,
13+
size_t min_time_ns = 100000000,
14+
size_t max_repeat = 1000000) {
15+
event_aggregate aggregate{};
16+
size_t N = min_repeat;
17+
if (N == 0) {
18+
N = 1;
19+
}
20+
for (size_t i = 0; i < N; i++) {
21+
std::atomic_thread_fence(std::memory_order_acquire);
2222
collector.start();
23-
if (function(lines) == 0 && !printed_bug) {
24-
printf("bug\n");
25-
printed_bug = true;
23+
function();
24+
std::atomic_thread_fence(std::memory_order_release);
25+
event_count allocate_count = collector.end();
26+
aggregate << allocate_count;
27+
if ((i + 1 == N) && (aggregate.total_elapsed_ns() < min_time_ns) &&
28+
(N < max_repeat)) {
29+
N *= 10;
2630
}
27-
aggregate.push_back(collector.end());
2831
}
2932
return aggregate;
3033
}
3134

3235
template <class T, class Func>
3336
void pretty_print(const std::vector<T> &lines, const std::string &name,
34-
Func&& function, size_t repeat = 100) {
37+
Func &&function, size_t repeat = 100) {
3538
const size_t number_of_floats = lines.size();
3639
const double volume = static_cast<double>(function(lines));
37-
const double volumeMB = volume / (1024. * 1024.);
38-
const std::vector<event_count> events = time_it_ns(lines, function, repeat);
39-
double average_ns{0};
40-
double min_ns{DBL_MAX};
41-
double cycles_min{DBL_MAX};
42-
double instructions_min{DBL_MAX};
43-
double cycles_avg{0};
44-
double instructions_avg{0};
45-
double branches_min{0};
46-
double branches_avg{0};
47-
double branch_misses_min{0};
48-
double branch_misses_avg{0};
49-
for (event_count e : events) {
50-
const double ns = e.elapsed_ns();
51-
average_ns += ns;
52-
min_ns = std::min(min_ns, ns);
53-
54-
const double cycles = e.cycles();
55-
cycles_avg += cycles;
56-
cycles_min = std::min(cycles_min, cycles);
57-
58-
const double instructions = e.instructions();
59-
instructions_avg += instructions;
60-
instructions_min = std::min(instructions_min, instructions);
61-
62-
const double branches = e.branches();
63-
branches_avg += branches;
64-
branches_min = std::min(branches_min, branches);
65-
66-
const double branch_misses = e.missed_branches();
67-
branch_misses_avg += branch_misses;
68-
branch_misses_min = std::min(branch_misses_min, branch_misses);
69-
}
70-
cycles_avg /= events.size();
71-
instructions_avg /= events.size();
72-
average_ns /= events.size();
73-
branches_avg /= events.size();
40+
const double volumeMB = volume / 1'000'000;
41+
auto agg = bench([&function, &lines]() { return function(lines); }, repeat);
7442

7543
printf("%-30s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
76-
volumeMB * 1000000000 / min_ns,
77-
(average_ns - min_ns) * 100.0 / average_ns);
44+
volumeMB * 1000'000'000 / agg.fastest_elapsed_ns(),
45+
(agg.elapsed_ns() - agg.fastest_elapsed_ns()) * 100.0 /
46+
agg.elapsed_ns());
7847
printf("%8.2f MB ", volumeMB);
79-
printf("%8.2f Mfloat/s ", number_of_floats * 1000 / min_ns);
80-
if (instructions_min > 0) {
81-
printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ", instructions_min / volume,
82-
instructions_min / number_of_floats,
83-
(instructions_avg - instructions_min) * 100.0 / instructions_avg);
48+
printf(" %8.2f ns/f ", agg.fastest_elapsed_ns() / number_of_floats);
49+
printf("%8.2f Mfloat/s\n",
50+
number_of_floats * 1000 / agg.fastest_elapsed_ns());
51+
if (collector.has_events()) {
52+
printf(" ");
53+
printf(" %8.2f i/B %8.2f i/f (+/- %.1f %%) ",
54+
agg.fastest_instructions() / volume,
55+
agg.fastest_instructions() / number_of_floats,
56+
(agg.instructions() - agg.fastest_instructions()) * 100.0 /
57+
agg.instructions());
8458

85-
printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%) ", cycles_min / volume,
86-
cycles_min / number_of_floats,
87-
(cycles_avg - cycles_min) * 100.0 / cycles_avg);
88-
printf(" %8.2f i/c ", instructions_min / cycles_min);
89-
printf(" %8.2f b/f ", branches_avg / number_of_floats);
90-
printf(" %8.2f bm/f ", branch_misses_avg / number_of_floats);
91-
printf(" %8.2f GHz ", cycles_min / min_ns);
92-
}
93-
printf("\n");
94-
}
95-
#else
96-
template <class T, class Func>
97-
std::pair<double, double> time_it_ns(const std::vector<T> &lines,
98-
Func&& function, size_t repeat) {
99-
typename std::chrono::high_resolution_clock::time_point t1, t2;
100-
double average = 0;
101-
double min_value = DBL_MAX;
102-
bool printed_bug = false;
103-
for (size_t i = 0; i < repeat; i++) {
104-
t1 = std::chrono::high_resolution_clock::now();
105-
if (function(lines) == 0 && !printed_bug) {
106-
printf("bug\n");
107-
printed_bug = true;
108-
}
109-
t2 = std::chrono::high_resolution_clock::now();
110-
const double dif =
111-
std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
112-
average += dif;
113-
min_value = std::min(min_value, dif);
59+
printf(" %8.2f c/B %8.2f c/f (+/- %.1f %%)\n", agg.fastest_cycles() / volume,
60+
agg.fastest_cycles() / number_of_floats,
61+
(agg.cycles() - agg.fastest_cycles()) * 100.0 / agg.cycles());
62+
printf(" ");
63+
printf(" %8.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles());
64+
printf(" %8.2f b/f ", agg.branches() / number_of_floats);
65+
printf(" ");
66+
printf(" %8.2f bm/f ", agg.branch_misses() / number_of_floats);
67+
printf(" %8.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns());
68+
printf("\n");
11469
}
115-
average /= repeat;
116-
return std::make_pair(min_value, average);
70+
11771
}
118-
119-
template <class T, class Func>
120-
void pretty_print(const std::vector<T> &lines, const std::string &name,
121-
Func&& function, size_t repeat = 100) {
122-
const size_t number_of_floats = lines.size();
123-
const double volume = static_cast<double>(function(lines));
124-
const double volumeMB = volume / (1024. * 1024.);
125-
const std::pair<double, double> result = time_it_ns(lines, function, repeat);
126-
127-
printf("%-30s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
128-
volumeMB * 1000000000 / result.first,
129-
(result.second - result.first) * 100.0 / result.second);
130-
printf("%8.2f MB ", volumeMB);
131-
printf("%8.2f Mfloat/s ", number_of_floats * 1000 / result.first);
132-
printf(" %8.2f ns/f \n", double(result.first) / number_of_floats);
133-
}
134-
135-
#endif
13672
#endif //// BENCHUTIL_H

0 commit comments

Comments
 (0)