33
33
// see https://semver.org/
34
34
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
35
35
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36
- #define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
36
+ #define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
37
37
38
38
// /////////////////////////////////////////////////////////////////////////////////////////////////
39
39
// public facing api - as minimal as possible
88
88
} while (0 )
89
89
#endif
90
90
91
- #if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
92
- !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93
- // only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
94
- // kernel 2.6.32 (all others are).
95
- # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 1
96
- #else
97
- # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 0
91
+ #define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 0
92
+ #if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93
+ # include < linux/version.h>
94
+ # if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
95
+ // PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
96
+ // PERF_FLAG_FD_CLOEXEC since kernel 3.14
97
+ # undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
98
+ # define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS () 1
99
+ # endif
98
100
#endif
99
101
100
102
#if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
2210
2212
columns.emplace_back (10 , 1 , " err%" , " %" , rErrorMedian * 100.0 );
2211
2213
2212
2214
double rInsMedian = -1.0 ;
2213
- if (mResult .has (Result::Measure::instructions)) {
2215
+ if (mBench . performanceCounters () && mResult .has (Result::Measure::instructions)) {
2214
2216
rInsMedian = mResult .median (Result::Measure::instructions);
2215
2217
columns.emplace_back (18 , 2 , " ins/" + mBench .unit (), " " , rInsMedian / mBench .batch ());
2216
2218
}
2217
2219
2218
2220
double rCycMedian = -1.0 ;
2219
- if (mResult .has (Result::Measure::cpucycles)) {
2221
+ if (mBench . performanceCounters () && mResult .has (Result::Measure::cpucycles)) {
2220
2222
rCycMedian = mResult .median (Result::Measure::cpucycles);
2221
2223
columns.emplace_back (18 , 2 , " cyc/" + mBench .unit (), " " , rCycMedian / mBench .batch ());
2222
2224
}
2223
2225
if (rInsMedian > 0.0 && rCycMedian > 0.0 ) {
2224
2226
columns.emplace_back (9 , 3 , " IPC" , " " , rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
2225
2227
}
2226
- if (mResult .has (Result::Measure::branchinstructions)) {
2228
+ if (mBench . performanceCounters () && mResult .has (Result::Measure::branchinstructions)) {
2227
2229
double rBraMedian = mResult .median (Result::Measure::branchinstructions);
2228
2230
columns.emplace_back (17 , 2 , " bra/" + mBench .unit (), " " , rBraMedian / mBench .batch ());
2229
2231
if (mResult .has (Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ class LinuxPerformanceCounters {
2402
2404
return (a + divisor / 2 ) / divisor;
2403
2405
}
2404
2406
2407
+ ANKERL_NANOBENCH_NO_SANITIZE (" integer" , " undefined" )
2408
+ static inline uint32_t mix (uint32_t x) noexcept {
2409
+ x ^= x << 13 ;
2410
+ x ^= x >> 17 ;
2411
+ x ^= x << 5 ;
2412
+ return x;
2413
+ }
2414
+
2405
2415
template <typename Op>
2406
2416
ANKERL_NANOBENCH_NO_SANITIZE (" integer" , " undefined" )
2407
2417
void calibrate (Op&& op) {
@@ -2441,15 +2451,10 @@ class LinuxPerformanceCounters {
2441
2451
uint64_t const numIters = 100000U + (std::random_device{}() & 3 );
2442
2452
uint64_t n = numIters;
2443
2453
uint32_t x = 1234567 ;
2444
- auto fn = [&]() {
2445
- x ^= x << 13 ;
2446
- x ^= x >> 17 ;
2447
- x ^= x << 5 ;
2448
- };
2449
2454
2450
2455
beginMeasure ();
2451
2456
while (n-- > 0 ) {
2452
- fn ( );
2457
+ x = mix (x );
2453
2458
}
2454
2459
endMeasure ();
2455
2460
detail::doNotOptimizeAway (x);
@@ -2459,8 +2464,8 @@ class LinuxPerformanceCounters {
2459
2464
beginMeasure ();
2460
2465
while (n-- > 0 ) {
2461
2466
// we now run *twice* so we can easily calculate the overhead
2462
- fn ( );
2463
- fn ( );
2467
+ x = mix (x );
2468
+ x = mix (x );
2464
2469
}
2465
2470
endMeasure ();
2466
2471
detail::doNotOptimizeAway (x);
0 commit comments