Skip to content

Commit eb2ab3d

Browse files
committed
Merge bitcoin/bitcoin#28877: bench: Update nanobench to 4.3.11
fe434a4 bench: Update nanobench to 4.3.11 (TheCharlatan)

Pull request description:

The newest version fixes the false positive `* Turbo is enabled, CPU frequency will fluctuate` warning on AMD CPUs.

The file was directly taken from the release page: https://github.com/martinus/nanobench/releases/tag/v4.3.11.

Other changes from the release notes:
* Check for failures in parseFile(), perf events tweaks by tommi-cujo in martinus/nanobench#84
* Workaround missing noexcept for std::string move assignment by tommi-cujo in martinus/nanobench#87
* removed the link by martinus in martinus/nanobench#89
* Lots of minor cleanups by martinus in martinus/nanobench#85
* Add linter for version & clang-format. Updated version by martinus in martinus/nanobench#90

ACKs for top commit:
fanquake: ACK fe434a4 - have not tested.

Tree-SHA512: a8f15e1db1d993673e4b295a3bab22e67ee3c9f3c0bcbef28974fe9ff37dbb741967a526088d5b148c8d25c9d57cd3b844238100c17b23038638787461805678
2 parents b217b80 + fe434a4 commit eb2ab3d

File tree

1 file changed

+56
-42
lines changed

1 file changed

+56
-42
lines changed

src/bench/nanobench.h

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
// see https://semver.org/
3434
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
3535
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36-
#define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes
36+
#define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes
3737

3838
///////////////////////////////////////////////////////////////////////////////////////////////////
3939
// public facing api - as minimal as possible
@@ -120,6 +120,10 @@
120120
# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
121121
#endif
122122

123+
// noexcept may be missing for std::string.
124+
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265
125+
#define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE() std::is_nothrow_move_assignable<std::string>::value
126+
123127
// declarations ///////////////////////////////////////////////////////////////////////////////////
124128

125129
namespace ankerl {
@@ -404,7 +408,7 @@ struct Config {
404408
Config();
405409
~Config();
406410
Config& operator=(Config const& other);
407-
Config& operator=(Config&& other) noexcept;
411+
Config& operator=(Config&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
408412
Config(Config const& other);
409413
Config(Config&& other) noexcept;
410414
};
@@ -430,7 +434,7 @@ class Result {
430434

431435
~Result();
432436
Result& operator=(Result const& other);
433-
Result& operator=(Result&& other) noexcept;
437+
Result& operator=(Result&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
434438
Result(Result const& other);
435439
Result(Result&& other) noexcept;
436440

@@ -596,7 +600,7 @@ class Rng final {
596600
*
597601
* @return Vector containing the full state:
598602
*/
599-
std::vector<uint64_t> state() const;
603+
ANKERL_NANOBENCH(NODISCARD) std::vector<uint64_t> state() const;
600604

601605
private:
602606
static constexpr uint64_t rotl(uint64_t x, unsigned k) noexcept;
@@ -628,7 +632,7 @@ class Bench {
628632
Bench();
629633

630634
Bench(Bench&& other) noexcept;
631-
Bench& operator=(Bench&& other) noexcept;
635+
Bench& operator=(Bench&& other) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE));
632636
Bench(Bench const& other);
633637
Bench& operator=(Bench const& other);
634638
~Bench() noexcept;
@@ -818,7 +822,7 @@ class Bench {
818822
* Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
819823
* that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations().
820824
*
821-
* @see maxEpochTim), minEpochIterations
825+
* @see maxEpochTime, minEpochIterations
822826
*
823827
* @param t Minimum time each epoch should take.
824828
*/
@@ -1030,7 +1034,7 @@ void doNotOptimizeAway(T const& val);
10301034

10311035
// This assembly magic is directly from what Google Benchmark is doing. I have previously used what facebook's folly was doing, but
10321036
// this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways.
1033-
// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
1037+
// see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L443-L446
10341038
template <typename T>
10351039
void doNotOptimizeAway(T const& val) {
10361040
// NOLINTNEXTLINE(hicpp-no-assembler)
@@ -1781,7 +1785,7 @@ bool isEndlessRunning(std::string const& name);
17811785
bool isWarningsEnabled();
17821786

17831787
template <typename T>
1784-
T parseFile(std::string const& filename);
1788+
T parseFile(std::string const& filename, bool* fail);
17851789

17861790
void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
17871791
void printStabilityInformationOnce(std::ostream* outStream);
@@ -1839,7 +1843,7 @@ class Number {
18391843
public:
18401844
Number(int width, int precision, double value);
18411845
Number(int width, int precision, int64_t value);
1842-
std::string to_s() const;
1846+
ANKERL_NANOBENCH(NODISCARD) std::string to_s() const;
18431847

18441848
private:
18451849
friend std::ostream& operator<<(std::ostream& os, Number const& n);
@@ -1857,11 +1861,11 @@ std::ostream& operator<<(std::ostream& os, Number const& n);
18571861

18581862
class MarkDownColumn {
18591863
public:
1860-
MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val);
1861-
std::string title() const;
1862-
std::string separator() const;
1863-
std::string invalid() const;
1864-
std::string value() const;
1864+
MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept;
1865+
ANKERL_NANOBENCH(NODISCARD) std::string title() const;
1866+
ANKERL_NANOBENCH(NODISCARD) std::string separator() const;
1867+
ANKERL_NANOBENCH(NODISCARD) std::string invalid() const;
1868+
ANKERL_NANOBENCH(NODISCARD) std::string value() const;
18651869

18661870
private:
18671871
int mWidth;
@@ -1976,20 +1980,23 @@ PerformanceCounters& performanceCounters() {
19761980
}
19771981

19781982
// Windows version of doNotOptimizeAway
1979-
// see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307
1980-
// see https://github.com/facebook/folly/blob/master/folly/Benchmark.h#L280
1981-
// see https://docs.microsoft.com/en-us/cpp/preprocessor/optimize
1983+
// see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L514
1984+
// see https://github.com/facebook/folly/blob/v2023.01.30.00/folly/lang/Hint-inl.h#L54-L58
1985+
// see https://learn.microsoft.com/en-us/cpp/preprocessor/optimize
19821986
# if defined(_MSC_VER)
19831987
# pragma optimize("", off)
19841988
void doNotOptimizeAwaySink(void const*) {}
19851989
# pragma optimize("", on)
19861990
# endif
19871991

19881992
template <typename T>
1989-
T parseFile(std::string const& filename) {
1993+
T parseFile(std::string const& filename, bool* fail) {
19901994
std::ifstream fin(filename); // NOLINT(misc-const-correctness)
19911995
T num{};
19921996
fin >> num;
1997+
if (fail != nullptr) {
1998+
*fail = fin.fail();
1999+
}
19932000
return num;
19942001
}
19952002

@@ -2032,16 +2039,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
20322039
if (nprocs <= 0) {
20332040
warnings.emplace_back("couldn't figure out number of processors - no governor, turbo check possible");
20342041
} else {
2035-
20362042
// check frequency scaling
20372043
for (long id = 0; id < nprocs; ++id) {
20382044
auto idStr = detail::fmt::to_s(static_cast<uint64_t>(id));
20392045
auto sysCpu = "/sys/devices/system/cpu/cpu" + idStr;
2040-
auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq");
2041-
auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq");
2046+
auto minFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_min_freq", nullptr);
2047+
auto maxFreq = parseFile<int64_t>(sysCpu + "/cpufreq/scaling_max_freq", nullptr);
20422048
if (minFreq != maxFreq) {
2043-
auto minMHz = static_cast<double>(minFreq) / 1000.0;
2044-
auto maxMHz = static_cast<double>(maxFreq) / 1000.0;
2049+
auto minMHz = d(minFreq) / 1000.0;
2050+
auto maxMHz = d(maxFreq) / 1000.0;
20452051
warnings.emplace_back("CPU frequency scaling enabled: CPU " + idStr + " between " +
20462052
detail::fmt::Number(1, 1, minMHz).to_s() + " and " + detail::fmt::Number(1, 1, maxMHz).to_s() +
20472053
" MHz");
@@ -2050,13 +2056,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
20502056
}
20512057
}
20522058

2053-
auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor");
2054-
if ("performance" != currentGovernor) {
2059+
auto fail = false;
2060+
auto currentGovernor = parseFile<std::string>("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", &fail);
2061+
if (!fail && "performance" != currentGovernor) {
20552062
warnings.emplace_back("CPU governor is '" + currentGovernor + "' but should be 'performance'");
20562063
recommendPyPerf = true;
20572064
}
20582065

2059-
if (0 == parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo")) {
2066+
auto noTurbo = parseFile<int>("/sys/devices/system/cpu/intel_pstate/no_turbo", &fail);
2067+
if (!fail && noTurbo == 0) {
20602068
warnings.emplace_back("Turbo is enabled, CPU frequency will fluctuate");
20612069
recommendPyPerf = true;
20622070
}
@@ -2250,10 +2258,9 @@ struct IterationLogic::Impl {
22502258
mNumIters = 0;
22512259
}
22522260

2253-
ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, static_cast<double>(elapsed.count())) << " elapsed, "
2254-
<< detail::fmt::Number(20, 3, static_cast<double>(mTargetRuntimePerEpoch.count()))
2255-
<< " target. oldIters=" << oldIters << ", mNumIters=" << mNumIters
2256-
<< ", mState=" << static_cast<int>(mState));
2261+
ANKERL_NANOBENCH_LOG(mBench.name() << ": " << detail::fmt::Number(20, 3, d(elapsed.count())) << " elapsed, "
2262+
<< detail::fmt::Number(20, 3, d(mTargetRuntimePerEpoch.count())) << " target. oldIters="
2263+
<< oldIters << ", mNumIters=" << mNumIters << ", mState=" << static_cast<int>(mState));
22572264
}
22582265

22592266
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
@@ -2357,7 +2364,7 @@ struct IterationLogic::Impl {
23572364
}
23582365
os << fmt::MarkDownCode(mBench.name());
23592366
if (showUnstable) {
2360-
auto avgIters = static_cast<double>(mTotalNumIters) / static_cast<double>(mBench.epochs());
2367+
auto avgIters = d(mTotalNumIters) / d(mBench.epochs());
23612368
// NOLINTNEXTLINE(bugprone-incorrect-roundings)
23622369
auto suggestedIters = static_cast<uint64_t>(avgIters * 10 + 0.5);
23632370

@@ -2435,7 +2442,7 @@ class LinuxPerformanceCounters {
24352442
bool monitor(perf_sw_ids swId, Target target);
24362443
bool monitor(perf_hw_id hwId, Target target);
24372444

2438-
bool hasError() const noexcept {
2445+
ANKERL_NANOBENCH(NODISCARD) bool hasError() const noexcept {
24392446
return mHasError;
24402447
}
24412448

@@ -2691,16 +2698,23 @@ PerformanceCounters::PerformanceCounters()
26912698
, mVal()
26922699
, mHas() {
26932700

2694-
mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false));
2701+
// HW events
26952702
mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
2696-
mHas.contextSwitches =
2697-
mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false));
2703+
if (!mHas.cpuCycles) {
2704+
// Fallback to cycles counter, reference cycles not available in many systems.
2705+
mHas.cpuCycles = mPc->monitor(PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target(&mVal.cpuCycles, true, false));
2706+
}
26982707
mHas.instructions = mPc->monitor(PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.instructions, true, true));
26992708
mHas.branchInstructions =
27002709
mPc->monitor(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target(&mVal.branchInstructions, true, false));
27012710
mHas.branchMisses = mPc->monitor(PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target(&mVal.branchMisses, true, false));
27022711
// mHas.branchMisses = false;
27032712

2713+
// SW events
2714+
mHas.pageFaults = mPc->monitor(PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target(&mVal.pageFaults, true, false));
2715+
mHas.contextSwitches =
2716+
mPc->monitor(PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target(&mVal.contextSwitches, true, false));
2717+
27042718
mPc->start();
27052719
mPc->calibrate([] {
27062720
auto before = ankerl::nanobench::Clock::now();
@@ -2789,7 +2803,7 @@ void StreamStateRestorer::restore() {
27892803
Number::Number(int width, int precision, int64_t value)
27902804
: mWidth(width)
27912805
, mPrecision(precision)
2792-
, mValue(static_cast<double>(value)) {}
2806+
, mValue(d(value)) {}
27932807

27942808
Number::Number(int width, int precision, double value)
27952809
: mWidth(width)
@@ -2823,7 +2837,7 @@ std::ostream& operator<<(std::ostream& os, Number const& n) {
28232837
return n.write(os);
28242838
}
28252839

2826-
MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val)
2840+
MarkDownColumn::MarkDownColumn(int w, int prec, std::string tit, std::string suff, double val) noexcept
28272841
: mWidth(w)
28282842
, mPrecision(prec)
28292843
, mTitle(std::move(tit))
@@ -2884,14 +2898,14 @@ std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
28842898
Config::Config() = default;
28852899
Config::~Config() = default;
28862900
Config& Config::operator=(Config const&) = default;
2887-
Config& Config::operator=(Config&&) noexcept = default;
2901+
Config& Config::operator=(Config&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
28882902
Config::Config(Config const&) = default;
28892903
Config::Config(Config&&) noexcept = default;
28902904

28912905
// provide implementation here so it's only generated once
28922906
Result::~Result() = default;
28932907
Result& Result::operator=(Result const&) = default;
2894-
Result& Result::operator=(Result&&) noexcept = default;
2908+
Result& Result::operator=(Result&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
28952909
Result::Result(Result const&) = default;
28962910
Result::Result(Result&&) noexcept = default;
28972911

@@ -2992,7 +3006,7 @@ double Result::medianAbsolutePercentError(Measure m) const {
29923006
auto data = mNameToMeasurements[detail::u(m)];
29933007

29943008
// calculates MdAPE which is the median of percentage error
2995-
// see https://www.spiderfinancial.com/support/documentation/numxl/reference-manual/forecasting-performance/mdape
3009+
// see https://support.numxl.com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error
29963010
auto med = calcMedian(data);
29973011

29983012
// transform the data to absolute error
@@ -3106,7 +3120,7 @@ Bench::Bench() {
31063120
}
31073121

31083122
Bench::Bench(Bench&&) noexcept = default;
3109-
Bench& Bench::operator=(Bench&&) noexcept = default;
3123+
Bench& Bench::operator=(Bench&&) noexcept(ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default;
31103124
Bench::Bench(Bench const&) = default;
31113125
Bench& Bench::operator=(Bench const&) = default;
31123126
Bench::~Bench() noexcept = default;
@@ -3423,7 +3437,7 @@ BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
34233437
sumMeasure += rm.second;
34243438
}
34253439

3426-
auto n = static_cast<double>(rangeMeasure.size());
3440+
auto n = detail::d(rangeMeasure.size());
34273441
auto mean = sumMeasure / n;
34283442
mNormalizedRootMeanSquare = std::sqrt(err / n) / mean;
34293443
}

0 commit comments

Comments
 (0)