3333// see https://semver.org/
3434#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
3535#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36- #define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes
36+ #define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes
3737
3838// /////////////////////////////////////////////////////////////////////////////////////////////////
3939// public facing api - as minimal as possible
120120# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE (...) std::is_trivially_copyable<__VA_ARGS__>::value
121121#endif
122122
123+ // noexcept may be missing for std::string.
124+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265
125+ #define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE () std::is_nothrow_move_assignable<std::string>::value
126+
123127// declarations ///////////////////////////////////////////////////////////////////////////////////
124128
125129namespace ankerl {
@@ -404,7 +408,7 @@ struct Config {
404408 Config ();
405409 ~Config ();
406410 Config& operator =(Config const & other);
407- Config& operator =(Config&& other) noexcept ;
411+ Config& operator =(Config&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
408412 Config (Config const & other);
409413 Config (Config&& other) noexcept ;
410414};
@@ -430,7 +434,7 @@ class Result {
430434
431435 ~Result ();
432436 Result& operator =(Result const & other);
433- Result& operator =(Result&& other) noexcept ;
437+ Result& operator =(Result&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
434438 Result (Result const & other);
435439 Result (Result&& other) noexcept ;
436440
@@ -596,7 +600,7 @@ class Rng final {
596600 *
597601 * @return Vector containing the full state:
598602 */
599- std::vector<uint64_t > state () const ;
603+ ANKERL_NANOBENCH (NODISCARD) std::vector<uint64_t > state () const ;
600604
601605private:
602606 static constexpr uint64_t rotl (uint64_t x, unsigned k) noexcept ;
@@ -628,7 +632,7 @@ class Bench {
628632 Bench ();
629633
630634 Bench (Bench&& other) noexcept ;
631- Bench& operator =(Bench&& other) noexcept ;
635+ Bench& operator =(Bench&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
632636 Bench (Bench const & other);
633637 Bench& operator =(Bench const & other);
634638 ~Bench () noexcept ;
@@ -818,7 +822,7 @@ class Bench {
818822 * Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
819823 * that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations().
820824 *
821- * @see maxEpochTim) , minEpochIterations
825+ * @see maxEpochTime , minEpochIterations
822826 *
823827 * @param t Minimum time each epoch should take.
824828 */
@@ -1030,7 +1034,7 @@ void doNotOptimizeAway(T const& val);
10301034
10311035// These assembly magic is directly from what Google Benchmark is doing. I have previously used what facebook's folly was doing, but
10321036// this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways.
1033- // see https://github.com/google/benchmark/blob/master /include/benchmark/benchmark.h#L307
1037+ // see https://github.com/google/benchmark/blob/v1.7.1 /include/benchmark/benchmark.h#L443-L446
10341038template <typename T>
10351039void doNotOptimizeAway (T const & val) {
10361040 // NOLINTNEXTLINE(hicpp-no-assembler)
@@ -1781,7 +1785,7 @@ bool isEndlessRunning(std::string const& name);
17811785bool isWarningsEnabled ();
17821786
17831787template <typename T>
1784- T parseFile (std::string const & filename);
1788+ T parseFile (std::string const & filename, bool * fail );
17851789
17861790void gatherStabilityInformation (std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
17871791void printStabilityInformationOnce (std::ostream* outStream);
@@ -1839,7 +1843,7 @@ class Number {
18391843public:
18401844 Number (int width, int precision, double value);
18411845 Number (int width, int precision, int64_t value);
1842- std::string to_s () const ;
1846+ ANKERL_NANOBENCH (NODISCARD) std::string to_s () const ;
18431847
18441848private:
18451849 friend std::ostream& operator <<(std::ostream& os, Number const & n);
@@ -1857,11 +1861,11 @@ std::ostream& operator<<(std::ostream& os, Number const& n);
18571861
18581862class MarkDownColumn {
18591863public:
1860- MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val);
1861- std::string title () const ;
1862- std::string separator () const ;
1863- std::string invalid () const ;
1864- std::string value () const ;
1864+ MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val) noexcept ;
1865+ ANKERL_NANOBENCH (NODISCARD) std::string title () const ;
1866+ ANKERL_NANOBENCH (NODISCARD) std::string separator () const ;
1867+ ANKERL_NANOBENCH (NODISCARD) std::string invalid () const ;
1868+ ANKERL_NANOBENCH (NODISCARD) std::string value () const ;
18651869
18661870private:
18671871 int mWidth ;
@@ -1976,20 +1980,23 @@ PerformanceCounters& performanceCounters() {
19761980}
19771981
19781982// Windows version of doNotOptimizeAway
1979- // see https://github.com/google/benchmark/blob/master /include/benchmark/benchmark.h#L307
1980- // see https://github.com/facebook/folly/blob/master /folly/Benchmark .h#L280
1981- // see https://docs .microsoft.com/en-us/cpp/preprocessor/optimize
1983+ // see https://github.com/google/benchmark/blob/v1.7.1 /include/benchmark/benchmark.h#L514
1984+ // see https://github.com/facebook/folly/blob/v2023.01.30.00 /folly/lang/Hint-inl .h#L54-L58
1985+ // see https://learn .microsoft.com/en-us/cpp/preprocessor/optimize
19821986# if defined(_MSC_VER)
19831987# pragma optimize("", off)
19841988void doNotOptimizeAwaySink (void const *) {}
19851989# pragma optimize("", on)
19861990# endif
19871991
19881992template <typename T>
1989- T parseFile (std::string const & filename) {
1993+ T parseFile (std::string const & filename, bool * fail ) {
19901994 std::ifstream fin (filename); // NOLINT(misc-const-correctness)
19911995 T num{};
19921996 fin >> num;
1997+ if (fail != nullptr ) {
1998+ *fail = fin.fail ();
1999+ }
19932000 return num;
19942001}
19952002
@@ -2032,16 +2039,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
20322039 if (nprocs <= 0 ) {
20332040 warnings.emplace_back (" couldn't figure out number of processors - no governor, turbo check possible" );
20342041 } else {
2035-
20362042 // check frequency scaling
20372043 for (long id = 0 ; id < nprocs; ++id) {
20382044 auto idStr = detail::fmt::to_s (static_cast <uint64_t >(id));
20392045 auto sysCpu = " /sys/devices/system/cpu/cpu" + idStr;
2040- auto minFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_min_freq" );
2041- auto maxFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_max_freq" );
2046+ auto minFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_min_freq" , nullptr );
2047+ auto maxFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_max_freq" , nullptr );
20422048 if (minFreq != maxFreq) {
2043- auto minMHz = static_cast < double > (minFreq) / 1000.0 ;
2044- auto maxMHz = static_cast < double > (maxFreq) / 1000.0 ;
2049+ auto minMHz = d (minFreq) / 1000.0 ;
2050+ auto maxMHz = d (maxFreq) / 1000.0 ;
20452051 warnings.emplace_back (" CPU frequency scaling enabled: CPU " + idStr + " between " +
20462052 detail::fmt::Number (1 , 1 , minMHz).to_s () + " and " + detail::fmt::Number (1 , 1 , maxMHz).to_s () +
20472053 " MHz" );
@@ -2050,13 +2056,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
20502056 }
20512057 }
20522058
2053- auto currentGovernor = parseFile<std::string>(" /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" );
2054- if (" performance" != currentGovernor) {
2059+ auto fail = false ;
2060+ auto currentGovernor = parseFile<std::string>(" /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" , &fail);
2061+ if (!fail && " performance" != currentGovernor) {
20552062 warnings.emplace_back (" CPU governor is '" + currentGovernor + " ' but should be 'performance'" );
20562063 recommendPyPerf = true ;
20572064 }
20582065
2059- if (0 == parseFile<int >(" /sys/devices/system/cpu/intel_pstate/no_turbo" )) {
2066+ auto noTurbo = parseFile<int >(" /sys/devices/system/cpu/intel_pstate/no_turbo" , &fail);
2067+ if (!fail && noTurbo == 0 ) {
20602068 warnings.emplace_back (" Turbo is enabled, CPU frequency will fluctuate" );
20612069 recommendPyPerf = true ;
20622070 }
@@ -2250,10 +2258,9 @@ struct IterationLogic::Impl {
22502258 mNumIters = 0 ;
22512259 }
22522260
2253- ANKERL_NANOBENCH_LOG (mBench .name () << " : " << detail::fmt::Number (20 , 3 , static_cast <double >(elapsed.count ())) << " elapsed, "
2254- << detail::fmt::Number (20 , 3 , static_cast <double >(mTargetRuntimePerEpoch .count ()))
2255- << " target. oldIters=" << oldIters << " , mNumIters=" << mNumIters
2256- << " , mState=" << static_cast <int >(mState ));
2261+ ANKERL_NANOBENCH_LOG (mBench .name () << " : " << detail::fmt::Number (20 , 3 , d (elapsed.count ())) << " elapsed, "
2262+ << detail::fmt::Number (20 , 3 , d (mTargetRuntimePerEpoch .count ())) << " target. oldIters="
2263+ << oldIters << " , mNumIters=" << mNumIters << " , mState=" << static_cast <int >(mState ));
22572264 }
22582265
22592266 // NOLINTNEXTLINE(readability-function-cognitive-complexity)
@@ -2357,7 +2364,7 @@ struct IterationLogic::Impl {
23572364 }
23582365 os << fmt::MarkDownCode (mBench .name ());
23592366 if (showUnstable) {
2360- auto avgIters = static_cast < double > (mTotalNumIters ) / static_cast < double > (mBench .epochs ());
2367+ auto avgIters = d (mTotalNumIters ) / d (mBench .epochs ());
23612368 // NOLINTNEXTLINE(bugprone-incorrect-roundings)
23622369 auto suggestedIters = static_cast <uint64_t >(avgIters * 10 + 0.5 );
23632370
@@ -2435,7 +2442,7 @@ class LinuxPerformanceCounters {
24352442 bool monitor (perf_sw_ids swId, Target target);
24362443 bool monitor (perf_hw_id hwId, Target target);
24372444
2438- bool hasError () const noexcept {
2445+ ANKERL_NANOBENCH (NODISCARD) bool hasError () const noexcept {
24392446 return mHasError ;
24402447 }
24412448
@@ -2691,16 +2698,23 @@ PerformanceCounters::PerformanceCounters()
26912698 , mVal()
26922699 , mHas() {
26932700
2694- mHas . pageFaults = mPc -> monitor (PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target (& mVal . pageFaults , true , false ));
2701+ // HW events
26952702 mHas .cpuCycles = mPc ->monitor (PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target (&mVal .cpuCycles , true , false ));
2696- mHas .contextSwitches =
2697- mPc ->monitor (PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target (&mVal .contextSwitches , true , false ));
2703+ if (!mHas .cpuCycles ) {
2704+ // Fallback to cycles counter, reference cycles not available in many systems.
2705+ mHas .cpuCycles = mPc ->monitor (PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target (&mVal .cpuCycles , true , false ));
2706+ }
26982707 mHas .instructions = mPc ->monitor (PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target (&mVal .instructions , true , true ));
26992708 mHas .branchInstructions =
27002709 mPc ->monitor (PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target (&mVal .branchInstructions , true , false ));
27012710 mHas .branchMisses = mPc ->monitor (PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target (&mVal .branchMisses , true , false ));
27022711 // mHas.branchMisses = false;
27032712
2713+ // SW events
2714+ mHas .pageFaults = mPc ->monitor (PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target (&mVal .pageFaults , true , false ));
2715+ mHas .contextSwitches =
2716+ mPc ->monitor (PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target (&mVal .contextSwitches , true , false ));
2717+
27042718 mPc ->start ();
27052719 mPc ->calibrate ([] {
27062720 auto before = ankerl::nanobench::Clock::now ();
@@ -2789,7 +2803,7 @@ void StreamStateRestorer::restore() {
27892803Number::Number (int width, int precision, int64_t value)
27902804 : mWidth (width)
27912805 , mPrecision (precision)
2792- , mValue (static_cast < double > (value)) {}
2806+ , mValue (d (value)) {}
27932807
27942808Number::Number (int width, int precision, double value)
27952809 : mWidth (width)
@@ -2823,7 +2837,7 @@ std::ostream& operator<<(std::ostream& os, Number const& n) {
28232837 return n.write (os);
28242838}
28252839
2826- MarkDownColumn::MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val)
2840+ MarkDownColumn::MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val) noexcept
28272841 : mWidth (w)
28282842 , mPrecision (prec)
28292843 , mTitle (std::move(tit))
@@ -2884,14 +2898,14 @@ std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
28842898Config::Config () = default ;
28852899Config::~Config () = default ;
28862900Config& Config::operator =(Config const &) = default ;
2887- Config& Config::operator =(Config&&) noexcept = default ;
2901+ Config& Config::operator =(Config&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
28882902Config::Config (Config const &) = default ;
28892903Config::Config (Config&&) noexcept = default ;
28902904
28912905// provide implementation here so it's only generated once
28922906Result::~Result () = default ;
28932907Result& Result::operator =(Result const &) = default ;
2894- Result& Result::operator =(Result&&) noexcept = default ;
2908+ Result& Result::operator =(Result&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
28952909Result::Result (Result const &) = default ;
28962910Result::Result (Result&&) noexcept = default ;
28972911
@@ -2992,7 +3006,7 @@ double Result::medianAbsolutePercentError(Measure m) const {
29923006 auto data = mNameToMeasurements [detail::u (m)];
29933007
29943008 // calculates MdAPE which is the median of percentage error
2995- // see https://www.spiderfinancial .com/support/documentation/numxl/reference-manual/forecasting-performance/mdape
3009+ // see https://support.numxl .com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error
29963010 auto med = calcMedian (data);
29973011
29983012 // transform the data to absolute error
@@ -3106,7 +3120,7 @@ Bench::Bench() {
31063120}
31073121
31083122Bench::Bench (Bench&&) noexcept = default ;
3109- Bench& Bench::operator =(Bench&&) noexcept = default ;
3123+ Bench& Bench::operator =(Bench&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
31103124Bench::Bench (Bench const &) = default ;
31113125Bench& Bench::operator =(Bench const &) = default ;
31123126Bench::~Bench () noexcept = default ;
@@ -3423,7 +3437,7 @@ BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
34233437 sumMeasure += rm.second ;
34243438 }
34253439
3426- auto n = static_cast < double > (rangeMeasure.size ());
3440+ auto n = detail::d (rangeMeasure.size ());
34273441 auto mean = sumMeasure / n;
34283442 mNormalizedRootMeanSquare = std::sqrt (err / n) / mean;
34293443}
0 commit comments