33
33
// see https://semver.org/
34
34
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
35
35
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36
- #define ANKERL_NANOBENCH_VERSION_PATCH 10 // backwards-compatible bug fixes
36
+ #define ANKERL_NANOBENCH_VERSION_PATCH 11 // backwards-compatible bug fixes
37
37
38
38
// /////////////////////////////////////////////////////////////////////////////////////////////////
39
39
// public facing api - as minimal as possible
120
120
# define ANKERL_NANOBENCH_IS_TRIVIALLY_COPYABLE (...) std::is_trivially_copyable<__VA_ARGS__>::value
121
121
#endif
122
122
123
+ // noexcept may be missing for std::string.
124
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58265
125
+ #define ANKERL_NANOBENCH_PRIVATE_NOEXCEPT_STRING_MOVE () std::is_nothrow_move_assignable<std::string>::value
126
+
123
127
// declarations ///////////////////////////////////////////////////////////////////////////////////
124
128
125
129
namespace ankerl {
@@ -404,7 +408,7 @@ struct Config {
404
408
Config ();
405
409
~Config ();
406
410
Config& operator =(Config const & other);
407
- Config& operator =(Config&& other) noexcept ;
411
+ Config& operator =(Config&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
408
412
Config (Config const & other);
409
413
Config (Config&& other) noexcept ;
410
414
};
@@ -430,7 +434,7 @@ class Result {
430
434
431
435
~Result ();
432
436
Result& operator =(Result const & other);
433
- Result& operator =(Result&& other) noexcept ;
437
+ Result& operator =(Result&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
434
438
Result (Result const & other);
435
439
Result (Result&& other) noexcept ;
436
440
@@ -596,7 +600,7 @@ class Rng final {
596
600
*
597
601
* @return Vector containing the full state:
598
602
*/
599
- std::vector<uint64_t > state () const ;
603
+ ANKERL_NANOBENCH (NODISCARD) std::vector<uint64_t > state () const ;
600
604
601
605
private:
602
606
static constexpr uint64_t rotl (uint64_t x, unsigned k) noexcept ;
@@ -628,7 +632,7 @@ class Bench {
628
632
Bench ();
629
633
630
634
Bench (Bench&& other) noexcept ;
631
- Bench& operator =(Bench&& other) noexcept ;
635
+ Bench& operator =(Bench&& other) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) ;
632
636
Bench (Bench const & other);
633
637
Bench& operator =(Bench const & other);
634
638
~Bench () noexcept ;
@@ -818,7 +822,7 @@ class Bench {
818
822
* Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
819
823
* that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations().
820
824
*
821
- * @see maxEpochTim) , minEpochIterations
825
+ * @see maxEpochTime , minEpochIterations
822
826
*
823
827
* @param t Minimum time each epoch should take.
824
828
*/
@@ -1030,7 +1034,7 @@ void doNotOptimizeAway(T const& val);
1030
1034
1031
1035
// This assembly magic is taken directly from what Google Benchmark is doing. I previously used what Facebook's folly was doing, but
1032
1036
// this seemed to have compilation problems in some cases. Google Benchmark seemed to be the most well tested anyways.
1033
- // see https://github.com/google/benchmark/blob/master /include/benchmark/benchmark.h#L307
1037
+ // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L443-L446
1034
1038
template <typename T>
1035
1039
void doNotOptimizeAway (T const & val) {
1036
1040
// NOLINTNEXTLINE(hicpp-no-assembler)
@@ -1781,7 +1785,7 @@ bool isEndlessRunning(std::string const& name);
1781
1785
bool isWarningsEnabled ();
1782
1786
1783
1787
template <typename T>
1784
- T parseFile (std::string const & filename);
1788
+ T parseFile (std::string const & filename, bool * fail );
1785
1789
1786
1790
void gatherStabilityInformation (std::vector<std::string>& warnings, std::vector<std::string>& recommendations);
1787
1791
void printStabilityInformationOnce (std::ostream* outStream);
@@ -1839,7 +1843,7 @@ class Number {
1839
1843
public:
1840
1844
Number (int width, int precision, double value);
1841
1845
Number (int width, int precision, int64_t value);
1842
- std::string to_s () const ;
1846
+ ANKERL_NANOBENCH (NODISCARD) std::string to_s () const ;
1843
1847
1844
1848
private:
1845
1849
friend std::ostream& operator <<(std::ostream& os, Number const & n);
@@ -1857,11 +1861,11 @@ std::ostream& operator<<(std::ostream& os, Number const& n);
1857
1861
1858
1862
class MarkDownColumn {
1859
1863
public:
1860
- MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val);
1861
- std::string title () const ;
1862
- std::string separator () const ;
1863
- std::string invalid () const ;
1864
- std::string value () const ;
1864
+ MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val) noexcept ;
1865
+ ANKERL_NANOBENCH (NODISCARD) std::string title () const ;
1866
+ ANKERL_NANOBENCH (NODISCARD) std::string separator () const ;
1867
+ ANKERL_NANOBENCH (NODISCARD) std::string invalid () const ;
1868
+ ANKERL_NANOBENCH (NODISCARD) std::string value () const ;
1865
1869
1866
1870
private:
1867
1871
int mWidth ;
@@ -1976,20 +1980,23 @@ PerformanceCounters& performanceCounters() {
1976
1980
}
1977
1981
1978
1982
// Windows version of doNotOptimizeAway
1979
- // see https://github.com/google/benchmark/blob/master /include/benchmark/benchmark.h#L307
1980
- // see https://github.com/facebook/folly/blob/master /folly/Benchmark .h#L280
1981
- // see https://docs .microsoft.com/en-us/cpp/preprocessor/optimize
1983
+ // see https://github.com/google/benchmark/blob/v1.7.1/include/benchmark/benchmark.h#L514
1984
+ // see https://github.com/facebook/folly/blob/v2023.01.30.00/folly/lang/Hint-inl.h#L54-L58
1985
+ // see https://learn.microsoft.com/en-us/cpp/preprocessor/optimize
1982
1986
# if defined(_MSC_VER)
1983
1987
# pragma optimize("", off)
1984
1988
void doNotOptimizeAwaySink (void const *) {}
1985
1989
# pragma optimize("", on)
1986
1990
# endif
1987
1991
1988
1992
// Reads a single value of type T from the file named `filename` using stream extraction (operator>>).
//
// The result variable is value-initialized before the extraction is attempted, and is returned
// whether or not the extraction succeeded.
//
// @param filename Path of the file to open as a std::ifstream.
// @param fail     Optional out-parameter. When not nullptr, it receives fin.fail() as observed right
//                 after the extraction, so the caller can tell a failed read apart from a real value.
//                 Pass nullptr when the failure state is not needed.
// @return The value held in `num` after the extraction attempt.
template <typename T>
1989
- T parseFile (std::string const & filename) {
1993
+ T parseFile (std::string const & filename, bool * fail ) {
1990
1994
std::ifstream fin (filename); // NOLINT(misc-const-correctness)
1991
1995
T num{};
1992
1996
fin >> num;
1997
+ if (fail != nullptr ) {
1998
+ *fail = fin.fail ();
1999
+ }
1993
2000
return num;
1994
2001
}
1995
2002
@@ -2032,16 +2039,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
2032
2039
if (nprocs <= 0 ) {
2033
2040
warnings.emplace_back (" couldn't figure out number of processors - no governor, turbo check possible" );
2034
2041
} else {
2035
-
2036
2042
// check frequency scaling
2037
2043
for (long id = 0 ; id < nprocs; ++id) {
2038
2044
auto idStr = detail::fmt::to_s (static_cast <uint64_t >(id));
2039
2045
auto sysCpu = " /sys/devices/system/cpu/cpu" + idStr;
2040
- auto minFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_min_freq" );
2041
- auto maxFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_max_freq" );
2046
+ auto minFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_min_freq" , nullptr );
2047
+ auto maxFreq = parseFile<int64_t >(sysCpu + " /cpufreq/scaling_max_freq" , nullptr );
2042
2048
if (minFreq != maxFreq) {
2043
- auto minMHz = static_cast < double > (minFreq) / 1000.0 ;
2044
- auto maxMHz = static_cast < double > (maxFreq) / 1000.0 ;
2049
+ auto minMHz = d (minFreq) / 1000.0 ;
2050
+ auto maxMHz = d (maxFreq) / 1000.0 ;
2045
2051
warnings.emplace_back (" CPU frequency scaling enabled: CPU " + idStr + " between " +
2046
2052
detail::fmt::Number (1 , 1 , minMHz).to_s () + " and " + detail::fmt::Number (1 , 1 , maxMHz).to_s () +
2047
2053
" MHz" );
@@ -2050,13 +2056,15 @@ void gatherStabilityInformation(std::vector<std::string>& warnings, std::vector<
2050
2056
}
2051
2057
}
2052
2058
2053
- auto currentGovernor = parseFile<std::string>(" /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" );
2054
- if (" performance" != currentGovernor) {
2059
+ auto fail = false ;
2060
+ auto currentGovernor = parseFile<std::string>(" /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" , &fail);
2061
+ if (!fail && " performance" != currentGovernor) {
2055
2062
warnings.emplace_back (" CPU governor is '" + currentGovernor + " ' but should be 'performance'" );
2056
2063
recommendPyPerf = true ;
2057
2064
}
2058
2065
2059
- if (0 == parseFile<int >(" /sys/devices/system/cpu/intel_pstate/no_turbo" )) {
2066
+ auto noTurbo = parseFile<int >(" /sys/devices/system/cpu/intel_pstate/no_turbo" , &fail);
2067
+ if (!fail && noTurbo == 0 ) {
2060
2068
warnings.emplace_back (" Turbo is enabled, CPU frequency will fluctuate" );
2061
2069
recommendPyPerf = true ;
2062
2070
}
@@ -2250,10 +2258,9 @@ struct IterationLogic::Impl {
2250
2258
mNumIters = 0 ;
2251
2259
}
2252
2260
2253
- ANKERL_NANOBENCH_LOG (mBench .name () << " : " << detail::fmt::Number (20 , 3 , static_cast <double >(elapsed.count ())) << " elapsed, "
2254
- << detail::fmt::Number (20 , 3 , static_cast <double >(mTargetRuntimePerEpoch .count ()))
2255
- << " target. oldIters=" << oldIters << " , mNumIters=" << mNumIters
2256
- << " , mState=" << static_cast <int >(mState ));
2261
+ ANKERL_NANOBENCH_LOG (mBench .name () << " : " << detail::fmt::Number (20 , 3 , d (elapsed.count ())) << " elapsed, "
2262
+ << detail::fmt::Number (20 , 3 , d (mTargetRuntimePerEpoch .count ())) << " target. oldIters="
2263
+ << oldIters << " , mNumIters=" << mNumIters << " , mState=" << static_cast <int >(mState ));
2257
2264
}
2258
2265
2259
2266
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
@@ -2357,7 +2364,7 @@ struct IterationLogic::Impl {
2357
2364
}
2358
2365
os << fmt::MarkDownCode (mBench .name ());
2359
2366
if (showUnstable) {
2360
- auto avgIters = static_cast < double > (mTotalNumIters ) / static_cast < double > (mBench .epochs ());
2367
+ auto avgIters = d (mTotalNumIters ) / d (mBench .epochs ());
2361
2368
// NOLINTNEXTLINE(bugprone-incorrect-roundings)
2362
2369
auto suggestedIters = static_cast <uint64_t >(avgIters * 10 + 0.5 );
2363
2370
@@ -2435,7 +2442,7 @@ class LinuxPerformanceCounters {
2435
2442
bool monitor (perf_sw_ids swId, Target target);
2436
2443
bool monitor (perf_hw_id hwId, Target target);
2437
2444
2438
- bool hasError () const noexcept {
2445
+ ANKERL_NANOBENCH (NODISCARD) bool hasError () const noexcept {
2439
2446
return mHasError ;
2440
2447
}
2441
2448
@@ -2691,16 +2698,23 @@ PerformanceCounters::PerformanceCounters()
2691
2698
, mVal()
2692
2699
, mHas() {
2693
2700
2694
- mHas . pageFaults = mPc -> monitor (PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target (& mVal . pageFaults , true , false ));
2701
+ // HW events
2695
2702
mHas .cpuCycles = mPc ->monitor (PERF_COUNT_HW_REF_CPU_CYCLES, LinuxPerformanceCounters::Target (&mVal .cpuCycles , true , false ));
2696
- mHas .contextSwitches =
2697
- mPc ->monitor (PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target (&mVal .contextSwitches , true , false ));
2703
+ if (!mHas .cpuCycles ) {
2704
+ // Fallback to cycles counter, reference cycles not available in many systems.
2705
+ mHas .cpuCycles = mPc ->monitor (PERF_COUNT_HW_CPU_CYCLES, LinuxPerformanceCounters::Target (&mVal .cpuCycles , true , false ));
2706
+ }
2698
2707
mHas .instructions = mPc ->monitor (PERF_COUNT_HW_INSTRUCTIONS, LinuxPerformanceCounters::Target (&mVal .instructions , true , true ));
2699
2708
mHas .branchInstructions =
2700
2709
mPc ->monitor (PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LinuxPerformanceCounters::Target (&mVal .branchInstructions , true , false ));
2701
2710
mHas .branchMisses = mPc ->monitor (PERF_COUNT_HW_BRANCH_MISSES, LinuxPerformanceCounters::Target (&mVal .branchMisses , true , false ));
2702
2711
// mHas.branchMisses = false;
2703
2712
2713
+ // SW events
2714
+ mHas .pageFaults = mPc ->monitor (PERF_COUNT_SW_PAGE_FAULTS, LinuxPerformanceCounters::Target (&mVal .pageFaults , true , false ));
2715
+ mHas .contextSwitches =
2716
+ mPc ->monitor (PERF_COUNT_SW_CONTEXT_SWITCHES, LinuxPerformanceCounters::Target (&mVal .contextSwitches , true , false ));
2717
+
2704
2718
mPc ->start ();
2705
2719
mPc ->calibrate ([] {
2706
2720
auto before = ankerl::nanobench::Clock::now ();
@@ -2789,7 +2803,7 @@ void StreamStateRestorer::restore() {
2789
2803
Number::Number (int width, int precision, int64_t value)
2790
2804
: mWidth (width)
2791
2805
, mPrecision (precision)
2792
- , mValue (static_cast < double > (value)) {}
2806
+ , mValue (d (value)) {}
2793
2807
2794
2808
Number::Number (int width, int precision, double value)
2795
2809
: mWidth (width)
@@ -2823,7 +2837,7 @@ std::ostream& operator<<(std::ostream& os, Number const& n) {
2823
2837
return n.write (os);
2824
2838
}
2825
2839
2826
- MarkDownColumn::MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val)
2840
+ MarkDownColumn::MarkDownColumn (int w, int prec, std::string tit, std::string suff, double val) noexcept
2827
2841
: mWidth (w)
2828
2842
, mPrecision (prec)
2829
2843
, mTitle (std::move(tit))
@@ -2884,14 +2898,14 @@ std::ostream& operator<<(std::ostream& os, MarkDownCode const& mdCode) {
2884
2898
Config::Config () = default ;
2885
2899
Config::~Config () = default ;
2886
2900
Config& Config::operator =(Config const &) = default ;
2887
- Config& Config::operator =(Config&&) noexcept = default ;
2901
+ Config& Config::operator =(Config&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
2888
2902
Config::Config (Config const &) = default ;
2889
2903
Config::Config (Config&&) noexcept = default ;
2890
2904
2891
2905
// provide implementation here so it's only generated once
2892
2906
Result::~Result () = default ;
2893
2907
Result& Result::operator =(Result const &) = default ;
2894
- Result& Result::operator =(Result&&) noexcept = default ;
2908
+ Result& Result::operator =(Result&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
2895
2909
Result::Result (Result const &) = default ;
2896
2910
Result::Result (Result&&) noexcept = default ;
2897
2911
@@ -2992,7 +3006,7 @@ double Result::medianAbsolutePercentError(Measure m) const {
2992
3006
auto data = mNameToMeasurements [detail::u (m)];
2993
3007
2994
3008
// calculates MdAPE which is the median of percentage error
2995
- // see https://www.spiderfinancial .com/support/documentation/numxl/reference-manual/forecasting-performance/mdape
3009
+ // see https://support.numxl.com/hc/en-us/articles/115001223503-MdAPE-Median-Absolute-Percentage-Error
2996
3010
auto med = calcMedian (data);
2997
3011
2998
3012
// transform the data to absolute error
@@ -3106,7 +3120,7 @@ Bench::Bench() {
3106
3120
}
3107
3121
3108
3122
Bench::Bench (Bench&&) noexcept = default ;
3109
- Bench& Bench::operator =(Bench&&) noexcept = default ;
3123
+ Bench& Bench::operator =(Bench&&) noexcept (ANKERL_NANOBENCH(NOEXCEPT_STRING_MOVE)) = default ;
3110
3124
Bench::Bench (Bench const &) = default ;
3111
3125
Bench& Bench::operator =(Bench const &) = default ;
3112
3126
Bench::~Bench () noexcept = default ;
@@ -3423,7 +3437,7 @@ BigO::BigO(std::string bigOName, RangeMeasure const& rangeMeasure)
3423
3437
sumMeasure += rm.second ;
3424
3438
}
3425
3439
3426
- auto n = static_cast < double > (rangeMeasure.size ());
3440
+ auto n = detail::d (rangeMeasure.size ());
3427
3441
auto mean = sumMeasure / n;
3428
3442
mNormalizedRootMeanSquare = std::sqrt (err / n) / mean;
3429
3443
}
0 commit comments