Skip to content

Commit 03cb2b4

Browse files
committed
Merge bitcoin/bitcoin#23025: bench: update nanobench add -min_time
e148a52 bench: fixed ubsan implicit conversion (Martin Ankerl) da4e2f1 bench: various args improvements (Jon Atack) d312fd9 bench: clean up includes (Jon Atack) 1f10f16 bench: add usage description and documentation (Martin Ankerl) d3c6f8b bench: introduce -min_time argument (Martin Ankerl) 9fef832 bench: make EvictionProtection.* work with any number of iterations (Martin Ankerl) 153e686 bench: change AddrManGood to AddrManAddThenGood (Martin Ankerl) 468b232 bench: remove unnecessary & incorrect multiplication in MuHashDiv (Martin Ankerl) eed99cf bench: update nanobench from 4.3.4 to 4.3.6 (Martin Ankerl) Pull request description: This PR updates nanobench to the latest upstream release, v4.3.6. It fixes the missing performance counters. Due to discussions on #22999 I have done some work that should make the benchmark results more reliable. It introduces a new flag `-min_time` that allows running a benchmark for much longer than the default. When results are unreliable, choosing a large timeframe here should usually get repeatable results even when frequency scaling cannot be disabled. The default is now 10ms. For this to work I have changed the `AddrManGood` and `EvictionProtection` benchmarks so they work with any number of iterations. Also, this adds more usage documentation to `bench_bitcoin -h` and I've cherry-picked two changes from #22999 authored by Jon Atack. ACKs for top commit: jonatack: re-ACK e148a52 laanwj: Code review ACK e148a52 Tree-SHA512: 2da6de19a5c85ac234b190025e195c727546166dbb75e3f9267e667a73677ba1e29b7765877418a42b1407b65df901e0130763936525e6f1450f18f08837c40c
2 parents 01b5cfb + e148a52 commit 03cb2b4

File tree

9 files changed

+128
-72
lines changed

9 files changed

+128
-72
lines changed

src/bench/addrman.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -103,41 +103,33 @@ static void AddrManGetAddr(benchmark::Bench& bench)
103103
});
104104
}
105105

106-
static void AddrManGood(benchmark::Bench& bench)
106+
static void AddrManAddThenGood(benchmark::Bench& bench)
107107
{
108-
/* Create many CAddrMan objects - one to be modified at each loop iteration.
109-
* This is necessary because the CAddrMan::Good() method modifies the
110-
* object, affecting the timing of subsequent calls to the same method and
111-
* we want to do the same amount of work in every loop iteration. */
112-
113-
bench.epochs(5).epochIterations(1);
114-
const uint64_t addrman_count{bench.epochs() * bench.epochIterations()};
115-
Assert(addrman_count == 5U);
116-
117-
std::vector<std::unique_ptr<CAddrMan>> addrmans(addrman_count);
118-
for (size_t i{0}; i < addrman_count; ++i) {
119-
addrmans[i] = std::make_unique<CAddrMan>(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
120-
FillAddrMan(*addrmans[i]);
121-
}
122-
123108
auto markSomeAsGood = [](CAddrMan& addrman) {
124109
for (size_t source_i = 0; source_i < NUM_SOURCES; ++source_i) {
125110
for (size_t addr_i = 0; addr_i < NUM_ADDRESSES_PER_SOURCE; ++addr_i) {
126-
if (addr_i % 32 == 0) {
127-
addrman.Good(g_addresses[source_i][addr_i]);
128-
}
111+
addrman.Good(g_addresses[source_i][addr_i]);
129112
}
130113
}
131114
};
132115

133-
uint64_t i = 0;
116+
CreateAddresses();
117+
134118
bench.run([&] {
135-
markSomeAsGood(*addrmans.at(i));
136-
++i;
119+
// To make the benchmark independent of the number of evaluations, we always prepare a new addrman.
120+
// This is necessary because CAddrMan::Good() method modifies the object, affecting the timing of subsequent calls
121+
// to the same method and we want to do the same amount of work in every loop iteration.
122+
//
123+
// This has some overhead (exactly the result of AddrManAdd benchmark), but that overhead is constant so improvements in
124+
// CAddrMan::Good() will still be noticeable.
125+
CAddrMan addrman(/* asmap */ std::vector<bool>(), /* deterministic */ false, /* consistency_check_ratio */ 0);
126+
AddAddressesToAddrMan(addrman);
127+
128+
markSomeAsGood(addrman);
137129
});
138130
}
139131

140132
BENCHMARK(AddrManAdd);
141133
BENCHMARK(AddrManSelect);
142134
BENCHMARK(AddrManGetAddr);
143-
BENCHMARK(AddrManGood);
135+
BENCHMARK(AddrManAddThenGood);

src/bench/bech32.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

55
#include <bench/bench.h>
6-
#include <bench/nanobench.h>
76

87
#include <bech32.h>
98
#include <util/strencodings.h>

src/bench/bench.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,18 @@
44

55
#include <bench/bench.h>
66

7-
#include <chainparams.h>
87
#include <test/util/setup_common.h>
9-
#include <validation.h>
108

9+
#include <chrono>
10+
#include <fstream>
11+
#include <functional>
12+
#include <iostream>
13+
#include <map>
1114
#include <regex>
15+
#include <string>
16+
#include <vector>
17+
18+
using namespace std::chrono_literals;
1219

1320
const std::function<void(const std::string&)> G_TEST_LOG_FUN{};
1421

@@ -61,6 +68,12 @@ void benchmark::BenchRunner::RunAll(const Args& args)
6168

6269
Bench bench;
6370
bench.name(p.first);
71+
if (args.min_time > 0ms) {
72+
// convert to nanos before dividing to reduce rounding errors
73+
std::chrono::nanoseconds min_time_ns = args.min_time;
74+
bench.minEpochTime(min_time_ns / bench.epochs());
75+
}
76+
6477
if (args.asymptote.empty()) {
6578
p.second(bench);
6679
} else {

src/bench/bench.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ using ankerl::nanobench::Bench;
4141
typedef std::function<void(Bench&)> BenchFunction;
4242

4343
struct Args {
44-
std::string regex_filter;
4544
bool is_list_only;
45+
std::chrono::milliseconds min_time;
4646
std::vector<double> asymptote;
4747
std::string output_csv;
4848
std::string output_json;
49+
std::string regex_filter;
4950
};
5051

5152
class BenchRunner

src/bench/bench_bitcoin.cpp

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,28 @@
44

55
#include <bench/bench.h>
66

7+
#include <clientversion.h>
78
#include <crypto/sha256.h>
89
#include <util/strencodings.h>
910
#include <util/system.h>
1011

11-
#include <memory>
12+
#include <chrono>
13+
#include <cstdint>
14+
#include <iostream>
15+
#include <sstream>
16+
#include <vector>
1217

1318
static const char* DEFAULT_BENCH_FILTER = ".*";
19+
static constexpr int64_t DEFAULT_MIN_TIME_MS{10};
1420

1521
static void SetupBenchArgs(ArgsManager& argsman)
1622
{
1723
SetupHelpOptions(argsman);
1824

19-
argsman.AddArg("-asymptote=n1,n2,n3,...", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
25+
argsman.AddArg("-asymptote=<n1,n2,n3,...>", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2026
argsman.AddArg("-filter=<regex>", strprintf("Regular expression filter to select benchmark by name (default: %s)", DEFAULT_BENCH_FILTER), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
21-
argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
27+
argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_BOOL, OptionsCategory::OPTIONS);
28+
argsman.AddArg("-min_time=<milliseconds>", strprintf("Minimum runtime per benchmark, in milliseconds (default: %d)", DEFAULT_MIN_TIME_MS), ArgsManager::ALLOW_INT, OptionsCategory::OPTIONS);
2229
argsman.AddArg("-output_csv=<output.csv>", "Generate CSV file with the most important benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2330
argsman.AddArg("-output_json=<output.json>", "Generate JSON file with all benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2431
}
@@ -48,17 +55,62 @@ int main(int argc, char** argv)
4855
}
4956

5057
if (HelpRequested(argsman)) {
51-
std::cout << argsman.GetHelpMessage();
58+
std::cout << "Usage: bench_bitcoin [options]\n"
59+
"\n"
60+
<< argsman.GetHelpMessage()
61+
<< "Description:\n"
62+
"\n"
63+
" bench_bitcoin executes microbenchmarks. The quality of the benchmark results\n"
64+
" highly depend on the stability of the machine. It can sometimes be difficult\n"
65+
" to get stable, repeatable results, so here are a few tips:\n"
66+
"\n"
67+
" * Use pyperf [1] to disable frequency scaling, turbo boost etc. For best\n"
68+
" results, use CPU pinning and CPU isolation (see [2]).\n"
69+
"\n"
70+
" * Each call of run() should do exactly the same work. E.g. inserting into\n"
71+
" a std::vector doesn't do that as it will reallocate on certain calls. Make\n"
72+
" sure each run has exactly the same preconditions.\n"
73+
"\n"
74+
" * If results are still not reliable, increase runtime with e.g.\n"
75+
" -min_time=5000 to let a benchmark run for at least 5 seconds.\n"
76+
"\n"
77+
" * bench_bitcoin uses nanobench [3] for which there is extensive\n"
78+
" documentation available online.\n"
79+
"\n"
80+
"Environment Variables:\n"
81+
"\n"
82+
" To attach a profiler you can run a benchmark in endless mode. This can be\n"
83+
" done with the environment variable NANOBENCH_ENDLESS. E.g. like so:\n"
84+
"\n"
85+
" NANOBENCH_ENDLESS=MuHash ./bench_bitcoin -filter=MuHash\n"
86+
"\n"
87+
" In rare cases it can be useful to suppress stability warnings. This can be\n"
88+
" done with the environment variable NANOBENCH_SUPPRESS_WARNINGS, e.g:\n"
89+
"\n"
90+
" NANOBENCH_SUPPRESS_WARNINGS=1 ./bench_bitcoin\n"
91+
"\n"
92+
"Notes:\n"
93+
"\n"
94+
" 1. pyperf\n"
95+
" https://github.com/psf/pyperf\n"
96+
"\n"
97+
" 2. CPU pinning & isolation\n"
98+
" https://pyperf.readthedocs.io/en/latest/system.html\n"
99+
"\n"
100+
" 3. nanobench\n"
101+
" https://github.com/martinus/nanobench\n"
102+
"\n";
52103

53104
return EXIT_SUCCESS;
54105
}
55106

56107
benchmark::Args args;
57-
args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
58-
args.is_list_only = argsman.GetBoolArg("-list", false);
59108
args.asymptote = parseAsymptote(argsman.GetArg("-asymptote", ""));
109+
args.is_list_only = argsman.GetBoolArg("-list", false);
110+
args.min_time = std::chrono::milliseconds(argsman.GetArg("-min_time", DEFAULT_MIN_TIME_MS));
60111
args.output_csv = argsman.GetArg("-output_csv", "");
61112
args.output_json = argsman.GetArg("-output_json", "");
113+
args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
62114

63115
benchmark::BenchRunner::RunAll(args);
64116

src/bench/crypto_hash.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ static void MuHash(benchmark::Bench& bench)
110110
{
111111
MuHash3072 acc;
112112
unsigned char key[32] = {0};
113-
int i = 0;
113+
uint32_t i = 0;
114114
bench.run([&] {
115-
key[0] = ++i;
115+
key[0] = ++i & 0xFF;
116116
acc *= MuHash3072(key);
117117
});
118118
}
@@ -134,10 +134,6 @@ static void MuHashDiv(benchmark::Bench& bench)
134134
FastRandomContext rng(true);
135135
MuHash3072 muhash{rng.randbytes(32)};
136136

137-
for (size_t i = 0; i < bench.epochIterations(); ++i) {
138-
acc *= muhash;
139-
}
140-
141137
bench.run([&] {
142138
acc /= muhash;
143139
});

src/bench/nanobench.h

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
// see https://semver.org/
3434
#define ANKERL_NANOBENCH_VERSION_MAJOR 4 // incompatible API changes
3535
#define ANKERL_NANOBENCH_VERSION_MINOR 3 // backwards-compatible changes
36-
#define ANKERL_NANOBENCH_VERSION_PATCH 4 // backwards-compatible bug fixes
36+
#define ANKERL_NANOBENCH_VERSION_PATCH 6 // backwards-compatible bug fixes
3737

3838
///////////////////////////////////////////////////////////////////////////////////////////////////
3939
// public facing api - as minimal as possible
@@ -88,13 +88,15 @@
8888
} while (0)
8989
#endif
9090

91-
#if defined(__linux__) && defined(PERF_EVENT_IOC_ID) && defined(PERF_COUNT_HW_REF_CPU_CYCLES) && defined(PERF_FLAG_FD_CLOEXEC) && \
92-
!defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93-
// only enable perf counters on kernel 3.14 which seems to have all the necessary defines. The three PERF_... defines are not in
94-
// kernel 2.6.32 (all others are).
95-
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
96-
#else
97-
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
91+
#define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 0
92+
#if defined(__linux__) && !defined(ANKERL_NANOBENCH_DISABLE_PERF_COUNTERS)
93+
# include <linux/version.h>
94+
# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
95+
// PERF_COUNT_HW_REF_CPU_CYCLES only available since kernel 3.3
96+
// PERF_FLAG_FD_CLOEXEC since kernel 3.14
97+
# undef ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS
98+
# define ANKERL_NANOBENCH_PRIVATE_PERF_COUNTERS() 1
99+
# endif
98100
#endif
99101

100102
#if defined(__clang__)
@@ -2210,20 +2212,20 @@ struct IterationLogic::Impl {
22102212
columns.emplace_back(10, 1, "err%", "%", rErrorMedian * 100.0);
22112213

22122214
double rInsMedian = -1.0;
2213-
if (mResult.has(Result::Measure::instructions)) {
2215+
if (mBench.performanceCounters() && mResult.has(Result::Measure::instructions)) {
22142216
rInsMedian = mResult.median(Result::Measure::instructions);
22152217
columns.emplace_back(18, 2, "ins/" + mBench.unit(), "", rInsMedian / mBench.batch());
22162218
}
22172219

22182220
double rCycMedian = -1.0;
2219-
if (mResult.has(Result::Measure::cpucycles)) {
2221+
if (mBench.performanceCounters() && mResult.has(Result::Measure::cpucycles)) {
22202222
rCycMedian = mResult.median(Result::Measure::cpucycles);
22212223
columns.emplace_back(18, 2, "cyc/" + mBench.unit(), "", rCycMedian / mBench.batch());
22222224
}
22232225
if (rInsMedian > 0.0 && rCycMedian > 0.0) {
22242226
columns.emplace_back(9, 3, "IPC", "", rCycMedian <= 0.0 ? 0.0 : rInsMedian / rCycMedian);
22252227
}
2226-
if (mResult.has(Result::Measure::branchinstructions)) {
2228+
if (mBench.performanceCounters() && mResult.has(Result::Measure::branchinstructions)) {
22272229
double rBraMedian = mResult.median(Result::Measure::branchinstructions);
22282230
columns.emplace_back(17, 2, "bra/" + mBench.unit(), "", rBraMedian / mBench.batch());
22292231
if (mResult.has(Result::Measure::branchmisses)) {
@@ -2402,6 +2404,14 @@ class LinuxPerformanceCounters {
24022404
return (a + divisor / 2) / divisor;
24032405
}
24042406

2407+
ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
2408+
static inline uint32_t mix(uint32_t x) noexcept {
2409+
x ^= x << 13;
2410+
x ^= x >> 17;
2411+
x ^= x << 5;
2412+
return x;
2413+
}
2414+
24052415
template <typename Op>
24062416
ANKERL_NANOBENCH_NO_SANITIZE("integer", "undefined")
24072417
void calibrate(Op&& op) {
@@ -2441,15 +2451,10 @@ class LinuxPerformanceCounters {
24412451
uint64_t const numIters = 100000U + (std::random_device{}() & 3);
24422452
uint64_t n = numIters;
24432453
uint32_t x = 1234567;
2444-
auto fn = [&]() {
2445-
x ^= x << 13;
2446-
x ^= x >> 17;
2447-
x ^= x << 5;
2448-
};
24492454

24502455
beginMeasure();
24512456
while (n-- > 0) {
2452-
fn();
2457+
x = mix(x);
24532458
}
24542459
endMeasure();
24552460
detail::doNotOptimizeAway(x);
@@ -2459,8 +2464,8 @@ class LinuxPerformanceCounters {
24592464
beginMeasure();
24602465
while (n-- > 0) {
24612466
// we now run *twice* so we can easily calculate the overhead
2462-
fn();
2463-
fn();
2467+
x = mix(x);
2468+
x = mix(x);
24642469
}
24652470
endMeasure();
24662471
detail::doNotOptimizeAway(x);

src/bench/peer_eviction.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,17 @@ static void EvictionProtectionCommon(
2020
{
2121
using Candidates = std::vector<NodeEvictionCandidate>;
2222
FastRandomContext random_context{true};
23-
bench.warmup(100).epochIterations(1100);
2423

2524
Candidates candidates{GetRandomNodeEvictionCandidates(num_candidates, random_context)};
2625
for (auto& c : candidates) {
2726
candidate_setup_fn(c);
2827
}
2928

30-
std::vector<Candidates> copies{
31-
static_cast<size_t>(bench.epochs() * bench.epochIterations()), candidates};
32-
size_t i{0};
29+
3330
bench.run([&] {
34-
ProtectEvictionCandidatesByRatio(copies.at(i));
35-
++i;
31+
// creating a copy has an overhead of about 3%, so it does not influence the benchmark results much.
32+
auto copy = candidates;
33+
ProtectEvictionCandidatesByRatio(copy);
3634
});
3735
}
3836

src/bench/rollingbloom.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ static void RollingBloom(benchmark::Bench& bench)
1313
uint32_t count = 0;
1414
bench.run([&] {
1515
count++;
16-
data[0] = count;
17-
data[1] = count >> 8;
18-
data[2] = count >> 16;
19-
data[3] = count >> 24;
16+
data[0] = count & 0xFF;
17+
data[1] = (count >> 8) & 0xFF;
18+
data[2] = (count >> 16) & 0xFF;
19+
data[3] = (count >> 24) & 0xFF;
2020
filter.insert(data);
2121

22-
data[0] = count >> 24;
23-
data[1] = count >> 16;
24-
data[2] = count >> 8;
25-
data[3] = count;
22+
data[0] = (count >> 24) & 0xFF;
23+
data[1] = (count >> 16) & 0xFF;
24+
data[2] = (count >> 8) & 0xFF;
25+
data[3] = count & 0xFF;
2626
filter.contains(data);
2727
});
2828
}

0 commit comments

Comments
 (0)