Skip to content

Commit 3532818

Browse files
committed
bench: Add support for measuring CPU cycles
This adds cycle min/max/avg to the statistics. Supported on x86 and x86_64 (natively through rdtsc), as well as Linux (perf syscall).
1 parent 55b2edd commit 3532818

File tree

5 files changed

+121
-5
lines changed

5 files changed

+121
-5
lines changed

src/Makefile.bench.include

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ bench_bench_bitcoin_SOURCES = \
2222
bench/mempool_eviction.cpp \
2323
bench/verify_script.cpp \
2424
bench/base58.cpp \
25-
bench/lockedpool.cpp
25+
bench/lockedpool.cpp \
26+
bench/perf.cpp \
27+
bench/perf.h
2628

2729
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_TEST_FILES)
2830

src/bench/bench.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

55
#include "bench.h"
6+
#include "perf.h"
67

78
#include <iostream>
89
#include <iomanip>
@@ -26,7 +27,9 @@ BenchRunner::BenchRunner(std::string name, BenchFunction func)
2627
void
2728
BenchRunner::RunAll(double elapsedTimeForOne)
2829
{
29-
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << "\n";
30+
perf_init();
31+
std::cout << "#Benchmark" << "," << "count" << "," << "min" << "," << "max" << "," << "average" << ","
32+
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";
3033

3134
for (std::map<std::string,BenchFunction>::iterator it = benchmarks.begin();
3235
it != benchmarks.end(); ++it) {
@@ -35,6 +38,7 @@ BenchRunner::RunAll(double elapsedTimeForOne)
3538
BenchFunction& func = it->second;
3639
func(state);
3740
}
41+
perf_fini();
3842
}
3943

4044
bool State::KeepRunning()
@@ -44,15 +48,24 @@ bool State::KeepRunning()
4448
return true;
4549
}
4650
double now;
51+
uint64_t nowCycles;
4752
if (count == 0) {
4853
lastTime = beginTime = now = gettimedouble();
54+
lastCycles = beginCycles = nowCycles = perf_cpucycles();
4955
}
5056
else {
5157
now = gettimedouble();
5258
double elapsed = now - lastTime;
5359
double elapsedOne = elapsed * countMaskInv;
5460
if (elapsedOne < minTime) minTime = elapsedOne;
5561
if (elapsedOne > maxTime) maxTime = elapsedOne;
62+
63+
// We only use relative values, so don't have to handle 64-bit wrap-around specially
64+
nowCycles = perf_cpucycles();
65+
uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv;
66+
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
67+
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;
68+
5669
if (elapsed*128 < maxElapsed) {
5770
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
5871
// The restart avoids including the overhead of this code in the measurement.
@@ -61,6 +74,8 @@ bool State::KeepRunning()
6174
count = 0;
6275
minTime = std::numeric_limits<double>::max();
6376
maxTime = std::numeric_limits<double>::min();
77+
minCycles = std::numeric_limits<uint64_t>::max();
78+
maxCycles = std::numeric_limits<uint64_t>::min();
6479
return true;
6580
}
6681
if (elapsed*16 < maxElapsed) {
@@ -72,6 +87,7 @@ bool State::KeepRunning()
7287
}
7388
}
7489
lastTime = now;
90+
lastCycles = nowCycles;
7591
++count;
7692

7793
if (now - beginTime < maxElapsed) return true; // Keep going
@@ -80,7 +96,9 @@ bool State::KeepRunning()
8096

8197
// Output results
8298
double average = (now-beginTime)/count;
83-
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << "\n";
99+
int64_t averageCycles = (nowCycles-beginCycles)/count;
100+
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << minTime << "," << maxTime << "," << average << ","
101+
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";
84102

85103
return false;
86104
}

src/bench/bench.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,18 @@ namespace benchmark {
4141
double maxElapsed;
4242
double beginTime;
4343
double lastTime, minTime, maxTime, countMaskInv;
44-
int64_t count;
45-
int64_t countMask;
44+
uint64_t count;
45+
uint64_t countMask;
46+
uint64_t beginCycles;
47+
uint64_t lastCycles;
48+
uint64_t minCycles;
49+
uint64_t maxCycles;
4650
public:
4751
State(std::string _name, double _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) {
4852
minTime = std::numeric_limits<double>::max();
4953
maxTime = std::numeric_limits<double>::min();
54+
minCycles = std::numeric_limits<uint64_t>::max();
55+
maxCycles = std::numeric_limits<uint64_t>::min();
5056
countMask = 1;
5157
countMaskInv = 1./(countMask + 1);
5258
}

src/bench/perf.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2016 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include "perf.h"
6+
7+
#if defined(__i386__) || defined(__x86_64__)
8+
9+
/* These architectures support querying the cycle counter
10+
* from user space, no need for any syscall overhead.
11+
*/
12+
/* Nothing to set up here: this variant of perf_init() is intentionally empty. */
void perf_init(void) { }
13+
/* Nothing to release here: this variant of perf_fini() is intentionally empty. */
void perf_fini(void) { }
14+
15+
#elif defined(__linux__)
16+
17+
#include <unistd.h>
18+
#include <sys/syscall.h>
19+
#include <linux/perf_event.h>
20+
21+
static int fd = -1;
22+
static struct perf_event_attr attr;
23+
24+
void perf_init(void)
25+
{
26+
attr.type = PERF_TYPE_HARDWARE;
27+
attr.config = PERF_COUNT_HW_CPU_CYCLES;
28+
fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
29+
}
30+
31+
void perf_fini(void)
32+
{
33+
if (fd != -1) {
34+
close(fd);
35+
}
36+
}
37+
38+
uint64_t perf_cpucycles(void)
39+
{
40+
uint64_t result = 0;
41+
if (fd == -1 || read(fd, &result, sizeof(result)) < (ssize_t)sizeof(result)) {
42+
return 0;
43+
}
44+
return result;
45+
}
46+
47+
#else /* Unhandled platform */
48+
49+
/* Unhandled platform: there is no cycle counter to set up. */
void perf_init(void) { }
50+
/* Unhandled platform: there is nothing to tear down. */
void perf_fini(void) { }
51+
/* Unhandled platform: always reports 0 cycles. */
uint64_t perf_cpucycles(void) { return 0; }
52+
53+
#endif

src/bench/perf.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright (c) 2016 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
/** Functions for measurement of CPU cycles */
6+
#ifndef H_PERF
7+
#define H_PERF
8+
9+
#include <stdint.h>
10+
11+
#if defined(__i386__)
12+
13+
/**
 * Read the CPU time-stamp counter on 32-bit x86.
 *
 * @return the 64-bit counter value assembled from EDX (high half) and
 *         EAX (low half).
 */
static inline uint64_t perf_cpucycles(void)
{
    uint32_t low_half;
    uint32_t high_half;
    /* 0x0f 0x31 is the raw encoding of RDTSC, which leaves its result in EDX:EAX. */
    __asm__ __volatile__ (".byte 0x0f, 0x31" : "=a" (low_half), "=d" (high_half));
    return (((uint64_t)high_half) << 32) | ((uint64_t)low_half);
}
19+
20+
#elif defined(__x86_64__)
21+
22+
/**
 * Read the CPU time-stamp counter on x86_64 with the RDTSC instruction.
 *
 * @return the 64-bit counter value assembled from EDX (high half) and
 *         EAX (low half).
 */
static inline uint64_t perf_cpucycles(void)
{
    uint32_t eax_part;
    uint32_t edx_part;
    __asm__ __volatile__ ("rdtsc" : "=a"(eax_part), "=d"(edx_part));

    /* Widen before shifting so the high half is not lost in 32-bit arithmetic. */
    const uint64_t upper = ((uint64_t)edx_part) << 32;
    return upper | ((uint64_t)eax_part);
}
28+
#else
29+
30+
uint64_t perf_cpucycles(void);
31+
32+
#endif
33+
34+
void perf_init(void);
35+
void perf_fini(void);
36+
37+
#endif // H_PERF

0 commit comments

Comments
 (0)