Skip to content

Commit c95419d

Browse files
committed
Add min rel accuracy stopping criterion
Clean up the initial commit Further cleaning of initial commit. Add test. Improvements to comments thanks to review Reformat thanks to clang format. Static cast to avoid conversion warning
1 parent 0da57b8 commit c95419d

11 files changed

+222
-20
lines changed

include/benchmark/benchmark.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
126126
}
127127
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128128
129-
Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130-
benchmark. This option overrides the `benchmark_min_time` flag.
129+
Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how
130+
long to run the benchmark. This option overrides the `benchmark_min_time` flag.
131+
132+
If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)`
133+
to set the required minimum relative accuracy used to determine how long to run
134+
the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag.
131135
132136
void BM_test(benchmark::State& state) {
133137
... body ...
@@ -1209,11 +1213,21 @@ class BENCHMARK_EXPORT Benchmark {
12091213
// multiplier kRangeMultiplier will be used.
12101214
Benchmark* RangeMultiplier(int multiplier);
12111215

1212-
// Set the minimum amount of time to use when running this benchmark. This
1213-
// option overrides the `benchmark_min_time` flag.
1216+
// Set the minimum amount of time to use to determine the required number
1217+
// of iterations when running this benchmark. This option overrides
1218+
// the `benchmark_min_time` flag.
12141219
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
12151220
Benchmark* MinTime(double t);
12161221

1222+
// Set the minimum relative accuracy to use to determine the required number
1223+
// of iterations when running this benchmark. This option overrides
1224+
// the `benchmark_min_rel_accuracy` flag.
1225+
// REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
1226+
// time is measured manually, i.e., `UseManualTime` has been called on this
1227+
// benchmark and each benchmark iteration should call
1228+
// `SetIterationTime(seconds)` to report the measured time.
1229+
Benchmark* MinRelAccuracy(double r);
1230+
12171231
// Set the minimum amount of time to run the benchmark before taking runtimes
12181232
// of this benchmark into account. This
12191233
// option overrides the `benchmark_min_warmup_time` flag.
@@ -1339,6 +1353,7 @@ class BENCHMARK_EXPORT Benchmark {
13391353

13401354
int range_multiplier_;
13411355
double min_time_;
1356+
double min_rel_accuracy_;
13421357
double min_warmup_time_;
13431358
IterationCount iterations_;
13441359
int repetitions_;
@@ -1715,6 +1730,7 @@ struct BENCHMARK_EXPORT BenchmarkName {
17151730
std::string function_name;
17161731
std::string args;
17171732
std::string min_time;
1733+
std::string min_rel_accuracy;
17181734
std::string min_warmup_time;
17191735
std::string iterations;
17201736
std::string repetitions;
@@ -1754,6 +1770,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
17541770
threads(1),
17551771
time_unit(GetDefaultTimeUnit()),
17561772
real_accumulated_time(0),
1773+
manual_accumulated_time_pow2(0),
17571774
cpu_accumulated_time(0),
17581775
max_heapbytes_used(0),
17591776
use_real_time_for_initial_big_o(false),
@@ -1781,6 +1798,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
17811798
int64_t repetitions;
17821799
TimeUnit time_unit;
17831800
double real_accumulated_time;
1801+
double manual_accumulated_time_pow2;
17841802
double cpu_accumulated_time;
17851803

17861804
// Return a value representing the real time per iteration in the unit

src/benchmark.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ BM_DEFINE_bool(benchmark_list_tests, false);
6464
// linked into the binary are run.
6565
BM_DEFINE_string(benchmark_filter, "");
6666

67-
// Specification of how long to run the benchmark.
67+
// Specification of either an exact number of iterations (specified as
68+
// `<integer>x`) or a minimum number of seconds (specified as `<float>s`) used
69+
// to determine how long to run the benchmark.
6870
//
69-
// It can be either an exact number of iterations (specified as `<integer>x`),
70-
// or a minimum number of seconds (specified as `<float>s`). If the latter
71-
// format (ie., min seconds) is used, the system may run the benchmark longer
72-
// until the results are considered significant.
71+
// If the latter format (i.e., min seconds) is used, the system may run
72+
// the benchmark longer until the results are considered significant.
7373
//
7474
// For backward compatibility, the `s` suffix may be omitted, in which case,
7575
// the specified number is interpreted as the number of seconds.
@@ -80,6 +80,19 @@ BM_DEFINE_string(benchmark_filter, "");
8080
// benchmark execution, regardless of number of threads.
8181
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
8282

83+
// Specification of required relative accuracy used to determine how
84+
// long to run the benchmark.
85+
//
86+
// REQUIRES: time is measured manually.
87+
//
88+
// Manual timers provide per-iteration times. The relative accuracy is
89+
// measured as the standard deviation of these per-iteration times divided by
90+
// the mean and the square root of the number of iterations. The benchmark is
91+
// run until both of the following conditions are fulfilled:
92+
// 1. the specified minimum time or number of iterations is reached
93+
// 2. the measured relative accuracy meets the specified requirement
94+
BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0);
95+
8396
// Minimum number of seconds a benchmark should be run before results should be
8497
// taken into account. This can be necessary, e.g., for benchmarks of code which
8598
// needs to fill some form of cache before performance is of interest.
@@ -93,7 +106,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1);
93106

94107
// If enabled, forces each benchmark to execute exactly one iteration and one
95108
// repetition, bypassing any configured
96-
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
109+
// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions()
97110
BM_DEFINE_bool(benchmark_dry_run, false);
98111

99112
// If set, enable random interleaving of repetitions of all benchmarks.
@@ -753,6 +766,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
753766
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
754767
ParseStringFlag(argv[i], "benchmark_min_time",
755768
&FLAGS_benchmark_min_time) ||
769+
ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy",
770+
&FLAGS_benchmark_min_rel_accuracy) ||
756771
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
757772
&FLAGS_benchmark_min_warmup_time) ||
758773
ParseInt32Flag(argv[i], "benchmark_repetitions",
@@ -826,7 +841,8 @@ void PrintDefaultHelp() {
826841
"benchmark"
827842
" [--benchmark_list_tests={true|false}]\n"
828843
" [--benchmark_filter=<regex>]\n"
829-
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
844+
" [--benchmark_min_time=`<integer>x` OR `<float>s`]\n"
845+
" [--benchmark_min_rel_accuracy=<min_rel_accuracy>]\n"
830846
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
831847
" [--benchmark_repetitions=<num_repetitions>]\n"
832848
" [--benchmark_dry_run={true|false}]\n"

src/benchmark_api_internal.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
2525
statistics_(benchmark_.statistics_),
2626
repetitions_(benchmark_.repetitions_),
2727
min_time_(benchmark_.min_time_),
28+
min_rel_accuracy_(benchmark_.min_rel_accuracy_),
2829
min_warmup_time_(benchmark_.min_warmup_time_),
2930
iterations_(benchmark_.iterations_),
3031
threads_(thread_count),
@@ -53,6 +54,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
5354
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
5455
}
5556

57+
if (!IsZero(benchmark->min_rel_accuracy_)) {
58+
name_.min_rel_accuracy =
59+
StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_);
60+
}
61+
5662
if (!IsZero(benchmark->min_warmup_time_)) {
5763
name_.min_warmup_time =
5864
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);

src/benchmark_api_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class BenchmarkInstance {
3636
const std::vector<Statistics>& statistics() const { return statistics_; }
3737
int repetitions() const { return repetitions_; }
3838
double min_time() const { return min_time_; }
39+
double min_rel_accuracy() const { return min_rel_accuracy_; }
3940
double min_warmup_time() const { return min_warmup_time_; }
4041
IterationCount iterations() const { return iterations_; }
4142
int threads() const { return threads_; }
@@ -67,6 +68,7 @@ class BenchmarkInstance {
6768
const std::vector<Statistics>& statistics_;
6869
int repetitions_;
6970
double min_time_;
71+
double min_rel_accuracy_;
7072
double min_warmup_time_;
7173
IterationCount iterations_;
7274
int threads_; // Number of concurrent threads to us

src/benchmark_register.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ Benchmark::Benchmark(const std::string& name)
217217
use_default_time_unit_(true),
218218
range_multiplier_(kRangeMultiplier),
219219
min_time_(0),
220+
min_rel_accuracy_(0),
220221
min_warmup_time_(0),
221222
iterations_(0),
222223
repetitions_(0),
@@ -372,6 +373,14 @@ Benchmark* Benchmark::MinTime(double t) {
372373
return this;
373374
}
374375

376+
// Stores `r` as the minimum relative accuracy required of this benchmark and
// returns `this`.
//
// The preconditions below are enforced with BM_CHECK: `r` must be strictly
// positive, no explicit iteration count may have been set, and manual timing
// must already be enabled on this benchmark.
Benchmark* Benchmark::MinRelAccuracy(double r) {
  // Validate every precondition before touching any state.
  BM_CHECK(r > 0.0);
  BM_CHECK(iterations_ == 0);
  BM_CHECK(use_manual_time_);

  min_rel_accuracy_ = r;
  return this;
}
383+
375384
Benchmark* Benchmark::MinWarmUpTime(double t) {
376385
BM_CHECK(t >= 0.0);
377386
BM_CHECK(iterations_ == 0);

src/benchmark_runner.cc

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace benchmark {
6060

6161
BM_DECLARE_bool(benchmark_dry_run);
6262
BM_DECLARE_string(benchmark_min_time);
63+
BM_DECLARE_double(benchmark_min_rel_accuracy);
6364
BM_DECLARE_double(benchmark_min_warmup_time);
6465
BM_DECLARE_int32(benchmark_repetitions);
6566
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
103104
if (report.skipped == 0u) {
104105
if (b.use_manual_time()) {
105106
report.real_accumulated_time = results.manual_time_used;
107+
report.manual_accumulated_time_pow2 = results.manual_time_used_pow2;
106108
} else {
107109
report.real_accumulated_time = results.real_time_used;
108110
}
@@ -151,6 +153,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
151153
results.cpu_time_used += timer.cpu_time_used();
152154
results.real_time_used += timer.real_time_used();
153155
results.manual_time_used += timer.manual_time_used();
156+
results.manual_time_used_pow2 += timer.manual_time_used_pow2();
154157
results.complexity_n += st.complexity_length_n();
155158
internal::Increment(&results.counters, st.counters);
156159
}
@@ -277,6 +280,11 @@ BenchmarkRunner::BenchmarkRunner(
277280
min_time(FLAGS_benchmark_dry_run
278281
? 0
279282
: ComputeMinTime(b_, parsed_benchtime_flag)),
283+
min_rel_accuracy(FLAGS_benchmark_dry_run
284+
? std::numeric_limits<double>::max()
285+
: (!IsZero(b.min_rel_accuracy())
286+
? b.min_rel_accuracy()
287+
: FLAGS_benchmark_min_rel_accuracy)),
280288
min_warmup_time(
281289
FLAGS_benchmark_dry_run
282290
? 0
@@ -347,8 +355,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
347355

348356
// Base decisions off of real time if requested by this benchmark.
349357
i.seconds = i.results.cpu_time_used;
358+
i.seconds_pow2 = 0;
350359
if (b.use_manual_time()) {
351360
i.seconds = i.results.manual_time_used;
361+
i.seconds_pow2 = i.results.manual_time_used_pow2;
352362
} else if (b.use_real_time()) {
353363
i.seconds = i.results.real_time_used;
354364
}
@@ -369,6 +379,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded(
369379
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
370380
multiplier = is_significant ? multiplier : 10.0;
371381

382+
if (!IsZero(GetMinRelAccuracy())) {
383+
multiplier =
384+
std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy());
385+
}
386+
372387
// So what seems to be the sufficiently-large iteration count? Round up.
373388
const IterationCount max_next_iters = static_cast<IterationCount>(
374389
std::llround(std::max(multiplier * static_cast<double>(i.iters),
@@ -386,14 +401,12 @@ bool BenchmarkRunner::ShouldReportIterationResults(
386401
// Either it has run for a sufficient amount of time
387402
// or because an error was reported.
388403
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
389-
i.iters >= kMaxIterations || // Too many iterations already.
390-
i.seconds >=
391-
GetMinTimeToApply() || // The elapsed time is large enough.
392-
// CPU time is specified but the elapsed real time greatly exceeds
393-
// the minimum time.
394-
// Note that user provided timers are except from this test.
395-
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
396-
!b.use_manual_time());
404+
// Too many iterations already.
405+
i.iters >= kMaxIterations ||
406+
// We have applied for enough time and the relative accuracy is good
407+
// enough. Relative accuracy is checked only for user provided timers.
408+
(HasSufficientTimeToApply(i) &&
409+
(!b.use_manual_time() || HasSufficientRelAccuracy(i)));
397410
}
398411

399412
double BenchmarkRunner::GetMinTimeToApply() const {
@@ -405,6 +418,26 @@ double BenchmarkRunner::GetMinTimeToApply() const {
405418
return warmup_done ? min_time : min_warmup_time;
406419
}
407420

421+
double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
422+
return std::sqrt(i.seconds_pow2 - std::pow(i.seconds, 2.) / static_cast<double>(i.iters)) / i.seconds;
423+
}
424+
425+
bool BenchmarkRunner::HasSufficientTimeToApply(
426+
const IterationResults& i) const {
427+
return i.seconds >= GetMinTimeToApply() ||
428+
// CPU time is specified but the elapsed real time greatly exceeds
429+
// the minimum time.
430+
// Note that user provided timers are except from this test.
431+
(!b.use_manual_time() &&
432+
i.results.real_time_used >= 5 * GetMinTimeToApply());
433+
}
434+
435+
bool BenchmarkRunner::HasSufficientRelAccuracy(
436+
const IterationResults& i) const {
437+
return (IsZero(GetMinRelAccuracy()) ||
438+
(GetRelAccuracy(i) <= GetMinRelAccuracy()));
439+
}
440+
408441
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
409442
warmup_done = true;
410443
iters = i;

src/benchmark_runner.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class BenchmarkRunner {
7171

7272
double GetMinTime() const { return min_time; }
7373

74+
double GetMinRelAccuracy() const { return min_rel_accuracy; }
75+
7476
bool HasExplicitIters() const { return has_explicit_iteration_count; }
7577

7678
IterationCount GetIters() const { return iters; }
@@ -83,6 +85,7 @@ class BenchmarkRunner {
8385

8486
BenchTimeType parsed_benchtime_flag;
8587
const double min_time;
88+
const double min_rel_accuracy;
8689
const double min_warmup_time;
8790
bool warmup_done;
8891
const int repeats;
@@ -102,6 +105,7 @@ class BenchmarkRunner {
102105
internal::ThreadManager::Result results;
103106
IterationCount iters;
104107
double seconds;
108+
double seconds_pow2;
105109
};
106110
IterationResults DoNIterations();
107111

@@ -115,6 +119,12 @@ class BenchmarkRunner {
115119

116120
double GetMinTimeToApply() const;
117121

122+
double GetRelAccuracy(const IterationResults& i) const;
123+
124+
bool HasSufficientTimeToApply(const IterationResults& i) const;
125+
126+
bool HasSufficientRelAccuracy(const IterationResults& i) const;
127+
118128
void FinishWarmUp(const IterationCount& i);
119129

120130
void RunWarmUp();

src/thread_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class ThreadManager {
2626
double real_time_used = 0;
2727
double cpu_time_used = 0;
2828
double manual_time_used = 0;
29+
double manual_time_used_pow2 = 0;
2930
int64_t complexity_n = 0;
3031
std::string report_label_;
3132
std::string skip_message_;

src/thread_timer.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ class ThreadTimer {
3838
}
3939

4040
// Called by each thread
41-
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
41+
// Records one manually measured iteration time: adds `seconds` to the running
// total and the square of `seconds` to the running total of squares.
void SetIterationTime(double seconds) {
  manual_time_used_ += seconds;
  // A plain multiplication squares the value without a call into the general
  // power function and without depending on <cmath> being included here.
  manual_time_used_pow2_ += seconds * seconds;
}
4245

4346
bool running() const { return running_; }
4447

@@ -60,6 +63,11 @@ class ThreadTimer {
6063
return manual_time_used_;
6164
}
6265

66+
double manual_time_used_pow2() const {
67+
BM_CHECK(!running_);
68+
return manual_time_used_pow2_;
69+
}
70+
6371
private:
6472
double ReadCpuTimerOfChoice() const {
6573
if (measure_process_cpu_time) return ProcessCPUUsage();
@@ -78,6 +86,7 @@ class ThreadTimer {
7886
double cpu_time_used_ = 0;
7987
// Manually set iteration time. User sets this with SetIterationTime(seconds).
8088
double manual_time_used_ = 0;
89+
double manual_time_used_pow2_ = 0;
8190
};
8291

8392
} // namespace internal

test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_
109109
compile_benchmark_test(benchmark_min_time_flag_iters_test)
110110
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
111111

112+
compile_benchmark_test(benchmark_min_rel_accuracy_flag_test)
113+
benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test)
114+
112115
add_filter_test(filter_simple "Foo" 3)
113116
add_filter_test(filter_simple_negative "-Foo" 2)
114117
add_filter_test(filter_suffix "BM_.*" 4)

0 commit comments

Comments
 (0)