Skip to content

Commit 0bf52e0

Browse files
committed
Add min rel accuracy stopping criterion
Clean up the initial commit Further cleaning of initial commit. Add test. Improvements to comments thanks to review Reformat thanks to clang format. Static cast to avoid conversion warning
1 parent ce80955 commit 0bf52e0

11 files changed

+222
-20
lines changed

include/benchmark/benchmark.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
127127
}
128128
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129129
130-
Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131-
benchmark. This option overrides the `benchmark_min_time` flag.
130+
Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how
131+
long to run the benchmark. This option overrides the `benchmark_min_time` flag.
132+
133+
If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)`
134+
to set the required minimum relative accuracy used to determine how long to run
135+
the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag.
132136
133137
void BM_test(benchmark::State& state) {
134138
... body ...
@@ -1235,11 +1239,21 @@ class BENCHMARK_EXPORT Benchmark {
12351239
// multiplier kRangeMultiplier will be used.
12361240
Benchmark* RangeMultiplier(int multiplier);
12371241

1238-
// Set the minimum amount of time to use when running this benchmark. This
1239-
// option overrides the `benchmark_min_time` flag.
1242+
// Set the minimum amount of time to use to determine the required number
1243+
// of iterations when running this benchmark. This option overrides
1244+
// the `benchmark_min_time` flag.
12401245
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
12411246
Benchmark* MinTime(double t);
12421247

1248+
// Set the minimum relative accuracy to use to determine the required number
1249+
// of iterations when running this benchmark. This option overrides
1250+
// the `benchmark_min_rel_accuracy` flag.
1251+
// REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
1252+
// time is measured manually, i.e., `UseManualTime` has been called on this
1253+
// benchmark and each benchmark iteration should call
1254+
// `SetIterationTime(seconds)` to report the measured time.
1255+
Benchmark* MinRelAccuracy(double r);
1256+
12431257
// Set the minimum amount of time to run the benchmark before taking runtimes
12441258
// of this benchmark into account. This
12451259
// option overrides the `benchmark_min_warmup_time` flag.
@@ -1365,6 +1379,7 @@ class BENCHMARK_EXPORT Benchmark {
13651379

13661380
int range_multiplier_;
13671381
double min_time_;
1382+
double min_rel_accuracy_;
13681383
double min_warmup_time_;
13691384
IterationCount iterations_;
13701385
int repetitions_;
@@ -1776,6 +1791,7 @@ struct BENCHMARK_EXPORT BenchmarkName {
17761791
std::string function_name;
17771792
std::string args;
17781793
std::string min_time;
1794+
std::string min_rel_accuracy;
17791795
std::string min_warmup_time;
17801796
std::string iterations;
17811797
std::string repetitions;
@@ -1815,6 +1831,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18151831
threads(1),
18161832
time_unit(GetDefaultTimeUnit()),
18171833
real_accumulated_time(0),
1834+
manual_accumulated_time_pow2(0),
18181835
cpu_accumulated_time(0),
18191836
max_heapbytes_used(0),
18201837
use_real_time_for_initial_big_o(false),
@@ -1842,6 +1859,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18421859
int64_t repetitions;
18431860
TimeUnit time_unit;
18441861
double real_accumulated_time;
1862+
double manual_accumulated_time_pow2;
18451863
double cpu_accumulated_time;
18461864

18471865
// Return a value representing the real time per iteration in the unit

src/benchmark.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,12 @@ BM_DEFINE_bool(benchmark_list_tests, false);
6868
// linked into the binary are run.
6969
BM_DEFINE_string(benchmark_filter, "");
7070

71-
// Specification of how long to run the benchmark.
71+
// Specification of either an exact number of iterations (specified as
72+
// `<integer>x`) or a minimum number of seconds (specified as `<float>s`) used
73+
// to determine how long to run the benchmark.
7274
//
73-
// It can be either an exact number of iterations (specified as `<integer>x`),
74-
// or a minimum number of seconds (specified as `<float>s`). If the latter
75-
// format (ie., min seconds) is used, the system may run the benchmark longer
76-
// until the results are considered significant.
75+
// If the latter format (i.e., min seconds) is used, the system may run
76+
// the benchmark longer until the results are considered significant.
7777
//
7878
// For backward compatibility, the `s` suffix may be omitted, in which case,
7979
// the specified number is interpreted as the number of seconds.
@@ -84,6 +84,19 @@ BM_DEFINE_string(benchmark_filter, "");
8484
// benchmark execution, regardless of number of threads.
8585
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
8686

87+
// Specification of required relative accuracy used to determine how
88+
// long to run the benchmark.
89+
//
90+
// REQUIRES: time is measured manually.
91+
//
92+
// Manual timers provide per-iteration times. The relative accuracy is
93+
// measured as the standard deviation of these per-iteration times divided by
94+
// the mean and the square root of the number of iterations. The benchmark is
95+
// run until both of the following conditions are fulfilled:
96+
// 1. the specified minimum time or number of iterations is reached
97+
// 2. the measured relative accuracy meets the specified requirement
98+
BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0);
99+
87100
// Minimum number of seconds a benchmark should be run before results should be
88101
// taken into account. This can be necessary, e.g., for benchmarks of code which
89102
// needs to fill some form of cache before performance is of interest.
@@ -97,7 +110,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1);
97110

98111
// If enabled, forces each benchmark to execute exactly one iteration and one
99112
// repetition, bypassing any configured
100-
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
113+
// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions()
101114
BM_DEFINE_bool(benchmark_dry_run, false);
102115

103116
// If set, enable random interleaving of repetitions of all benchmarks.
@@ -757,6 +770,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
757770
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
758771
ParseStringFlag(argv[i], "benchmark_min_time",
759772
&FLAGS_benchmark_min_time) ||
773+
ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy",
774+
&FLAGS_benchmark_min_rel_accuracy) ||
760775
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
761776
&FLAGS_benchmark_min_warmup_time) ||
762777
ParseInt32Flag(argv[i], "benchmark_repetitions",
@@ -873,7 +888,8 @@ void PrintDefaultHelp() {
873888
"benchmark"
874889
" [--benchmark_list_tests={true|false}]\n"
875890
" [--benchmark_filter=<regex>]\n"
876-
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
891+
" [--benchmark_min_time=`<integer>x` OR `<float>s`]\n"
892+
" [--benchmark_min_rel_accuracy=<min_rel_accuracy>]\n"
877893
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
878894
" [--benchmark_repetitions=<num_repetitions>]\n"
879895
" [--benchmark_dry_run={true|false}]\n"

src/benchmark_api_internal.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
2525
statistics_(benchmark_.statistics_),
2626
repetitions_(benchmark_.repetitions_),
2727
min_time_(benchmark_.min_time_),
28+
min_rel_accuracy_(benchmark_.min_rel_accuracy_),
2829
min_warmup_time_(benchmark_.min_warmup_time_),
2930
iterations_(benchmark_.iterations_),
3031
threads_(thread_count),
@@ -53,6 +54,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
5354
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
5455
}
5556

57+
if (!IsZero(benchmark->min_rel_accuracy_)) {
58+
name_.min_rel_accuracy =
59+
StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_);
60+
}
61+
5662
if (!IsZero(benchmark->min_warmup_time_)) {
5763
name_.min_warmup_time =
5864
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);

src/benchmark_api_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class BenchmarkInstance {
3636
const std::vector<Statistics>& statistics() const { return statistics_; }
3737
int repetitions() const { return repetitions_; }
3838
double min_time() const { return min_time_; }
39+
double min_rel_accuracy() const { return min_rel_accuracy_; }
3940
double min_warmup_time() const { return min_warmup_time_; }
4041
IterationCount iterations() const { return iterations_; }
4142
int threads() const { return threads_; }
@@ -67,6 +68,7 @@ class BenchmarkInstance {
6768
const std::vector<Statistics>& statistics_;
6869
int repetitions_;
6970
double min_time_;
71+
double min_rel_accuracy_;
7072
double min_warmup_time_;
7173
IterationCount iterations_;
7274
int threads_; // Number of concurrent threads to use

src/benchmark_register.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ Benchmark::Benchmark(const std::string& name)
217217
use_default_time_unit_(true),
218218
range_multiplier_(kRangeMultiplier),
219219
min_time_(0),
220+
min_rel_accuracy_(0),
220221
min_warmup_time_(0),
221222
iterations_(0),
222223
repetitions_(0),
@@ -372,6 +373,14 @@ Benchmark* Benchmark::MinTime(double t) {
372373
return this;
373374
}
374375

376+
// Stores `r` as the minimum relative accuracy requested for this benchmark and
// returns `this`.
//
// Checked preconditions (via BM_CHECK): `r` is strictly positive, no explicit
// iteration count has been set (`iterations_ == 0`), and manual timing is
// already enabled (`use_manual_time_`) at the time of the call.
Benchmark* Benchmark::MinRelAccuracy(double r) {
377+
BM_CHECK(r > 0.0);
378+
BM_CHECK(iterations_ == 0);
379+
// The accuracy criterion is only accepted for manually timed benchmarks.
BM_CHECK(use_manual_time_);
380+
min_rel_accuracy_ = r;
381+
return this;
382+
}
383+
375384
Benchmark* Benchmark::MinWarmUpTime(double t) {
376385
BM_CHECK(t >= 0.0);
377386
BM_CHECK(iterations_ == 0);

src/benchmark_runner.cc

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace benchmark {
6060

6161
BM_DECLARE_bool(benchmark_dry_run);
6262
BM_DECLARE_string(benchmark_min_time);
63+
BM_DECLARE_double(benchmark_min_rel_accuracy);
6364
BM_DECLARE_double(benchmark_min_warmup_time);
6465
BM_DECLARE_int32(benchmark_repetitions);
6566
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
103104
if (report.skipped == 0u) {
104105
if (b.use_manual_time()) {
105106
report.real_accumulated_time = results.manual_time_used;
107+
report.manual_accumulated_time_pow2 = results.manual_time_used_pow2;
106108
} else {
107109
report.real_accumulated_time = results.real_time_used;
108110
}
@@ -154,6 +156,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
154156
results.cpu_time_used += timer.cpu_time_used();
155157
results.real_time_used += timer.real_time_used();
156158
results.manual_time_used += timer.manual_time_used();
159+
results.manual_time_used_pow2 += timer.manual_time_used_pow2();
157160
results.complexity_n += st.complexity_length_n();
158161
internal::Increment(&results.counters, st.counters);
159162
}
@@ -280,6 +283,11 @@ BenchmarkRunner::BenchmarkRunner(
280283
min_time(FLAGS_benchmark_dry_run
281284
? 0
282285
: ComputeMinTime(b_, parsed_benchtime_flag)),
286+
min_rel_accuracy(FLAGS_benchmark_dry_run
287+
? std::numeric_limits<double>::max()
288+
: (!IsZero(b.min_rel_accuracy())
289+
? b.min_rel_accuracy()
290+
: FLAGS_benchmark_min_rel_accuracy)),
283291
min_warmup_time(
284292
FLAGS_benchmark_dry_run
285293
? 0
@@ -350,8 +358,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
350358

351359
// Base decisions off of real time if requested by this benchmark.
352360
i.seconds = i.results.cpu_time_used;
361+
i.seconds_pow2 = 0;
353362
if (b.use_manual_time()) {
354363
i.seconds = i.results.manual_time_used;
364+
i.seconds_pow2 = i.results.manual_time_used_pow2;
355365
} else if (b.use_real_time()) {
356366
i.seconds = i.results.real_time_used;
357367
}
@@ -372,6 +382,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded(
372382
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
373383
multiplier = is_significant ? multiplier : 10.0;
374384

385+
if (!IsZero(GetMinRelAccuracy())) {
386+
multiplier =
387+
std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy());
388+
}
389+
375390
// So what seems to be the sufficiently-large iteration count? Round up.
376391
const IterationCount max_next_iters = static_cast<IterationCount>(
377392
std::llround(std::max(multiplier * static_cast<double>(i.iters),
@@ -389,14 +404,12 @@ bool BenchmarkRunner::ShouldReportIterationResults(
389404
// Either it has run for a sufficient amount of time
390405
// or because an error was reported.
391406
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
392-
i.iters >= kMaxIterations || // Too many iterations already.
393-
i.seconds >=
394-
GetMinTimeToApply() || // The elapsed time is large enough.
395-
// CPU time is specified but the elapsed real time greatly exceeds
396-
// the minimum time.
397-
// Note that user provided timers are except from this test.
398-
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
399-
!b.use_manual_time());
407+
// Too many iterations already.
408+
i.iters >= kMaxIterations ||
409+
// We have applied for enough time and the relative accuracy is good
410+
// enough. Relative accuracy is checked only for user provided timers.
411+
(HasSufficientTimeToApply(i) &&
412+
(!b.use_manual_time() || HasSufficientRelAccuracy(i)));
400413
}
401414

402415
double BenchmarkRunner::GetMinTimeToApply() const {
@@ -408,6 +421,26 @@ double BenchmarkRunner::GetMinTimeToApply() const {
408421
return warmup_done ? min_time : min_warmup_time;
409422
}
410423

424+
double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
425+
return std::sqrt(i.seconds_pow2 - std::pow(i.seconds, 2.) / static_cast<double>(i.iters)) / i.seconds;
426+
}
427+
428+
bool BenchmarkRunner::HasSufficientTimeToApply(
429+
const IterationResults& i) const {
430+
return i.seconds >= GetMinTimeToApply() ||
431+
// CPU time is specified but the elapsed real time greatly exceeds
432+
// the minimum time.
433+
// Note that user provided timers are except from this test.
434+
(!b.use_manual_time() &&
435+
i.results.real_time_used >= 5 * GetMinTimeToApply());
436+
}
437+
438+
bool BenchmarkRunner::HasSufficientRelAccuracy(
439+
const IterationResults& i) const {
440+
return (IsZero(GetMinRelAccuracy()) ||
441+
(GetRelAccuracy(i) <= GetMinRelAccuracy()));
442+
}
443+
411444
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
412445
warmup_done = true;
413446
iters = i;

src/benchmark_runner.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class BenchmarkRunner {
7171

7272
double GetMinTime() const { return min_time; }
7373

74+
// Returns the minimum relative accuracy stored in this runner.
double GetMinRelAccuracy() const { return min_rel_accuracy; }
75+
7476
bool HasExplicitIters() const { return has_explicit_iteration_count; }
7577

7678
IterationCount GetIters() const { return iters; }
@@ -83,6 +85,7 @@ class BenchmarkRunner {
8385

8486
BenchTimeType parsed_benchtime_flag;
8587
const double min_time;
88+
const double min_rel_accuracy;
8689
const double min_warmup_time;
8790
bool warmup_done;
8891
const int repeats;
@@ -102,6 +105,7 @@ class BenchmarkRunner {
102105
internal::ThreadManager::Result results;
103106
IterationCount iters;
104107
double seconds;
108+
double seconds_pow2;
105109
};
106110
IterationResults DoNIterations();
107111

@@ -115,6 +119,12 @@ class BenchmarkRunner {
115119

116120
double GetMinTimeToApply() const;
117121

122+
double GetRelAccuracy(const IterationResults& i) const;
123+
124+
bool HasSufficientTimeToApply(const IterationResults& i) const;
125+
126+
bool HasSufficientRelAccuracy(const IterationResults& i) const;
127+
118128
void FinishWarmUp(const IterationCount& i);
119129

120130
void RunWarmUp();

src/thread_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class ThreadManager {
2626
double real_time_used = 0;
2727
double cpu_time_used = 0;
2828
double manual_time_used = 0;
29+
double manual_time_used_pow2 = 0;
2930
int64_t complexity_n = 0;
3031
std::string report_label_;
3132
std::string skip_message_;

src/thread_timer.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ class ThreadTimer {
3838
}
3939

4040
// Called by each thread
41-
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
41+
// Records one manually measured iteration time, given in seconds. Both the
// running sum of the reported times and the running sum of their squares are
// updated.
void SetIterationTime(double seconds) {
  manual_time_used_ += seconds;
  // A plain multiplication is enough for squaring; it avoids a general-purpose
  // std::pow call on every iteration and needs nothing from <cmath>.
  manual_time_used_pow2_ += seconds * seconds;
}
4245

4346
bool running() const { return running_; }
4447

@@ -60,6 +63,11 @@ class ThreadTimer {
6063
return manual_time_used_;
6164
}
6265

66+
// Returns the accumulated sum of the squared manual iteration times.
// REQUIRES: the timer is not running (enforced with BM_CHECK).
double manual_time_used_pow2() const {
67+
BM_CHECK(!running_);
68+
return manual_time_used_pow2_;
69+
}
70+
6371
private:
6472
double ReadCpuTimerOfChoice() const {
6573
if (measure_process_cpu_time) return ProcessCPUUsage();
@@ -78,6 +86,7 @@ class ThreadTimer {
7886
double cpu_time_used_ = 0;
7987
// Manually set iteration time. User sets this with SetIterationTime(seconds).
8088
double manual_time_used_ = 0;
89+
double manual_time_used_pow2_ = 0;
8190
};
8291

8392
} // namespace internal

test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_
109109
compile_benchmark_test(benchmark_min_time_flag_iters_test)
110110
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
111111

112+
compile_benchmark_test(benchmark_min_rel_accuracy_flag_test)
113+
benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test)
114+
112115
add_filter_test(filter_simple "Foo" 3)
113116
add_filter_test(filter_simple_negative "-Foo" 2)
114117
add_filter_test(filter_suffix "BM_.*" 4)

0 commit comments

Comments
 (0)