Skip to content

Commit 35285be

Browse files
committed
Add min rel accuracy stopping criterion
Clean up the initial commit Further cleaning of initial commit. Add test. Improvements to comments thanks to review Reformat thanks to clang format. Static cast to avoid conversion warning
1 parent b2b0aab commit 35285be

11 files changed

+222
-20
lines changed

include/benchmark/benchmark.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
126126
}
127127
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128128
129-
Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130-
benchmark. This option overrides the `benchmark_min_time` flag.
129+
Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how
130+
long to run the benchmark. This option overrides the `benchmark_min_time` flag.
131+
132+
If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)`
133+
to set the required minimum relative accuracy used to determine how long to run
134+
the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag.
131135
132136
void BM_test(benchmark::State& state) {
133137
... body ...
@@ -1262,11 +1266,21 @@ class BENCHMARK_EXPORT Benchmark {
12621266
// multiplier kRangeMultiplier will be used.
12631267
Benchmark* RangeMultiplier(int multiplier);
12641268

1265-
// Set the minimum amount of time to use when running this benchmark. This
1266-
// option overrides the `benchmark_min_time` flag.
1269+
// Set the minimum amount of time to use to determine the required number
1270+
// of iterations when running this benchmark. This option overrides
1271+
// the `benchmark_min_time` flag.
12671272
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
12681273
Benchmark* MinTime(double t);
12691274

1275+
// Set the minimum relative accuracy to use to determine the required number
1276+
// of iterations when running this benchmark. This option overrides
1277+
// the `benchmark_min_rel_accuracy` flag.
1278+
// REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
1279+
// time is measured manually, i.e., `UseManualTime` has been called on this
1280+
// benchmark and each benchmark iteration should call
1281+
// `SetIterationTime(seconds)` to report the measured time.
1282+
Benchmark* MinRelAccuracy(double r);
1283+
12701284
// Set the minimum amount of time to run the benchmark before taking runtimes
12711285
// of this benchmark into account. This
12721286
// option overrides the `benchmark_min_warmup_time` flag.
@@ -1389,6 +1403,7 @@ class BENCHMARK_EXPORT Benchmark {
13891403

13901404
int range_multiplier_;
13911405
double min_time_;
1406+
double min_rel_accuracy_;
13921407
double min_warmup_time_;
13931408
IterationCount iterations_;
13941409
int repetitions_;
@@ -1821,6 +1836,7 @@ struct BENCHMARK_EXPORT BenchmarkName {
18211836
std::string function_name;
18221837
std::string args;
18231838
std::string min_time;
1839+
std::string min_rel_accuracy;
18241840
std::string min_warmup_time;
18251841
std::string iterations;
18261842
std::string repetitions;
@@ -1860,6 +1876,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18601876
threads(1),
18611877
time_unit(GetDefaultTimeUnit()),
18621878
real_accumulated_time(0),
1879+
manual_accumulated_time_pow2(0),
18631880
cpu_accumulated_time(0),
18641881
max_heapbytes_used(0),
18651882
use_real_time_for_initial_big_o(false),
@@ -1888,6 +1905,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
18881905
int64_t repetitions;
18891906
TimeUnit time_unit;
18901907
double real_accumulated_time;
1908+
double manual_accumulated_time_pow2;
18911909
double cpu_accumulated_time;
18921910

18931911
// Return a value representing the real time per iteration in the unit

src/benchmark.cc

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ BM_DEFINE_bool(benchmark_list_tests, false);
6565
// linked into the binary are run.
6666
BM_DEFINE_string(benchmark_filter, "");
6767

68-
// Specification of how long to run the benchmark.
68+
// Specification of either an exact number of iterations (specified as
69+
// `<integer>x`) or a minimum number of seconds (specified as `<float>s`) used
70+
// to determine how long to run the benchmark.
6971
//
70-
// It can be either an exact number of iterations (specified as `<integer>x`),
71-
// or a minimum number of seconds (specified as `<float>s`). If the latter
72-
// format (ie., min seconds) is used, the system may run the benchmark longer
73-
// until the results are considered significant.
72+
// If the latter format (i.e., min seconds) is used, the system may run
73+
// the benchmark longer until the results are considered significant.
7474
//
7575
// For backward compatibility, the `s` suffix may be omitted, in which case,
7676
// the specified number is interpreted as the number of seconds.
@@ -81,6 +81,19 @@ BM_DEFINE_string(benchmark_filter, "");
8181
// benchmark execution, regardless of number of threads.
8282
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
8383

84+
// Specification of required relative accuracy used to determine how
85+
// long to run the benchmark.
86+
//
87+
// REQUIRES: time is measured manually.
88+
//
89+
// Manual timers provide per-iteration times. The relative accuracy is
90+
// measured as the standard deviation of these per-iteration times divided by
91+
// the mean and the square root of the number of iterations. The benchmark is
92+
// run until both of the following conditions are fulfilled:
93+
// 1. the specified minimum time or number of iterations is reached
94+
// 2. the measured relative accuracy meets the specified requirement
95+
BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0);
96+
8497
// Minimum number of seconds a benchmark should be run before results should be
8598
// taken into account. This can be necessary, e.g., for benchmarks of code which
8699
// needs to fill some form of cache before performance is of interest.
@@ -94,7 +107,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1);
94107

95108
// If enabled, forces each benchmark to execute exactly one iteration and one
96109
// repetition, bypassing any configured
97-
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
110+
// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions()
98111
BM_DEFINE_bool(benchmark_dry_run, false);
99112

100113
// If set, enable random interleaving of repetitions of all benchmarks.
@@ -722,6 +735,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
722735
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
723736
ParseStringFlag(argv[i], "benchmark_min_time",
724737
&FLAGS_benchmark_min_time) ||
738+
ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy",
739+
&FLAGS_benchmark_min_rel_accuracy) ||
725740
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
726741
&FLAGS_benchmark_min_warmup_time) ||
727742
ParseInt32Flag(argv[i], "benchmark_repetitions",
@@ -793,7 +808,8 @@ void PrintDefaultHelp() {
793808
"benchmark"
794809
" [--benchmark_list_tests={true|false}]\n"
795810
" [--benchmark_filter=<regex>]\n"
796-
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
811+
" [--benchmark_min_time=`<integer>x` OR `<float>s`]\n"
812+
" [--benchmark_min_rel_accuracy=<min_rel_accuracy>]\n"
797813
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
798814
" [--benchmark_repetitions=<num_repetitions>]\n"
799815
" [--benchmark_dry_run={true|false}]\n"

src/benchmark_api_internal.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
2525
statistics_(benchmark_.statistics_),
2626
repetitions_(benchmark_.repetitions_),
2727
min_time_(benchmark_.min_time_),
28+
min_rel_accuracy_(benchmark_.min_rel_accuracy_),
2829
min_warmup_time_(benchmark_.min_warmup_time_),
2930
iterations_(benchmark_.iterations_),
3031
threads_(thread_count) {
@@ -51,6 +52,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
5152
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
5253
}
5354

55+
if (!IsZero(benchmark->min_rel_accuracy_)) {
56+
name_.min_rel_accuracy =
57+
StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_);
58+
}
59+
5460
if (!IsZero(benchmark->min_warmup_time_)) {
5561
name_.min_warmup_time =
5662
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);

src/benchmark_api_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class BenchmarkInstance {
3636
const std::vector<Statistics>& statistics() const { return statistics_; }
3737
int repetitions() const { return repetitions_; }
3838
double min_time() const { return min_time_; }
39+
// Returns the stored minimum relative accuracy (`min_rel_accuracy_`).
double min_rel_accuracy() const { return min_rel_accuracy_; }
3940
double min_warmup_time() const { return min_warmup_time_; }
4041
IterationCount iterations() const { return iterations_; }
4142
int threads() const { return threads_; }
@@ -64,6 +65,7 @@ class BenchmarkInstance {
6465
const std::vector<Statistics>& statistics_;
6566
int repetitions_;
6667
double min_time_;
68+
double min_rel_accuracy_;
6769
double min_warmup_time_;
6870
IterationCount iterations_;
6971
int threads_; // Number of concurrent threads to use

src/benchmark_register.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ Benchmark::Benchmark(const std::string& name)
211211
use_default_time_unit_(true),
212212
range_multiplier_(kRangeMultiplier),
213213
min_time_(0),
214+
min_rel_accuracy_(0),
214215
min_warmup_time_(0),
215216
iterations_(0),
216217
repetitions_(0),
@@ -356,6 +357,14 @@ Benchmark* Benchmark::MinTime(double t) {
356357
return this;
357358
}
358359

360+
// Stores `r` as the minimum relative accuracy of this benchmark and returns
// `this`. The BM_CHECKs require that `r` is strictly positive, that no
// explicit iteration count is set (`iterations_ == 0`), and that
// `use_manual_time_` is already set when this is called.
Benchmark* Benchmark::MinRelAccuracy(double r) {
361+
BM_CHECK(r > 0.0);
362+
BM_CHECK(iterations_ == 0);
363+
BM_CHECK(use_manual_time_);
364+
min_rel_accuracy_ = r;
365+
return this;
366+
}
367+
359368
Benchmark* Benchmark::MinWarmUpTime(double t) {
360369
BM_CHECK(t >= 0.0);
361370
BM_CHECK(iterations_ == 0);

src/benchmark_runner.cc

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace benchmark {
6060

6161
BM_DECLARE_bool(benchmark_dry_run);
6262
BM_DECLARE_string(benchmark_min_time);
63+
BM_DECLARE_double(benchmark_min_rel_accuracy);
6364
BM_DECLARE_double(benchmark_min_warmup_time);
6465
BM_DECLARE_int32(benchmark_repetitions);
6566
BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
103104
if (!report.skipped) {
104105
if (b.use_manual_time()) {
105106
report.real_accumulated_time = results.manual_time_used;
107+
report.manual_accumulated_time_pow2 = results.manual_time_used_pow2;
106108
} else {
107109
report.real_accumulated_time = results.real_time_used;
108110
}
@@ -151,6 +153,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
151153
results.cpu_time_used += timer.cpu_time_used();
152154
results.real_time_used += timer.real_time_used();
153155
results.manual_time_used += timer.manual_time_used();
156+
results.manual_time_used_pow2 += timer.manual_time_used_pow2();
154157
results.complexity_n += st.complexity_length_n();
155158
internal::Increment(&results.counters, st.counters);
156159
}
@@ -239,6 +242,11 @@ BenchmarkRunner::BenchmarkRunner(
239242
min_time(FLAGS_benchmark_dry_run
240243
? 0
241244
: ComputeMinTime(b_, parsed_benchtime_flag)),
245+
min_rel_accuracy(FLAGS_benchmark_dry_run
246+
? std::numeric_limits<double>::max()
247+
: (!IsZero(b.min_rel_accuracy())
248+
? b.min_rel_accuracy()
249+
: FLAGS_benchmark_min_rel_accuracy)),
242250
min_warmup_time(
243251
FLAGS_benchmark_dry_run
244252
? 0
@@ -318,8 +326,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
318326

319327
// Base decisions off of real time if requested by this benchmark.
320328
i.seconds = i.results.cpu_time_used;
329+
i.seconds_pow2 = 0;
321330
if (b.use_manual_time()) {
322331
i.seconds = i.results.manual_time_used;
332+
i.seconds_pow2 = i.results.manual_time_used_pow2;
323333
} else if (b.use_real_time()) {
324334
i.seconds = i.results.real_time_used;
325335
}
@@ -340,6 +350,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded(
340350
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
341351
multiplier = is_significant ? multiplier : 10.0;
342352

353+
if (!IsZero(GetMinRelAccuracy())) {
354+
multiplier =
355+
std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy());
356+
}
357+
343358
// So what seems to be the sufficiently-large iteration count? Round up.
344359
const IterationCount max_next_iters = static_cast<IterationCount>(
345360
std::llround(std::max(multiplier * static_cast<double>(i.iters),
@@ -357,14 +372,12 @@ bool BenchmarkRunner::ShouldReportIterationResults(
357372
// Either it has run for a sufficient amount of time
358373
// or because an error was reported.
359374
return i.results.skipped_ || FLAGS_benchmark_dry_run ||
360-
i.iters >= kMaxIterations || // Too many iterations already.
361-
i.seconds >=
362-
GetMinTimeToApply() || // The elapsed time is large enough.
363-
// CPU time is specified but the elapsed real time greatly exceeds
364-
// the minimum time.
365-
// Note that user provided timers are except from this test.
366-
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
367-
!b.use_manual_time());
375+
// Too many iterations already.
376+
i.iters >= kMaxIterations ||
377+
// We have applied for enough time and the relative accuracy is good
378+
// enough. Relative accuracy is checked only for user provided timers.
379+
(HasSufficientTimeToApply(i) &&
380+
(!b.use_manual_time() || HasSufficientRelAccuracy(i)));
368381
}
369382

370383
double BenchmarkRunner::GetMinTimeToApply() const {
@@ -376,6 +389,26 @@ double BenchmarkRunner::GetMinTimeToApply() const {
376389
return warmup_done ? min_time : min_warmup_time;
377390
}
378391

392+
double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
393+
return std::sqrt(i.seconds_pow2 - std::pow(i.seconds, 2.) / static_cast<double>(i.iters)) / i.seconds;
394+
}
395+
396+
bool BenchmarkRunner::HasSufficientTimeToApply(
397+
const IterationResults& i) const {
398+
return i.seconds >= GetMinTimeToApply() ||
399+
// CPU time is specified but the elapsed real time greatly exceeds
400+
// the minimum time.
401+
// Note that user provided timers are except from this test.
402+
(!b.use_manual_time() &&
403+
i.results.real_time_used >= 5 * GetMinTimeToApply());
404+
}
405+
406+
bool BenchmarkRunner::HasSufficientRelAccuracy(
407+
const IterationResults& i) const {
408+
return (IsZero(GetMinRelAccuracy()) ||
409+
(GetRelAccuracy(i) <= GetMinRelAccuracy()));
410+
}
411+
379412
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
380413
warmup_done = true;
381414
iters = i;

src/benchmark_runner.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class BenchmarkRunner {
7171

7272
double GetMinTime() const { return min_time; }
7373

74+
// Returns the minimum relative accuracy stored in `min_rel_accuracy`.
double GetMinRelAccuracy() const { return min_rel_accuracy; }
75+
7476
bool HasExplicitIters() const { return has_explicit_iteration_count; }
7577

7678
IterationCount GetIters() const { return iters; }
@@ -83,6 +85,7 @@ class BenchmarkRunner {
8385

8486
BenchTimeType parsed_benchtime_flag;
8587
const double min_time;
88+
const double min_rel_accuracy;
8689
const double min_warmup_time;
8790
bool warmup_done;
8891
const int repeats;
@@ -104,6 +107,7 @@ class BenchmarkRunner {
104107
internal::ThreadManager::Result results;
105108
IterationCount iters;
106109
double seconds;
110+
double seconds_pow2;
107111
};
108112
IterationResults DoNIterations();
109113

@@ -117,6 +121,12 @@ class BenchmarkRunner {
117121

118122
double GetMinTimeToApply() const;
119123

124+
double GetRelAccuracy(const IterationResults& i) const;
125+
126+
bool HasSufficientTimeToApply(const IterationResults& i) const;
127+
128+
bool HasSufficientRelAccuracy(const IterationResults& i) const;
129+
120130
void FinishWarmUp(const IterationCount& i);
121131

122132
void RunWarmUp();

src/thread_manager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class ThreadManager {
4141
double real_time_used = 0;
4242
double cpu_time_used = 0;
4343
double manual_time_used = 0;
44+
double manual_time_used_pow2 = 0;
4445
int64_t complexity_n = 0;
4546
std::string report_label_;
4647
std::string skip_message_;

src/thread_timer.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ class ThreadTimer {
3838
}
3939

4040
// Called by each thread
41-
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
41+
// Records one manually measured iteration time, in seconds. Both the running
// sum and the running sum of squares are accumulated.
void SetIterationTime(double seconds) {
  manual_time_used_ += seconds;
  // Square by plain multiplication; this runs once per reported iteration.
  manual_time_used_pow2_ += seconds * seconds;
}
4245

4346
bool running() const { return running_; }
4447

@@ -60,6 +63,11 @@ class ThreadTimer {
6063
return manual_time_used_;
6164
}
6265

66+
// Returns the accumulated sum of the squares of the times passed to
// SetIterationTime().
// REQUIRES: timer is not running
double manual_time_used_pow2() const {
67+
BM_CHECK(!running_);
68+
return manual_time_used_pow2_;
69+
}
70+
6371
private:
6472
double ReadCpuTimerOfChoice() const {
6573
if (measure_process_cpu_time) return ProcessCPUUsage();
@@ -78,6 +86,7 @@ class ThreadTimer {
7886
double cpu_time_used_ = 0;
7987
// Manually set iteration time. User sets this with SetIterationTime(seconds).
8088
double manual_time_used_ = 0;
89+
double manual_time_used_pow2_ = 0;
8190
};
8291

8392
} // namespace internal

test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_
9797
compile_benchmark_test(benchmark_min_time_flag_iters_test)
9898
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
9999

100+
compile_benchmark_test(benchmark_min_rel_accuracy_flag_test)
101+
benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test)
102+
100103
add_filter_test(filter_simple "Foo" 3)
101104
add_filter_test(filter_simple_negative "-Foo" 2)
102105
add_filter_test(filter_suffix "BM_.*" 4)

0 commit comments

Comments
 (0)