Skip to content

Commit aa6a03d

Browse files
Fix standard deviation
1 parent f36f86f commit aa6a03d

File tree

2 files changed

+90
-40
lines changed

2 files changed

+90
-40
lines changed

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -166,15 +166,13 @@ benchmark(const BenchmarkOptions &options,
166166
const cpp::function<uint64_t(uint32_t)> &wrapper_func) {
167167
BenchmarkResult result;
168168
RuntimeEstimationProgression rep;
169-
uint32_t total_iterations = 0;
170169
uint32_t iterations = options.initial_iterations;
170+
171171
if (iterations < 1u)
172172
iterations = 1;
173173

174174
uint32_t samples = 0;
175175
uint64_t total_time = 0;
176-
uint64_t best_guess = 0;
177-
uint64_t cycles_squared = 0;
178176
uint64_t min = UINT64_MAX;
179177
uint64_t max = 0;
180178

@@ -186,46 +184,55 @@ benchmark(const BenchmarkOptions &options,
186184
uint32_t call_index = 0;
187185

188186
for (int64_t time_budget = options.max_duration; time_budget >= 0;) {
189-
uint64_t sample_cycles = 0;
190-
const clock_t start = static_cast<double>(clock());
191-
for (uint32_t i = 0; i < iterations; i++) {
187+
RefinableRuntimeEstimator sample_estimator;
188+
189+
const clock_t start = clock();
190+
while (sample_estimator.get_iterations() < iterations) {
192191
auto wrapper_intermediate = wrapper_func(call_index++);
193-
uint64_t current_result = wrapper_intermediate - overhead;
192+
uint64_t current_result =
193+
wrapper_intermediate < overhead ? 0 : wrapper_intermediate - overhead;
194194
max = cpp::max(max, current_result);
195195
min = cpp::min(min, current_result);
196-
sample_cycles += current_result;
196+
sample_estimator.update(current_result);
197197
}
198198
const clock_t end = clock();
199+
199200
const clock_t duration_ns =
200201
((end - start) * 1000 * 1000 * 1000) / CLOCKS_PER_SEC;
201202
total_time += duration_ns;
202203
time_budget -= duration_ns;
203204
samples++;
204-
cycles_squared += sample_cycles * sample_cycles;
205205

206-
total_iterations += iterations;
207-
const double change_ratio =
208-
rep.compute_improvement({iterations, sample_cycles});
209-
best_guess = rep.current_estimation;
206+
const double change_ratio = rep.compute_improvement(sample_estimator);
210207

211208
if (samples >= options.max_samples || iterations >= options.max_iterations)
212209
break;
210+
211+
const auto total_iterations = rep.get_estimator().get_iterations();
212+
213213
if (total_time >= options.min_duration && samples >= options.min_samples &&
214214
total_iterations >= options.min_iterations &&
215215
change_ratio < options.epsilon)
216216
break;
217217

218-
iterations *= options.scaling_factor;
218+
iterations = static_cast<uint32_t>(iterations * options.scaling_factor);
219219
}
220-
result.cycles = best_guess;
221-
result.standard_deviation = fputil::sqrt<double>(
222-
static_cast<double>(cycles_squared) / total_iterations -
223-
static_cast<double>(best_guess * best_guess));
220+
221+
const auto &estimator = rep.get_estimator();
222+
result.cycles = static_cast<uint64_t>(estimator.get_mean());
223+
result.standard_deviation = estimator.get_stddev();
224+
224225
result.min = min;
225226
result.max = max;
226227
result.samples = samples;
227-
result.total_iterations = total_iterations;
228-
result.total_time = total_time / total_iterations;
228+
229+
result.total_iterations = estimator.get_iterations();
230+
if (result.total_iterations > 0) {
231+
result.total_time = total_time / result.total_iterations;
232+
} else {
233+
result.total_time = 0;
234+
}
235+
229236
return result;
230237
}
231238

libc/benchmarks/gpu/LibcGpuBenchmark.h

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "src/__support/CPP/string_view.h"
1212
#include "src/__support/CPP/type_traits.h"
1313
#include "src/__support/FPUtil/FPBits.h"
14+
#include "src/__support/FPUtil/sqrt.h"
1415
#include "src/__support/macros/config.h"
1516
#include "src/time/clock.h"
1617

@@ -30,40 +31,82 @@ struct BenchmarkOptions {
3031
double scaling_factor = 1.4;
3132
};
3233

33-
struct Measurement {
34+
class RefinableRuntimeEstimator {
3435
uint32_t iterations = 0;
35-
uint64_t elapsed_cycles = 0;
36-
};
37-
38-
class RefinableRuntimeEstimation {
39-
uint64_t total_cycles = 0;
40-
uint32_t total_iterations = 0;
36+
uint64_t sum_of_cycles = 0;
37+
uint64_t sum_of_squared_cycles = 0;
4138

4239
public:
43-
uint64_t update(const Measurement &M) {
44-
total_cycles += M.elapsed_cycles;
45-
total_iterations += M.iterations;
46-
return total_cycles / total_iterations;
40+
void update(uint64_t cycles) noexcept {
41+
iterations += 1;
42+
sum_of_cycles += cycles;
43+
sum_of_squared_cycles += cycles * cycles;
44+
}
45+
46+
double get_mean() const noexcept {
47+
if (iterations == 0)
48+
return 0.0;
49+
50+
return static_cast<double>(sum_of_cycles) / iterations;
51+
}
52+
53+
void update(const RefinableRuntimeEstimator &other) noexcept {
54+
iterations += other.iterations;
55+
sum_of_cycles += other.sum_of_cycles;
56+
sum_of_squared_cycles += other.sum_of_squared_cycles;
57+
}
58+
59+
double get_variance() const noexcept {
60+
if (iterations == 0)
61+
return 0.0;
62+
63+
const double num = static_cast<double>(iterations);
64+
const double sum_x = static_cast<double>(sum_of_cycles);
65+
const double sum_x2 = static_cast<double>(sum_of_squared_cycles);
66+
67+
const double mean_of_squares = sum_x2 / num;
68+
const double mean = sum_x / num;
69+
const double mean_squared = mean * mean;
70+
const double variance = mean_of_squares - mean_squared;
71+
72+
return variance < 0.0 ? 0.0 : variance;
73+
}
74+
75+
double get_stddev() const noexcept {
76+
return fputil::sqrt<double>(get_variance());
4777
}
78+
79+
uint32_t get_iterations() const noexcept { return iterations; }
4880
};
4981

5082
// Tracks the progression of the runtime estimation
5183
class RuntimeEstimationProgression {
52-
RefinableRuntimeEstimation rre;
84+
RefinableRuntimeEstimator estimator;
85+
double current_mean = 0.0;
5386

5487
public:
55-
uint64_t current_estimation = 0;
88+
const RefinableRuntimeEstimator &get_estimator() const noexcept {
89+
return estimator;
90+
}
91+
92+
double
93+
compute_improvement(const RefinableRuntimeEstimator &sample_estimator) {
94+
if (sample_estimator.get_iterations() == 0)
95+
return 1.0;
5696

57-
double compute_improvement(const Measurement &M) {
58-
const uint64_t new_estimation = rre.update(M);
59-
double ratio =
60-
(static_cast<double>(current_estimation) / new_estimation) - 1.0;
97+
estimator.update(sample_estimator);
98+
99+
const double new_mean = estimator.get_mean();
100+
if (current_mean == 0.0 || new_mean == 0.0) {
101+
current_mean = new_mean;
102+
return 1.0;
103+
}
61104

62-
// Get absolute value
105+
double ratio = (current_mean / new_mean) - 1.0;
63106
if (ratio < 0)
64-
ratio *= -1;
107+
ratio = -ratio;
65108

66-
current_estimation = new_estimation;
109+
current_mean = new_mean;
67110
return ratio;
68111
}
69112
};

0 commit comments

Comments
 (0)