Skip to content

Commit d568d49

Browse files
committed
Add more counters to the set_intersection benchmark and guard them behind an environment variable, so we can choose to either measure time more accurately or obtain more information.
This led me down an interesting road of validating benchmark results and finding a significant discrepancy in timings between running all test cases at once and running them individually with `--benchmark_filter`.
1 parent 995d04b commit d568d49

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

libcxx/benchmarks/algorithms/set_intersection.bench.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
#include <algorithm>
1010
#include <iterator>
1111
#include <set>
12+
#include <stdlib.h>
1213
#include <vector>
1314

1415
#include "common.h"
16+
#include "test_iterators.h"
1517

1618
namespace {
1719

@@ -137,6 +139,10 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
137139
return std::pair<Container, Container>();
138140
}
139141

142+
// use environment variable to enable additional counters: instrumentation will
143+
// impact CPU utilisation, so let's give the user the option
144+
static const bool TRACK_COUNTERS = getenv("TRACK_COUNTERS") != nullptr;
145+
140146
template <class ValueType, class Container, class Overlap>
141147
struct SetIntersection {
142148
using ContainerType = typename Container::template type<Value<ValueType>>;
@@ -147,31 +153,41 @@ struct SetIntersection {
147153

148154
bool skip() const noexcept {
149155
// let's save some time and skip symmetrical runs
150-
return size1_ <= size2_;
156+
return size1_ < size2_;
151157
}
152158

153159
void run(benchmark::State& state) const {
154160
state.PauseTiming();
155161
auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap());
156162
std::vector<Value<ValueType>> out(std::min(size1_, size2_));
157163

158-
size_t cmp;
159-
auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
160-
++cmp;
161-
return std::less<Value<ValueType>>{}(lhs, rhs);
162-
};
163-
164164
const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_));
165165
state.ResumeTiming();
166166

167167
for (const auto& _ : state) {
168168
while (state.KeepRunningBatch(BATCH_SIZE)) {
169169
for (unsigned i = 0; i < BATCH_SIZE; ++i) {
170-
cmp = 0;
171170
const auto& [c1, c2] = input;
172-
auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less);
173-
benchmark::DoNotOptimize(res);
174-
state.counters["Comparisons"] = cmp;
171+
if (TRACK_COUNTERS) {
172+
size_t cmp{}, strides{}, displacement{};
173+
auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
174+
++cmp;
175+
return std::less<Value<ValueType>>{}(lhs, rhs);
176+
};
177+
stride_counting_iterator b1(c1.begin(), &strides, &displacement);
178+
stride_counting_iterator e1(c1.end(), &strides, &displacement);
179+
stride_counting_iterator b2(c2.begin(), &strides, &displacement);
180+
stride_counting_iterator e2(c2.end(), &strides, &displacement);
181+
auto res = std::set_intersection(b1, e1, b2, e2, out.begin(), tracking_less);
182+
benchmark::DoNotOptimize(res);
183+
state.counters["comparisons"] = cmp;
184+
state.counters["iter_strides"] = strides;
185+
state.counters["iter_displacement"] = displacement;
186+
187+
} else {
188+
auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin());
189+
benchmark::DoNotOptimize(res);
190+
}
175191
}
176192
}
177193
}

0 commit comments

Comments
 (0)