99#include < algorithm>
1010#include < iterator>
1111#include < set>
12+ #include < stdlib.h>
1213#include < vector>
1314
1415#include " common.h"
16+ #include " test_iterators.h"
1517
1618namespace {
1719
@@ -137,6 +139,10 @@ std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size
137139 return std::pair<Container, Container>();
138140}
139141
142+ // Set the TRACK_COUNTERS environment variable to enable additional counters.
143+ // Instrumentation impacts CPU utilisation, so it is opt-in and left to the user.
144+ static const bool TRACK_COUNTERS = getenv(" TRACK_COUNTERS" ) != nullptr ;
145+
140146template <class ValueType , class Container , class Overlap >
141147struct SetIntersection {
142148 using ContainerType = typename Container::template type<Value<ValueType>>;
@@ -147,31 +153,41 @@ struct SetIntersection {
147153
148154 bool skip () const noexcept {
149155 // let's save some time and skip symmetrical runs
150- return size1_ <= size2_;
156+ return size1_ < size2_;
151157 }
152158
153159 void run (benchmark::State& state) const {
154160 state.PauseTiming ();
155161 auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap ());
156162 std::vector<Value<ValueType>> out (std::min (size1_, size2_));
157163
158- size_t cmp;
159- auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
160- ++cmp;
161- return std::less<Value<ValueType>>{}(lhs, rhs);
162- };
163-
164164 const auto BATCH_SIZE = std::max (size_t {512 }, (2 * TestSetElements) / (size1_ + size2_));
165165 state.ResumeTiming ();
166166
167167 for (const auto & _ : state) {
168168 while (state.KeepRunningBatch (BATCH_SIZE)) {
169169 for (unsigned i = 0 ; i < BATCH_SIZE; ++i) {
170- cmp = 0 ;
171170 const auto & [c1, c2] = input;
172- auto res = std::set_intersection (c1.begin (), c1.end (), c2.begin (), c2.end (), out.begin (), tracking_less);
173- benchmark::DoNotOptimize (res);
174- state.counters [" Comparisons" ] = cmp;
171+ if (TRACK_COUNTERS) {
172+ size_t cmp{}, strides{}, displacement{};
173+ auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) {
174+ ++cmp;
175+ return std::less<Value<ValueType>>{}(lhs, rhs);
176+ };
177+ stride_counting_iterator b1 (c1.begin (), &strides, &displacement);
178+ stride_counting_iterator e1 (c1.end (), &strides, &displacement);
179+ stride_counting_iterator b2 (c2.begin (), &strides, &displacement);
180+ stride_counting_iterator e2 (c2.end (), &strides, &displacement);
181+ auto res = std::set_intersection (b1, e1 , b2, e2 , out.begin (), tracking_less);
182+ benchmark::DoNotOptimize (res);
183+ state.counters [" comparisons" ] = cmp;
184+ state.counters [" iter_strides" ] = strides;
185+ state.counters [" iter_displacement" ] = displacement;
186+
187+ } else {
188+ auto res = std::set_intersection (c1.begin (), c1.end (), c2.begin (), c2.end (), out.begin ());
189+ benchmark::DoNotOptimize (res);
190+ }
175191 }
176192 }
177193 }
0 commit comments