Skip to content

Commit 3ebe99f

Browse files
authored
[libcxx] Unwrap iterators in __find_segment (#161274)
The segmented-iterator-optimized implementation of find now unwraps iterators when processing each segment. As a result, it can take better advantage of some find specializations, such as calling memchr/wmemchr for vector<vector<{char,int}>>.

```
Benchmark                                                        Baseline   Candidate   Difference   % Difference
-------------------------------------------------------------- ---------- ----------- ------------ --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024          71.13       61.19        -9.94         -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768       2359.19     2237.02      -122.17          -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50            16.88       17.59         0.71           4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8             15.59       16.10         0.51           3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192         647.01      532.75      -114.26         -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024         689.76      680.74        -9.02          -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768      22284.95    21500.26      -784.69          -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50            32.77       32.12        -0.65          -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8              6.11        5.92        -0.19          -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192        5527.88     5373.43      -154.45          -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024        1305.59     1264.04       -41.55          -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768      42840.88    43322.64       481.76           1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50            57.52       62.35         4.82           8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8              6.06        5.98        -0.07          -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192       20700.53    21431.66       731.12           3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024      310.64       18.34      -292.30         -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768    9424.96      531.99     -8892.97         -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50         18.58        3.25       -15.32         -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8           4.81        2.98        -1.84         -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192     2437.50      126.88     -2310.62         -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024       297.10       41.70      -255.39         -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768     9662.42     1822.05     -7840.36         -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50          22.29        5.10       -17.19         -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8            3.73        3.13        -0.60         -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192      2399.68      356.10     -2043.58         -85.16
```
1 parent 9bffb10 commit 3ebe99f

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

libcxx/include/__algorithm/find.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ struct __find_segment {
230230
template <class _InputIterator, class _Proj>
231231
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
232232
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
233-
return std::__find(__first, __last, __value_, __proj);
233+
return std::__rewrap_iter(
234+
__first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value_, __proj));
234235
}
235236
};
236237

libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <cstddef>
1313
#include <deque>
1414
#include <list>
15+
#include <ranges>
1516
#include <string>
1617
#include <vector>
1718

@@ -83,6 +84,20 @@ int main(int argc, char** argv) {
8384
bm.template operator()<std::list<int>>("rng::find_if_not(list<int>) (" + comment + ")", ranges_find_if_not);
8485
};
8586

87+
auto register_nested_container_benchmarks = [&](auto bm, std::string comment) {
88+
// ranges_find
89+
bm.template operator()<std::vector<std::vector<char>>>(
90+
"rng::find(join_view(vector<vector<char>>)) (" + comment + ")", ranges_find);
91+
bm.template operator()<std::vector<std::vector<int>>>(
92+
"rng::find(join_view(vector<vector<int>>)) (" + comment + ")", ranges_find);
93+
bm.template operator()<std::list<std::vector<int>>>(
94+
"rng::find(join_view(list<vector<int>>)) (" + comment + ")", ranges_find);
95+
bm.template operator()<std::vector<std::list<int>>>(
96+
"rng::find(join_view(vector<list<int>>)) (" + comment + ")", ranges_find);
97+
bm.template operator()<std::deque<std::deque<int>>>(
98+
"rng::find(join_view(deque<deque<int>>)) (" + comment + ")", ranges_find);
99+
};
100+
86101
// Benchmark {std,ranges}::{find,find_if,find_if_not}(normal container) where we
87102
// bail out after 25% of elements
88103
{
@@ -142,6 +157,44 @@ int main(int argc, char** argv) {
142157
register_benchmarks(bm, "process all");
143158
}
144159

160+
// Benchmark ranges::find(join_view(nested container)) where we process the whole sequence
161+
{
162+
auto bm = []<class Container>(std::string name, auto find) {
163+
benchmark::RegisterBenchmark(
164+
name,
165+
[find](auto& st) {
166+
std::size_t const size = st.range(0);
167+
std::size_t const seg_size = 256;
168+
std::size_t const segments = (size + seg_size - 1) / seg_size;
169+
using C1 = typename Container::value_type;
170+
using ValueType = typename C1::value_type;
171+
ValueType x = Generate<ValueType>::random();
172+
ValueType y = random_different_from({x});
173+
Container c(segments);
174+
auto n = size;
175+
for (auto it = c.begin(); it != c.end(); it++) {
176+
it->resize(std::min(seg_size, n), x);
177+
n -= it->size();
178+
}
179+
180+
auto view = c | std::views::join;
181+
182+
for ([[maybe_unused]] auto _ : st) {
183+
benchmark::DoNotOptimize(c);
184+
benchmark::DoNotOptimize(y);
185+
auto result = find(view.begin(), view.end(), y);
186+
benchmark::DoNotOptimize(result);
187+
}
188+
})
189+
->Arg(8)
190+
->Arg(50) // non power-of-two
191+
->Arg(1024)
192+
->Arg(8192)
193+
->Arg(1 << 15);
194+
};
195+
register_nested_container_benchmarks(bm, "process all");
196+
}
197+
145198
// Benchmark {std,ranges}::{find,find_if,find_if_not}(vector<bool>) where we process the whole sequence
146199
{
147200
auto bm = [](std::string name, auto find) {

0 commit comments

Comments
 (0)