Skip to content

Commit 8d83498

Browse files
winner245ldionnefrederick-vs-ja
authored andcommitted
Automerge: [libc++] Optimize {std,ranges}::distance for segmented iterators (#133612)
This patch enhances the performance of `std::distance` and `std::ranges::distance` for non-random-access segmented iterators, e.g., `std::join_view` iterators. The original implementation operates in linear time, `O(n)`, where `n` is the total number of elements. The optimized version reduces this to approximately `O(n / segment_size)` by leveraging segmented structure, where `segment_size` is the average size of each segment. The table below summarizes the peak performance improvements observed across different segment sizes, with the total element count `n` ranging up to `1 << 20` (1,048,576 elements), based on benchmark results. ``` ---------------------------------------------------------------------------------------- Container/n/segment_size std::distance std::ranges::distance ---------------------------------------------------------------------------------------- join_view(vector<vector<int>>)/1048576/256 401.6x 422.9x join_view(deque<deque<int>>)/1048576/256 112.1x 132.6x join_view(vector<vector<int>>)/1048576/1024 1669.2x 1559.1x join_view(deque<deque<int>>)/1048576/1024 487.7x 497.4x ``` ## Benchmarks #### Segment size = 1024 ``` ----------------------------------------------------------------------------------------- Benchmark Before After Speedup ----------------------------------------------------------------------------------------- std::distance(join_view(vector<vector<int>>))/50 38.8 ns 1.01 ns 38.4x std::distance(join_view(vector<vector<int>>))/1024 660 ns 1.02 ns 647.1x std::distance(join_view(vector<vector<int>>))/4096 2934 ns 1.98 ns 1481.8x std::distance(join_view(vector<vector<int>>))/8192 5751 ns 3.92 ns 1466.8x std::distance(join_view(vector<vector<int>>))/16384 11520 ns 7.06 ns 1631.7x std::distance(join_view(vector<vector<int>>))/65536 46367 ns 32.2 ns 1440.6x std::distance(join_view(vector<vector<int>>))/262144 182611 ns 114 ns 1601.9x std::distance(join_view(vector<vector<int>>))/1048576 737785 ns 442 ns 1669.2x 
std::distance(join_view(deque<deque<int>>))/50 53.1 ns 6.13 ns 8.7x std::distance(join_view(deque<deque<int>>))/1024 854 ns 7.53 ns 113.4x std::distance(join_view(deque<deque<int>>))/4096 3507 ns 14.7 ns 238.6x std::distance(join_view(deque<deque<int>>))/8192 7114 ns 17.6 ns 404.2x std::distance(join_view(deque<deque<int>>))/16384 13997 ns 30.7 ns 455.9x std::distance(join_view(deque<deque<int>>))/65536 55598 ns 114 ns 487.7x std::distance(join_view(deque<deque<int>>))/262144 214293 ns 480 ns 446.4x std::distance(join_view(deque<deque<int>>))/1048576 833000 ns 2183 ns 381.6x rng::distance(join_view(vector<vector<int>>))/50 39.1 ns 1.10 ns 35.5x rng::distance(join_view(vector<vector<int>>))/1024 689 ns 1.14 ns 604.4x rng::distance(join_view(vector<vector<int>>))/4096 2753 ns 2.15 ns 1280.5x rng::distance(join_view(vector<vector<int>>))/8192 5530 ns 4.61 ns 1199.6x rng::distance(join_view(vector<vector<int>>))/16384 10968 ns 7.97 ns 1376.2x rng::distance(join_view(vector<vector<int>>))/65536 46009 ns 35.3 ns 1303.4x rng::distance(join_view(vector<vector<int>>))/262144 190569 ns 124 ns 1536.9x rng::distance(join_view(vector<vector<int>>))/1048576 746724 ns 479 ns 1559.1x rng::distance(join_view(deque<deque<int>>))/50 51.6 ns 6.57 ns 7.9x rng::distance(join_view(deque<deque<int>>))/1024 826 ns 6.50 ns 127.1x rng::distance(join_view(deque<deque<int>>))/4096 3323 ns 12.5 ns 265.8x rng::distance(join_view(deque<deque<int>>))/8192 6619 ns 19.1 ns 346.5x rng::distance(join_view(deque<deque<int>>))/16384 13495 ns 33.2 ns 406.5x rng::distance(join_view(deque<deque<int>>))/65536 53668 ns 114 ns 470.8x rng::distance(join_view(deque<deque<int>>))/262144 236277 ns 475 ns 497.4x rng::distance(join_view(deque<deque<int>>))/1048576 914177 ns 2157 ns 423.8x ----------------------------------------------------------------------------------------- ``` #### Segment size = 256 ``` ----------------------------------------------------------------------------------------- Benchmark Before 
After Speedup ----------------------------------------------------------------------------------------- std::distance(join_view(vector<vector<int>>))/50 38.1 ns 1.02 ns 37.4x std::distance(join_view(vector<vector<int>>))/1024 689 ns 2.06 ns 334.5x std::distance(join_view(vector<vector<int>>))/4096 2815 ns 7.01 ns 401.6x std::distance(join_view(vector<vector<int>>))/8192 5507 ns 14.3 ns 385.1x std::distance(join_view(vector<vector<int>>))/16384 11050 ns 33.7 ns 327.9x std::distance(join_view(vector<vector<int>>))/65536 44197 ns 118 ns 374.6x std::distance(join_view(vector<vector<int>>))/262144 175793 ns 449 ns 391.5x std::distance(join_view(vector<vector<int>>))/1048576 703242 ns 2140 ns 328.7x std::distance(join_view(deque<deque<int>>))/50 50.2 ns 6.12 ns 8.2x std::distance(join_view(deque<deque<int>>))/1024 835 ns 11.4 ns 73.2x std::distance(join_view(deque<deque<int>>))/4096 3353 ns 32.9 ns 101.9x std::distance(join_view(deque<deque<int>>))/8192 6711 ns 64.2 ns 104.5x std::distance(join_view(deque<deque<int>>))/16384 13231 ns 118 ns 112.1x std::distance(join_view(deque<deque<int>>))/65536 53523 ns 556 ns 96.3x std::distance(join_view(deque<deque<int>>))/262144 219101 ns 2166 ns 101.2x std::distance(join_view(deque<deque<int>>))/1048576 880277 ns 15852 ns 55.5x rng::distance(join_view(vector<vector<int>>))/50 37.7 ns 1.13 ns 33.4x rng::distance(join_view(vector<vector<int>>))/1024 697 ns 2.14 ns 325.7x rng::distance(join_view(vector<vector<int>>))/4096 2804 ns 7.52 ns 373.0x rng::distance(join_view(vector<vector<int>>))/8192 5749 ns 15.2 ns 378.2x rng::distance(join_view(vector<vector<int>>))/16384 11742 ns 34.8 ns 337.4x rng::distance(join_view(vector<vector<int>>))/65536 47274 ns 116 ns 407.7x rng::distance(join_view(vector<vector<int>>))/262144 187774 ns 444 ns 422.9x rng::distance(join_view(vector<vector<int>>))/1048576 749724 ns 2109 ns 355.5x rng::distance(join_view(deque<deque<int>>))/50 53.0 ns 6.09 ns 8.7x rng::distance(join_view(deque<deque<int>>))/1024 
895 ns 11.0 ns 81.4x rng::distance(join_view(deque<deque<int>>))/4096 3825 ns 30.6 ns 125.0x rng::distance(join_view(deque<deque<int>>))/8192 7550 ns 60.5 ns 124.8x rng::distance(join_view(deque<deque<int>>))/16384 14847 ns 112 ns 132.6x rng::distance(join_view(deque<deque<int>>))/65536 56888 ns 453 ns 125.6x rng::distance(join_view(deque<deque<int>>))/262144 231395 ns 2034 ns 113.8x rng::distance(join_view(deque<deque<int>>))/1048576 933093 ns 15012 ns 62.2x ----------------------------------------------------------------------------------------- ``` Addresses a subtask of #102817. --------- Co-authored-by: Louis Dionne <[email protected]> Co-authored-by: A. Jiang <[email protected]>
2 parents 82f3269 + fd08af0 commit 8d83498

File tree

5 files changed

+249
-61
lines changed

5 files changed

+249
-61
lines changed

libcxx/docs/ReleaseNotes/22.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ Improvements and New Features
6868
reduced debug information.
6969

7070
- The performance of ``std::find`` has been improved by up to 2x for integral types
71+
- The ``std::distance`` and ``std::ranges::distance`` algorithms have been optimized for segmented iterators (e.g.,
72+
``std::ranges::join_view`` iterators), reducing the complexity from ``O(n)`` to ``O(n / segment_size)``. Benchmarks show
73+
performance improvements of over 1600x in favorable cases with large segment sizes (e.g., 1024).
7174

7275
Deprecations and Removals
7376
-------------------------

libcxx/include/__iterator/distance.h

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,71 @@
1010
#ifndef _LIBCPP___ITERATOR_DISTANCE_H
1111
#define _LIBCPP___ITERATOR_DISTANCE_H
1212

13+
#include <__algorithm/for_each_segment.h>
1314
#include <__config>
1415
#include <__iterator/concepts.h>
1516
#include <__iterator/incrementable_traits.h>
1617
#include <__iterator/iterator_traits.h>
18+
#include <__iterator/segmented_iterator.h>
1719
#include <__ranges/access.h>
1820
#include <__ranges/concepts.h>
1921
#include <__ranges/size.h>
2022
#include <__type_traits/decay.h>
23+
#include <__type_traits/enable_if.h>
2124
#include <__type_traits/remove_cvref.h>
25+
#include <__utility/move.h>
2226

2327
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2428
# pragma GCC system_header
2529
#endif
2630

31+
_LIBCPP_PUSH_MACROS
32+
#include <__undef_macros>
33+
2734
_LIBCPP_BEGIN_NAMESPACE_STD
2835

29-
template <class _InputIter>
30-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
31-
__distance(_InputIter __first, _InputIter __last, input_iterator_tag) {
32-
typename iterator_traits<_InputIter>::difference_type __r(0);
36+
// Difference type used as the return type of the __distance overloads in this header.
// In C++20 and later it is std::iter_difference_t<_Iter>; in earlier standards it is
// iterator_traits<_Iter>::difference_type.
#if _LIBCPP_STD_VER >= 20
37+
template <class _Iter>
38+
using __iter_distance_t _LIBCPP_NODEBUG = std::iter_difference_t<_Iter>;
39+
#else
40+
template <class _Iter>
41+
using __iter_distance_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
42+
#endif
43+
44+
// Generic fallback: counts how many times __first must be incremented before it compares
// equal to __last. The iterator and the sentinel may have different types.
// Returns the count as __iter_distance_t<_InputIter>; the work is linear in the result.
template <class _InputIter, class _Sent>
45+
inline _LIBCPP_HIDE_FROM_ABI
46+
_LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter> __distance(_InputIter __first, _Sent __last) {
47+
__iter_distance_t<_InputIter> __r(0);
3348
for (; __first != __last; ++__first)
3449
++__r;
3550
return __r;
3651
}
3752

38-
template <class _RandIter>
39-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_RandIter>::difference_type
40-
__distance(_RandIter __first, _RandIter __last, random_access_iterator_tag) {
53+
// Overload that participates only when __has_random_access_iterator_category<_RandIter>::value
// is true and both arguments have the same type. It returns __last - __first directly instead
// of stepping through the range.
template <class _RandIter, __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
54+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_RandIter>
55+
__distance(_RandIter __first, _RandIter __last) {
4156
return __last - __first;
4257
}
4358

59+
#if _LIBCPP_STD_VER >= 20
60+
// Overload for iterators that satisfy __is_segmented_iterator_v and do not have the
// random-access iterator category. It hands a callback to std::__for_each_segment and sums
// std::__distance over every (__lfirst, __llast) pair the callback receives.
// NOTE(review): presumably __for_each_segment invokes the callback once per segment with that
// segment's local iterators -- confirm against <__algorithm/for_each_segment.h>.
template <class _SegmentedIter,
61+
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIter>::value &&
62+
__is_segmented_iterator_v<_SegmentedIter>,
63+
int> = 0>
64+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_SegmentedIter>
65+
__distance(_SegmentedIter __first, _SegmentedIter __last) {
66+
__iter_distance_t<_SegmentedIter> __r(0);
67+
// __r is captured by reference so each callback invocation adds to the running total.
std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
68+
__r += std::__distance(__lfirst, __llast);
69+
});
70+
return __r;
71+
}
72+
#endif // _LIBCPP_STD_VER >= 20
73+
4474
// std::distance: forwards both iterators to std::__distance and returns its result.
// The removed line (47-) passed an iterator_category tag object as a third argument; the added
// line (77+) passes only the iterators, leaving overload selection to the constraints on the
// __distance templates.
template <class _InputIter>
4575
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
4676
distance(_InputIter __first, _InputIter __last) {
47-
return std::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category());
77+
return std::__distance(__first, __last);
4878
}
4979

5080
#if _LIBCPP_STD_VER >= 20
@@ -56,12 +86,7 @@ struct __distance {
5686
// Call operator used when _Sp is a sentinel for _Ip but sized_sentinel_for<_Sp, _Ip> is not
// modeled. The removed lines (59- to 64-) counted increments in a local loop; the added line
// (89+) moves both arguments into std::__distance and returns its result instead.
// NOTE(review): when _Ip and _Sp are the same type, the std::__distance overload enabled by
// __has_random_access_iterator_category (which computes __last - __first) can be selected even
// though this overload is only reached when sized_sentinel_for is not modeled -- confirm that
// combination is intended.
template <class _Ip, sentinel_for<_Ip> _Sp>
5787
requires(!sized_sentinel_for<_Sp, _Ip>)
5888
_LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const {
59-
iter_difference_t<_Ip> __n = 0;
60-
while (__first != __last) {
61-
++__first;
62-
++__n;
63-
}
64-
return __n;
89+
return std::__distance(std::move(__first), std::move(__last));
6590
}
6691

6792
template <class _Ip, sized_sentinel_for<decay_t<_Ip>> _Sp>
@@ -92,4 +117,6 @@ inline constexpr auto distance = __distance{};
92117

93118
_LIBCPP_END_NAMESPACE_STD
94119

120+
_LIBCPP_POP_MACROS
121+
95122
#endif // _LIBCPP___ITERATOR_DISTANCE_H
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <iterator>
15+
#include <ranges>
#include <string>
16+
#include <vector>
17+
18+
#include <benchmark/benchmark.h>
19+
20+
int main(int argc, char** argv) {
21+
auto std_distance = [](auto first, auto last) { return std::distance(first, last); };
22+
23+
// {std,ranges}::distance(std::deque)
24+
{
25+
auto bm = [](std::string name, auto distance) {
26+
benchmark::RegisterBenchmark(
27+
name,
28+
[distance](auto& st) {
29+
std::size_t const size = st.range(0);
30+
std::deque<int> c(size, 1);
31+
32+
for ([[maybe_unused]] auto _ : st) {
33+
benchmark::DoNotOptimize(c);
34+
auto result = distance(c.begin(), c.end());
35+
benchmark::DoNotOptimize(result);
36+
}
37+
})
38+
->Arg(50) // non power-of-two
39+
->Arg(1024)
40+
->Arg(4096)
41+
->Arg(8192);
42+
};
43+
bm.operator()("std::distance(deque<int>)", std_distance);
44+
bm.operator()("rng::distance(deque<int>)", std::ranges::distance);
45+
}
46+
47+
// {std,ranges}::distance(std::join_view)
48+
{
49+
auto bm = []<class Container>(std::string name, auto distance, std::size_t seg_size) {
50+
benchmark::RegisterBenchmark(
51+
name,
52+
[distance, seg_size](auto& st) {
53+
std::size_t const size = st.range(0);
54+
std::size_t const segments = (size + seg_size - 1) / seg_size;
55+
Container c(segments);
56+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
57+
c[i].resize(std::min(seg_size, n));
58+
}
59+
60+
auto view = c | std::views::join;
61+
auto first = view.begin();
62+
auto last = view.end();
63+
64+
for ([[maybe_unused]] auto _ : st) {
65+
benchmark::DoNotOptimize(c);
66+
auto result = distance(first, last);
67+
benchmark::DoNotOptimize(result);
68+
}
69+
})
70+
->Arg(50) // non power-of-two
71+
->Arg(1024)
72+
->Arg(4096)
73+
->Arg(8192);
74+
};
75+
bm.operator()<std::vector<std::vector<int>>>("std::distance(join_view(vector<vector<int>>))", std_distance, 256);
76+
bm.operator()<std::vector<std::vector<int>>>(
77+
"rng::distance(join_view(vector<vector<int>>)", std::ranges::distance, 256);
78+
}
79+
80+
benchmark::Initialize(&argc, argv);
81+
benchmark::RunSpecifiedBenchmarks();
82+
benchmark::Shutdown();
83+
return 0;
84+
}

libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,38 +16,73 @@
1616
// Iter::difference_type
1717
// distance(Iter first, Iter last); // constexpr in C++17
1818

19-
#include <iterator>
19+
#include <array>
2020
#include <cassert>
21+
#include <deque>
22+
#include <iterator>
23+
#include <vector>
2124
#include <type_traits>
2225

2326
#include "test_macros.h"
2427
#include "test_iterators.h"
2528

2629
// Checks two things about std::distance(first, last): that its type is exactly
// std::iterator_traits<It>::difference_type (static_assert) and that its value equals `dist`
// (assert). The removed (27- to 32-) and added (30+ to 33+) lines differ only in formatting.
template <class It>
27-
TEST_CONSTEXPR_CXX17
28-
void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist)
29-
{
30-
typedef typename std::iterator_traits<It>::difference_type Difference;
31-
static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
32-
assert(std::distance(first, last) == dist);
30+
TEST_CONSTEXPR_CXX17 void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist) {
31+
typedef typename std::iterator_traits<It>::difference_type Difference;
32+
static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
33+
assert(std::distance(first, last) == dist);
3334
}
3435

35-
TEST_CONSTEXPR_CXX17 bool tests()
36-
{
37-
const char* s = "1234567890";
38-
check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s+10), 10);
39-
check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s+10), 10);
40-
check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+10), 10);
41-
check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+10), 10);
42-
check_distance(s, s+10, 10);
43-
return true;
36+
#if TEST_STD_VER >= 20
37+
// Fills a deque with ten inner deques of sizes 0, 1, ..., 9 and asserts that std::distance over
// the joined view equals the running total of those sizes kept in `n`.
// NOTE(review): std::views::join is used here, but <ranges> is not among the standard includes
// visible in this file -- presumably it arrives transitively; confirm.
/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
38+
using Container = std::deque<std::deque<double>>;
39+
Container c;
40+
auto view = c | std::views::join;
41+
Container::difference_type n = 0;
42+
for (std::size_t i = 0; i < 10; ++i) {
43+
n += i;
44+
// Container::value_type(i) builds an inner deque with i elements.
c.push_back(Container::value_type(i));
45+
}
46+
assert(std::distance(view.begin(), view.end()) == n);
47+
}
48+
#endif
49+
50+
// Runs the std::distance checks and returns true so the function can be used in a static_assert.
// First, a 10-character string is measured through the cpp17_input, forward, bidirectional and
// random_access iterator wrappers and through raw pointers. When TEST_STD_VER >= 20, join views
// over nested vectors and nested arrays are measured as well, and test_deque() is called only
// when the function is not being constant-evaluated.
TEST_CONSTEXPR_CXX17 bool tests() {
51+
const char* s = "1234567890";
52+
check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s + 10), 10);
53+
check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s + 10), 10);
54+
check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s + 10), 10);
55+
check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s + 10), 10);
56+
check_distance(s, s + 10, 10);
57+
58+
#if TEST_STD_VER >= 20
59+
// Nested vectors with inner sizes 0..9; the expected distance is their sum, tracked in n.
{
60+
using Container = std::vector<std::vector<int>>;
61+
Container c;
62+
auto view = c | std::views::join;
63+
Container::difference_type n = 0;
64+
for (std::size_t i = 0; i < 10; ++i) {
65+
n += i;
66+
c.push_back(Container::value_type(i));
67+
}
68+
assert(std::distance(view.begin(), view.end()) == n);
69+
}
70+
// Ten inner arrays of three elements each: the joined view has 30 elements.
{
71+
using Container = std::array<std::array<char, 3>, 10>;
72+
Container c;
73+
auto view = c | std::views::join;
74+
assert(std::distance(view.begin(), view.end()) == 30);
75+
}
76+
if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
77+
test_deque();
78+
#endif
79+
return true;
4480
}
4581

46-
int main(int, char**)
47-
{
48-
tests();
82+
// Test entry point: runs tests() at run time and, when TEST_STD_VER >= 17, also evaluates it in
// a static_assert. Removed (50-, 52-) and added (85+, 87+) lines differ only in indentation.
int main(int, char**) {
83+
tests();
4984
#if TEST_STD_VER >= 17
50-
static_assert(tests(), "");
85+
static_assert(tests(), "");
5186
#endif
52-
return 0;
87+
return 0;
5388
}

0 commit comments

Comments
 (0)