Skip to content

Commit ce0d204

Browse files
committed
Optimize ranges::copy{, _n} for vector<bool>::iterator
1 parent 9ab5474 commit ce0d204

File tree

8 files changed

+411
-185
lines changed

8 files changed

+411
-185
lines changed

libcxx/docs/ReleaseNotes/20.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ Improvements and New Features
7373
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
7474
and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
7575

76+
- The ``std::ranges::copy`` and ``std::ranges::copy_n`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
77+
resulting in a performance improvement of up to 1400x.
78+
7679
Deprecations and Removals
7780
-------------------------
7881

libcxx/include/__algorithm/copy.h

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <__algorithm/for_each_segment.h>
1414
#include <__algorithm/min.h>
1515
#include <__config>
16+
#include <__fwd/bit_reference.h>
1617
#include <__iterator/iterator_traits.h>
1718
#include <__iterator/segmented_iterator.h>
1819
#include <__type_traits/common_type.h>
@@ -29,9 +30,129 @@ _LIBCPP_PUSH_MACROS
2930

3031
_LIBCPP_BEGIN_NAMESPACE_STD
3132

33+
// Forward declaration of std::copy. __copy_aligned (defined further down in this
// header) calls std::copy on raw storage words before the definition of copy at the
// end of the header has been seen.
template <class _InputIterator, class _OutputIterator>
34+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
35+
copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result);
36+
3237
// Forward declaration of the internal __copy entry point. It returns a pair of
// (input iterator, output iterator); std::copy later returns the pair's .second.
template <class _InIter, class _Sent, class _OutIter>
3338
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
3439

40+
// Copies the bits in [__first, __last) to __result one storage word at a time, for
// the case where source and destination start at the same bit offset (__ctz_) inside
// their storage words. The caller visible in this header, __copy_impl::operator(),
// checks __first.__ctz_ == __result.__ctz_ before dispatching here.
// Destination bits outside the copied range are preserved through masking.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
41+
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned(
42+
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
43+
using _In = __bit_iterator<_Cp, _IsConst>;
44+
using difference_type = typename _In::difference_type;
45+
using __storage_type = typename _In::__storage_type;
46+
47+
const int __bits_per_word = _In::__bits_per_word;
// __n is the number of bits still to copy; an empty or negative range is a no-op.
48+
difference_type __n = __last - __first;
49+
if (__n > 0) {
50+
// do first word
// Only needed when the source starts in the middle of a word.
51+
if (__first.__ctz_ != 0) {
// __clz: number of bits from the start offset to the end of the first word.
52+
unsigned __clz = __bits_per_word - __first.__ctz_;
// __dn: how many of those bits actually belong to the range.
53+
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
54+
__n -= __dn;
// __m selects exactly the __dn bits starting at offset __first.__ctz_.
55+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
56+
__storage_type __b = *__first.__seg_ & __m;
// Clear the target bits, then merge the source bits. No shift is applied, which
// is only correct because source and destination offsets are equal.
57+
*__result.__seg_ &= ~__m;
58+
*__result.__seg_ |= __b;
// Advance the destination by __dn bits, carrying into the next word if needed.
59+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
60+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
61+
++__first.__seg_;
62+
// __first.__ctz_ = 0;
63+
}
64+
// __first.__ctz_ == 0;
65+
// do middle words
// __nw whole words remain; they are copied through raw storage pointers.
66+
__storage_type __nw = __n / __bits_per_word;
67+
std::copy(std::__to_address(__first.__seg_),
68+
std::__to_address(__first.__seg_ + __nw),
69+
std::__to_address(__result.__seg_));
70+
__n -= __nw * __bits_per_word;
71+
__result.__seg_ += __nw;
72+
// do last word
// Fewer than __bits_per_word bits remain; they occupy the low __n bits of the word.
73+
if (__n > 0) {
// __first.__seg_ was not advanced by the middle-word copy, so catch it up here.
74+
__first.__seg_ += __nw;
75+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
76+
__storage_type __b = *__first.__seg_ & __m;
77+
*__result.__seg_ &= ~__m;
78+
*__result.__seg_ |= __b;
79+
__result.__ctz_ = static_cast<unsigned>(__n);
80+
}
81+
}
82+
return __result;
83+
}
84+
85+
// Copies the bits in [__first, __last) to __result when source and destination start
// at different bit offsets (__ctz_) inside their storage words. The caller visible in
// this header, __copy_impl::operator(), dispatches here when
// __first.__ctz_ != __result.__ctz_. Each chunk of source bits is shifted to the
// destination offset and may straddle two destination words. Destination bits outside
// the copied range are preserved through masking.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
86+
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned(
87+
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
88+
using _In = __bit_iterator<_Cp, _IsConst>;
89+
using difference_type = typename _In::difference_type;
90+
using __storage_type = typename _In::__storage_type;
91+
92+
const int __bits_per_word = _In::__bits_per_word;
// __n is the number of bits still to copy; an empty or negative range is a no-op.
93+
difference_type __n = __last - __first;
94+
if (__n > 0) {
95+
// do first word
// Only needed when the source starts in the middle of a word.
96+
if (__first.__ctz_ != 0) {
// __clz_f: bits from the source offset to the end of the first source word.
97+
unsigned __clz_f = __bits_per_word - __first.__ctz_;
// __dn: how many of those bits belong to the range.
98+
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
99+
__n -= __dn;
// Extract the __dn source bits (still positioned at the source offset) into __b.
100+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
101+
__storage_type __b = *__first.__seg_ & __m;
// __clz_r: room left in the current destination word; __ddn: bits that fit there.
102+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
103+
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
// Re-purpose __m as the mask of the __ddn destination bits being overwritten.
104+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
105+
*__result.__seg_ &= ~__m;
// Shift the source bits from the source offset to the destination offset.
106+
if (__result.__ctz_ > __first.__ctz_)
107+
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
108+
else
109+
*__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
// Advance the destination by __ddn bits, carrying into the next word if needed.
110+
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
111+
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
112+
__dn -= __ddn;
// Bits that did not fit spill into the low __dn bits of the next destination word.
113+
if (__dn > 0) {
114+
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
115+
*__result.__seg_ &= ~__m;
116+
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
117+
__result.__ctz_ = static_cast<unsigned>(__dn);
118+
}
119+
++__first.__seg_;
120+
// __first.__ctz_ = 0;
121+
}
122+
// __first.__ctz_ == 0;
123+
// do middle words
// Each full source word is split across two destination words: its low __clz_r bits
// fill the upper part of the current word, its remaining high bits fill the low
// __result.__ctz_ bits of the next word.
// NOTE(review): `__b >> __clz_r` below would shift by the full word width if
// __result.__ctz_ were 0 here. When the function is entered with
// __first.__ctz_ != __result.__ctz_ (as __copy_impl::operator() does), the offset
// appears to stay non-zero whenever the loop runs -- confirm for any other callers.
124+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
125+
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
126+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
127+
__storage_type __b = *__first.__seg_;
// Keep the low __result.__ctz_ bits of the current word, overwrite the rest.
128+
*__result.__seg_ &= ~__m;
129+
*__result.__seg_ |= __b << __result.__ctz_;
130+
++__result.__seg_;
// Keep the upper bits of the next word, overwrite its low __result.__ctz_ bits.
131+
*__result.__seg_ &= __m;
132+
*__result.__seg_ |= __b >> __clz_r;
133+
}
134+
// do last word
// Fewer than __bits_per_word bits remain, held in the low __n bits of the source word.
135+
if (__n > 0) {
136+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
137+
__storage_type __b = *__first.__seg_ & __m;
// __dn: how many of the remaining bits fit into the current destination word.
138+
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
139+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
140+
*__result.__seg_ &= ~__m;
141+
*__result.__seg_ |= __b << __result.__ctz_;
142+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
143+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
144+
__n -= __dn;
// Whatever is left goes into the low __n bits of the following destination word.
145+
if (__n > 0) {
146+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
147+
*__result.__seg_ &= ~__m;
148+
*__result.__seg_ |= __b >> __dn;
149+
__result.__ctz_ = static_cast<unsigned>(__n);
150+
}
151+
}
152+
}
153+
return __result;
154+
}
155+
35156
struct __copy_impl {
36157
template <class _InIter, class _Sent, class _OutIter>
37158
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
@@ -95,6 +216,16 @@ struct __copy_impl {
95216
}
96217
}
97218

219+
// Overload of __copy_impl::operator() for __bit_iterator source and destination.
// Picks the word-wise helper based on whether both iterators sit at the same bit
// offset (__ctz_) within their storage words: __copy_aligned if equal,
// __copy_unaligned otherwise.
// Returns the pair (__last, iterator returned by the chosen helper).
template <class _Cp, bool _IsConst>
220+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
221+
operator()(__bit_iterator<_Cp, _IsConst> __first,
222+
__bit_iterator<_Cp, _IsConst> __last,
223+
__bit_iterator<_Cp, false> __result) {
224+
if (__first.__ctz_ == __result.__ctz_)
225+
return std::make_pair(__last, std::__copy_aligned(__first, __last, __result));
226+
return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result));
227+
}
228+
98229
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
99230
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
100231
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -110,7 +241,7 @@ __copy(_InIter __first, _Sent __last, _OutIter __result) {
110241
}
111242

112243
// Definition of std::copy: forwards to std::__copy and returns only the output
// iterator (the .second member of the returned pair).
// The '-' / '+' pair below is the old and new first line of the signature; the new
// line drops `inline`, which is still present on the forward declaration added
// earlier in this header.
template <class _InputIterator, class _OutputIterator>
113-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
244+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
114245
copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
115246
return std::__copy(__first, __last, __result).second;
116247
}

libcxx/include/__bit_reference

Lines changed: 5 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/copy.h>
1314
#include <__algorithm/copy_n.h>
1415
#include <__algorithm/min.h>
1516
#include <__bit/countr.h>
@@ -22,6 +23,7 @@
2223
#include <__memory/pointer_traits.h>
2324
#include <__type_traits/conditional.h>
2425
#include <__type_traits/is_constant_evaluated.h>
26+
#include <__utility/pair.h>
2527
#include <__utility/swap.h>
2628

2729
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -169,130 +171,6 @@ private:
169171
__mask_(__m) {}
170172
};
171173

172-
// copy
173-
174-
// Pre-change ('-' side) definition of __copy_aligned as it appeared in __bit_reference.
// Copies the bits in [__first, __last) to __result one storage word at a time. The
// first-word merge applies no shift, so it expects source and destination to share the
// same bit offset (__ctz_). Destination bits outside the copied range are preserved
// through masking. Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
175-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned(
176-
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
177-
using _In = __bit_iterator<_Cp, _IsConst>;
178-
using difference_type = typename _In::difference_type;
179-
using __storage_type = typename _In::__storage_type;
180-
181-
const int __bits_per_word = _In::__bits_per_word;
// __n is the number of bits still to copy; an empty or negative range is a no-op.
182-
difference_type __n = __last - __first;
183-
if (__n > 0) {
184-
// do first word
// Only needed when the source starts in the middle of a word.
185-
if (__first.__ctz_ != 0) {
186-
unsigned __clz = __bits_per_word - __first.__ctz_;
187-
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
188-
__n -= __dn;
// __m selects exactly the __dn bits starting at offset __first.__ctz_.
189-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
190-
__storage_type __b = *__first.__seg_ & __m;
191-
*__result.__seg_ &= ~__m;
192-
*__result.__seg_ |= __b;
// Advance the destination by __dn bits, carrying into the next word if needed.
193-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
194-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
195-
++__first.__seg_;
196-
// __first.__ctz_ = 0;
197-
}
198-
// __first.__ctz_ == 0;
199-
// do middle words
// __nw whole words are copied through raw storage pointers using std::copy_n.
200-
__storage_type __nw = __n / __bits_per_word;
201-
std::copy_n(std::__to_address(__first.__seg_), __nw, std::__to_address(__result.__seg_));
202-
__n -= __nw * __bits_per_word;
203-
__result.__seg_ += __nw;
204-
// do last word
// Fewer than __bits_per_word bits remain; they occupy the low __n bits of the word.
205-
if (__n > 0) {
206-
__first.__seg_ += __nw;
207-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
208-
__storage_type __b = *__first.__seg_ & __m;
209-
*__result.__seg_ &= ~__m;
210-
*__result.__seg_ |= __b;
211-
__result.__ctz_ = static_cast<unsigned>(__n);
212-
}
213-
}
214-
return __result;
215-
}
216-
217-
// Pre-change ('-' side) definition of __copy_unaligned as it appeared in __bit_reference.
// Copies the bits in [__first, __last) to __result when source and destination sit at
// different bit offsets (__ctz_) inside their storage words: each chunk of source bits
// is shifted to the destination offset and may straddle two destination words.
// Destination bits outside the copied range are preserved through masking.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
218-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned(
219-
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
220-
using _In = __bit_iterator<_Cp, _IsConst>;
221-
using difference_type = typename _In::difference_type;
222-
using __storage_type = typename _In::__storage_type;
223-
224-
const int __bits_per_word = _In::__bits_per_word;
// __n is the number of bits still to copy; an empty or negative range is a no-op.
225-
difference_type __n = __last - __first;
226-
if (__n > 0) {
227-
// do first word
// Only needed when the source starts in the middle of a word.
228-
if (__first.__ctz_ != 0) {
229-
unsigned __clz_f = __bits_per_word - __first.__ctz_;
230-
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
231-
__n -= __dn;
// Extract the __dn source bits (still positioned at the source offset) into __b.
232-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
233-
__storage_type __b = *__first.__seg_ & __m;
// __clz_r: room left in the current destination word; __ddn: bits that fit there.
234-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
235-
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
236-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
237-
*__result.__seg_ &= ~__m;
// Shift the source bits from the source offset to the destination offset.
238-
if (__result.__ctz_ > __first.__ctz_)
239-
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
240-
else
241-
*__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
242-
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
243-
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
244-
__dn -= __ddn;
// Bits that did not fit spill into the low __dn bits of the next destination word.
245-
if (__dn > 0) {
246-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
247-
*__result.__seg_ &= ~__m;
248-
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
249-
__result.__ctz_ = static_cast<unsigned>(__dn);
250-
}
251-
++__first.__seg_;
252-
// __first.__ctz_ = 0;
253-
}
254-
// __first.__ctz_ == 0;
255-
// do middle words
// Each full source word is split across two destination words.
// NOTE(review): `__b >> __clz_r` below would shift by the full word width if
// __result.__ctz_ were 0 here -- confirm callers only reach this loop with a
// non-zero destination offset.
256-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
257-
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
258-
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
259-
__storage_type __b = *__first.__seg_;
260-
*__result.__seg_ &= ~__m;
261-
*__result.__seg_ |= __b << __result.__ctz_;
262-
++__result.__seg_;
263-
*__result.__seg_ &= __m;
264-
*__result.__seg_ |= __b >> __clz_r;
265-
}
266-
// do last word
// Fewer than __bits_per_word bits remain, held in the low __n bits of the source word.
267-
if (__n > 0) {
268-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
269-
__storage_type __b = *__first.__seg_ & __m;
// __dn: how many of the remaining bits fit into the current destination word.
270-
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
271-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
272-
*__result.__seg_ &= ~__m;
273-
*__result.__seg_ |= __b << __result.__ctz_;
274-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
275-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
276-
__n -= __dn;
// Whatever is left goes into the low __n bits of the following destination word.
277-
if (__n > 0) {
278-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
279-
*__result.__seg_ &= ~__m;
280-
*__result.__seg_ |= __b >> __dn;
281-
__result.__ctz_ = static_cast<unsigned>(__n);
282-
}
283-
}
284-
}
285-
return __result;
286-
}
287-
288-
// Pre-change ('-' side) overload of copy for __bit_iterator arguments.
// Dispatches to __copy_aligned when source and destination share the same bit offset
// (__ctz_) within their storage words, and to __copy_unaligned otherwise.
// Returns the destination iterator produced by the chosen helper.
template <class _Cp, bool _IsConst>
289-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false>
290-
copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
291-
if (__first.__ctz_ == __result.__ctz_)
292-
return std::__copy_aligned(__first, __last, __result);
293-
return std::__copy_unaligned(__first, __last, __result);
294-
}
295-
296174
// copy_backward
297175

298176
template <class _Cp, bool _IsConst>
@@ -975,8 +853,9 @@ private:
975853
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned(
976854
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
977855
// Friend declaration for the bit-iterator copy entry point. The '-' lines are the old
// friend declaration of the copy overload returning a single __bit_iterator; the '+'
// lines befriend the __copy_impl::operator() overload instead, which returns a pair of
// (input iterator, output iterator).
template <class _Dp, bool _IC>
978-
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
979-
copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
856+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> >
857+
__copy_impl::operator()(
858+
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
980859
// Friend declaration for the __copy_backward_aligned function template (unchanged
// context line in this diff).
template <class _Dp, bool _IC>
981860
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned(
982861
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);

0 commit comments

Comments
 (0)