Skip to content

Commit faed5b3

Browse files
committed
Optimize ranges::swap_ranges for vector<bool>::iterator
1 parent 70965ef commit faed5b3

File tree

6 files changed

+422
-299
lines changed

6 files changed

+422
-299
lines changed

libcxx/include/__algorithm/swap_ranges.h

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H
1111

1212
#include <__algorithm/iterator_operations.h>
13+
#include <__algorithm/min.h>
1314
#include <__config>
15+
#include <__fwd/bit_reference.h>
1416
#include <__utility/move.h>
1517
#include <__utility/pair.h>
1618

@@ -23,6 +25,165 @@ _LIBCPP_PUSH_MACROS
2325

2426
_LIBCPP_BEGIN_NAMESPACE_STD
2527

28+
template <class _Cl, class _Cr>
29+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
30+
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
31+
using _I1 = __bit_iterator<_Cl, false>;
32+
using difference_type = typename _I1::difference_type;
33+
using __storage_type = typename _I1::__storage_type;
34+
35+
const int __bits_per_word = _I1::__bits_per_word;
36+
difference_type __n = __last - __first;
37+
if (__n > 0) {
38+
// do first word
39+
if (__first.__ctz_ != 0) {
40+
unsigned __clz = __bits_per_word - __first.__ctz_;
41+
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
42+
__n -= __dn;
43+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
44+
__storage_type __b1 = *__first.__seg_ & __m;
45+
*__first.__seg_ &= ~__m;
46+
__storage_type __b2 = *__result.__seg_ & __m;
47+
*__result.__seg_ &= ~__m;
48+
*__result.__seg_ |= __b1;
49+
*__first.__seg_ |= __b2;
50+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
51+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
52+
++__first.__seg_;
53+
// __first.__ctz_ = 0;
54+
}
55+
// __first.__ctz_ == 0;
56+
// do middle words
57+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
58+
swap(*__first.__seg_, *__result.__seg_);
59+
// do last word
60+
if (__n > 0) {
61+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
62+
__storage_type __b1 = *__first.__seg_ & __m;
63+
*__first.__seg_ &= ~__m;
64+
__storage_type __b2 = *__result.__seg_ & __m;
65+
*__result.__seg_ &= ~__m;
66+
*__result.__seg_ |= __b1;
67+
*__first.__seg_ |= __b2;
68+
__result.__ctz_ = static_cast<unsigned>(__n);
69+
}
70+
}
71+
return __result;
72+
}
73+
74+
template <class _Cl, class _Cr>
75+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
76+
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
77+
using _I1 = __bit_iterator<_Cl, false>;
78+
using difference_type = typename _I1::difference_type;
79+
using __storage_type = typename _I1::__storage_type;
80+
81+
const int __bits_per_word = _I1::__bits_per_word;
82+
difference_type __n = __last - __first;
83+
if (__n > 0) {
84+
// do first word
85+
if (__first.__ctz_ != 0) {
86+
unsigned __clz_f = __bits_per_word - __first.__ctz_;
87+
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
88+
__n -= __dn;
89+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
90+
__storage_type __b1 = *__first.__seg_ & __m;
91+
*__first.__seg_ &= ~__m;
92+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
93+
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
94+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
95+
__storage_type __b2 = *__result.__seg_ & __m;
96+
*__result.__seg_ &= ~__m;
97+
if (__result.__ctz_ > __first.__ctz_) {
98+
unsigned __s = __result.__ctz_ - __first.__ctz_;
99+
*__result.__seg_ |= __b1 << __s;
100+
*__first.__seg_ |= __b2 >> __s;
101+
} else {
102+
unsigned __s = __first.__ctz_ - __result.__ctz_;
103+
*__result.__seg_ |= __b1 >> __s;
104+
*__first.__seg_ |= __b2 << __s;
105+
}
106+
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
107+
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
108+
__dn -= __ddn;
109+
if (__dn > 0) {
110+
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
111+
__b2 = *__result.__seg_ & __m;
112+
*__result.__seg_ &= ~__m;
113+
unsigned __s = __first.__ctz_ + __ddn;
114+
*__result.__seg_ |= __b1 >> __s;
115+
*__first.__seg_ |= __b2 << __s;
116+
__result.__ctz_ = static_cast<unsigned>(__dn);
117+
}
118+
++__first.__seg_;
119+
// __first.__ctz_ = 0;
120+
}
121+
// __first.__ctz_ == 0;
122+
// do middle words
123+
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
124+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
125+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
126+
__storage_type __b1 = *__first.__seg_;
127+
__storage_type __b2 = *__result.__seg_ & __m;
128+
*__result.__seg_ &= ~__m;
129+
*__result.__seg_ |= __b1 << __result.__ctz_;
130+
*__first.__seg_ = __b2 >> __result.__ctz_;
131+
++__result.__seg_;
132+
__b2 = *__result.__seg_ & ~__m;
133+
*__result.__seg_ &= __m;
134+
*__result.__seg_ |= __b1 >> __clz_r;
135+
*__first.__seg_ |= __b2 << __clz_r;
136+
}
137+
// do last word
138+
if (__n > 0) {
139+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
140+
__storage_type __b1 = *__first.__seg_ & __m;
141+
*__first.__seg_ &= ~__m;
142+
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
143+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
144+
__storage_type __b2 = *__result.__seg_ & __m;
145+
*__result.__seg_ &= ~__m;
146+
*__result.__seg_ |= __b1 << __result.__ctz_;
147+
*__first.__seg_ |= __b2 >> __result.__ctz_;
148+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
149+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
150+
__n -= __dn;
151+
if (__n > 0) {
152+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
153+
__b2 = *__result.__seg_ & __m;
154+
*__result.__seg_ &= ~__m;
155+
*__result.__seg_ |= __b1 >> __dn;
156+
*__first.__seg_ |= __b2 << __dn;
157+
__result.__ctz_ = static_cast<unsigned>(__n);
158+
}
159+
}
160+
}
161+
return __result;
162+
}
163+
164+
// 2+1 iterators: size2 >= size1; used by std::swap_ranges.
165+
template <class, class _Cl, class _Cr>
166+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
167+
__swap_ranges(__bit_iterator<_Cl, false> __first1,
168+
__bit_iterator<_Cl, false> __last1,
169+
__bit_iterator<_Cr, false> __first2) {
170+
if (__first1.__ctz_ == __first2.__ctz_)
171+
return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2));
172+
return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2));
173+
}
174+
175+
// 2+2 iterators: used by std::ranges::swap_ranges.
176+
template <class _AlgPolicy, class _Cl, class _Cr>
177+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
178+
__swap_ranges(__bit_iterator<_Cl, false> __first1,
179+
__bit_iterator<_Cl, false> __last1,
180+
__bit_iterator<_Cr, false> __first2,
181+
__bit_iterator<_Cr, false> __last2) {
182+
if (__last1 - __first1 < __last2 - __first2)
183+
return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second);
184+
return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2);
185+
}
186+
26187
// 2+2 iterators: the shorter size will be used.
27188
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
28189
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>

libcxx/include/__bit_reference

Lines changed: 6 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <__algorithm/copy_n.h>
1414
#include <__algorithm/min.h>
15+
#include <__algorithm/swap_ranges.h>
1516
#include <__bit/countr.h>
1617
#include <__compare/ordering.h>
1718
#include <__config>
@@ -437,152 +438,6 @@ inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward(
437438
return std::copy_backward(__first, __last, __result);
438439
}
439440

440-
// swap_ranges
441-
442-
template <class _Cl, class _Cr>
443-
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
444-
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
445-
using _I1 = __bit_iterator<_Cl, false>;
446-
using difference_type = typename _I1::difference_type;
447-
using __storage_type = typename _I1::__storage_type;
448-
449-
const int __bits_per_word = _I1::__bits_per_word;
450-
difference_type __n = __last - __first;
451-
if (__n > 0) {
452-
// do first word
453-
if (__first.__ctz_ != 0) {
454-
unsigned __clz = __bits_per_word - __first.__ctz_;
455-
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
456-
__n -= __dn;
457-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
458-
__storage_type __b1 = *__first.__seg_ & __m;
459-
*__first.__seg_ &= ~__m;
460-
__storage_type __b2 = *__result.__seg_ & __m;
461-
*__result.__seg_ &= ~__m;
462-
*__result.__seg_ |= __b1;
463-
*__first.__seg_ |= __b2;
464-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
465-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
466-
++__first.__seg_;
467-
// __first.__ctz_ = 0;
468-
}
469-
// __first.__ctz_ == 0;
470-
// do middle words
471-
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
472-
swap(*__first.__seg_, *__result.__seg_);
473-
// do last word
474-
if (__n > 0) {
475-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
476-
__storage_type __b1 = *__first.__seg_ & __m;
477-
*__first.__seg_ &= ~__m;
478-
__storage_type __b2 = *__result.__seg_ & __m;
479-
*__result.__seg_ &= ~__m;
480-
*__result.__seg_ |= __b1;
481-
*__first.__seg_ |= __b2;
482-
__result.__ctz_ = static_cast<unsigned>(__n);
483-
}
484-
}
485-
return __result;
486-
}
487-
488-
template <class _Cl, class _Cr>
489-
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
490-
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
491-
using _I1 = __bit_iterator<_Cl, false>;
492-
using difference_type = typename _I1::difference_type;
493-
using __storage_type = typename _I1::__storage_type;
494-
495-
const int __bits_per_word = _I1::__bits_per_word;
496-
difference_type __n = __last - __first;
497-
if (__n > 0) {
498-
// do first word
499-
if (__first.__ctz_ != 0) {
500-
unsigned __clz_f = __bits_per_word - __first.__ctz_;
501-
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
502-
__n -= __dn;
503-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
504-
__storage_type __b1 = *__first.__seg_ & __m;
505-
*__first.__seg_ &= ~__m;
506-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
507-
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
508-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
509-
__storage_type __b2 = *__result.__seg_ & __m;
510-
*__result.__seg_ &= ~__m;
511-
if (__result.__ctz_ > __first.__ctz_) {
512-
unsigned __s = __result.__ctz_ - __first.__ctz_;
513-
*__result.__seg_ |= __b1 << __s;
514-
*__first.__seg_ |= __b2 >> __s;
515-
} else {
516-
unsigned __s = __first.__ctz_ - __result.__ctz_;
517-
*__result.__seg_ |= __b1 >> __s;
518-
*__first.__seg_ |= __b2 << __s;
519-
}
520-
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
521-
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
522-
__dn -= __ddn;
523-
if (__dn > 0) {
524-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
525-
__b2 = *__result.__seg_ & __m;
526-
*__result.__seg_ &= ~__m;
527-
unsigned __s = __first.__ctz_ + __ddn;
528-
*__result.__seg_ |= __b1 >> __s;
529-
*__first.__seg_ |= __b2 << __s;
530-
__result.__ctz_ = static_cast<unsigned>(__dn);
531-
}
532-
++__first.__seg_;
533-
// __first.__ctz_ = 0;
534-
}
535-
// __first.__ctz_ == 0;
536-
// do middle words
537-
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
538-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
539-
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
540-
__storage_type __b1 = *__first.__seg_;
541-
__storage_type __b2 = *__result.__seg_ & __m;
542-
*__result.__seg_ &= ~__m;
543-
*__result.__seg_ |= __b1 << __result.__ctz_;
544-
*__first.__seg_ = __b2 >> __result.__ctz_;
545-
++__result.__seg_;
546-
__b2 = *__result.__seg_ & ~__m;
547-
*__result.__seg_ &= __m;
548-
*__result.__seg_ |= __b1 >> __clz_r;
549-
*__first.__seg_ |= __b2 << __clz_r;
550-
}
551-
// do last word
552-
if (__n > 0) {
553-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
554-
__storage_type __b1 = *__first.__seg_ & __m;
555-
*__first.__seg_ &= ~__m;
556-
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
557-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
558-
__storage_type __b2 = *__result.__seg_ & __m;
559-
*__result.__seg_ &= ~__m;
560-
*__result.__seg_ |= __b1 << __result.__ctz_;
561-
*__first.__seg_ |= __b2 >> __result.__ctz_;
562-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
563-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
564-
__n -= __dn;
565-
if (__n > 0) {
566-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
567-
__b2 = *__result.__seg_ & __m;
568-
*__result.__seg_ &= ~__m;
569-
*__result.__seg_ |= __b1 >> __dn;
570-
*__first.__seg_ |= __b2 << __dn;
571-
__result.__ctz_ = static_cast<unsigned>(__n);
572-
}
573-
}
574-
}
575-
return __result;
576-
}
577-
578-
template <class _Cl, class _Cr>
579-
inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
580-
__bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
581-
if (__first1.__ctz_ == __first2.__ctz_)
582-
return std::__swap_ranges_aligned(__first1, __last1, __first2);
583-
return std::__swap_ranges_unaligned(__first1, __last1, __first2);
584-
}
585-
586441
// rotate
587442

588443
template <class _Cp>
@@ -987,14 +842,14 @@ private:
987842
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
988843
copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
989844
template <class _Cl, class _Cr>
990-
friend __bit_iterator<_Cr, false>
845+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
991846
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
992847
template <class _Cl, class _Cr>
993-
friend __bit_iterator<_Cr, false>
848+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
994849
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
995-
template <class _Cl, class _Cr>
996-
friend __bit_iterator<_Cr, false>
997-
swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
850+
template <class, class _Cl, class _Cr>
851+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
852+
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
998853
template <class _Dp>
999854
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
1000855
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);

0 commit comments

Comments
 (0)