Skip to content

Commit 8edd68a

Browse files
committed
Optimize ranges::equal for vector<bool>::iterator
1 parent 4590f75 commit 8edd68a

File tree

7 files changed

+440
-221
lines changed

7 files changed

+440
-221
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ Improvements and New Features
4646
- The ``std::ranges::{copy, copy_n, copy_backward}`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
4747
resulting in a performance improvement of up to 2000x.
4848

49+
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
50+
improvement of up to 188x.
51+
4952
- Updated formatting library to Unicode 16.0.0.
5053

5154
Deprecations and Removals

libcxx/include/__algorithm/equal.h

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,27 @@
1111
#define _LIBCPP___ALGORITHM_EQUAL_H
1212

1313
#include <__algorithm/comp.h>
14+
#include <__algorithm/min.h>
1415
#include <__algorithm/unwrap_iter.h>
1516
#include <__config>
1617
#include <__functional/identity.h>
18+
#include <__fwd/bit_reference.h>
1719
#include <__iterator/distance.h>
1820
#include <__iterator/iterator_traits.h>
21+
#include <__memory/pointer_traits.h>
1922
#include <__string/constexpr_c_functions.h>
2023
#include <__type_traits/desugars_to.h>
2124
#include <__type_traits/enable_if.h>
2225
#include <__type_traits/invoke.h>
2326
#include <__type_traits/is_equality_comparable.h>
27+
#include <__type_traits/is_same.h>
2428
#include <__type_traits/is_volatile.h>
2529
#include <__utility/move.h>
2630

31+
#if _LIBCPP_STD_VER >= 20
32+
# include <__functional/ranges_operations.h>
33+
#endif
34+
2735
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
2836
# pragma GCC system_header
2937
#endif
@@ -33,6 +41,136 @@ _LIBCPP_PUSH_MACROS
3341

3442
_LIBCPP_BEGIN_NAMESPACE_STD
3543

44+
// Word-at-a-time comparison of the bit range [__first1, __last1) with the equally long range
// that begins at __first2. The overloads in this header that dispatch here do so only when
// __first1.__ctz_ != __first2.__ctz_, i.e. the two ranges start at different bit positions
// inside their storage words, so every word read from range 1 has to be shifted before it can
// be compared with range 2.
// Returns false at the first masked mismatch, true otherwise (also for an empty range).
// NOTE(review): the shifts below act on `~__storage_type(0)` and `__b` after integral
// promotion; if __storage_type can be narrower than int the masks/shifted values may not be
// what the code intends -- confirm which storage types are supported.
template <class _Cp, bool _IsConst1, bool _IsConst2>
45+
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
46+
__equal_unaligned(__bit_iterator<_Cp, _IsConst1> __first1,
47+
__bit_iterator<_Cp, _IsConst1> __last1,
48+
__bit_iterator<_Cp, _IsConst2> __first2) {
49+
using _It = __bit_iterator<_Cp, _IsConst1>;
50+
using difference_type = typename _It::difference_type;
51+
using __storage_type = typename _It::__storage_type;
52+
53+
const int __bits_per_word = _It::__bits_per_word;
54+
// __n: number of bits still to be compared.
difference_type __n = __last1 - __first1;
55+
if (__n > 0) {
56+
// do first word
57+
if (__first1.__ctz_ != 0) {
58+
// __clz_f: number of bits from __first1's position up to the end of its word.
unsigned __clz_f = __bits_per_word - __first1.__ctz_;
59+
// __dn: bits of range 1 handled in this step (never more than the whole range).
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
60+
__n -= __dn;
61+
// Keep only bits [__ctz_, __ctz_ + __dn) of the first word of range 1.
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
62+
__storage_type __b = *__first1.__seg_ & __m;
63+
// __clz_r: bits left in the current word of range 2; __ddn of the __dn bits fit into it.
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
64+
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
65+
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
66+
// Shift __b so that its first bit sits at __first2.__ctz_, then compare under the mask.
if (__first2.__ctz_ > __first1.__ctz_) {
67+
if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
68+
return false;
69+
} else {
70+
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
71+
return false;
72+
}
73+
// Advance __first2 by the __ddn bits just compared.
__first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
74+
__first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
75+
__dn -= __ddn;
76+
// Whatever is left of __dn is compared against the low bits of the next word of range 2.
if (__dn > 0) {
77+
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
78+
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
79+
return false;
80+
__first2.__ctz_ = static_cast<unsigned>(__dn);
81+
}
82+
++__first1.__seg_;
83+
// __first1.__ctz_ = 0;
84+
}
85+
// __first1.__ctz_ == 0;
86+
// do middle words
87+
// Range 1 is now read from bit 0 of each word; range 2 is read at offset __first2.__ctz_.
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
88+
__storage_type __m = ~__storage_type(0) << __first2.__ctz_;
89+
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
90+
__storage_type __b = *__first1.__seg_;
91+
// The low bits of __b are matched against the upper part of the current word of range 2 ...
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
92+
return false;
93+
++__first2.__seg_;
94+
// ... and the high bits of __b against the lower part of the following word.
if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
95+
return false;
96+
}
97+
// do last word
98+
// Trailing __n (< __bits_per_word) bits: masked and, if needed, split over two words of range 2.
if (__n > 0) {
99+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
100+
__storage_type __b = *__first1.__seg_ & __m;
101+
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
102+
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
103+
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
104+
return false;
105+
__first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
106+
__first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
107+
__n -= __dn;
108+
if (__n > 0) {
109+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
110+
if ((*__first2.__seg_ & __m) != (__b >> __dn))
111+
return false;
112+
}
113+
}
114+
}
115+
return true;
116+
}
117+
118+
// Word-at-a-time comparison of the bit range [__first1, __last1) with the equally long range
// that begins at __first2. The masks are built from __first1.__ctz_ only and applied to the
// words of both ranges, so this is correct only when both ranges start at the same bit
// position inside their words; the dispatching overloads in this header call it only when
// __first1.__ctz_ == __first2.__ctz_.
// Returns false at the first mismatch, true otherwise (also for an empty range).
// NOTE(review): `~__storage_type(0)` is shifted after integral promotion; if __storage_type
// can be narrower than int the resulting masks may be wrong -- confirm supported storage types.
template <class _Cp, bool _IsConst1, bool _IsConst2>
119+
[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
120+
__equal_aligned(__bit_iterator<_Cp, _IsConst1> __first1,
121+
__bit_iterator<_Cp, _IsConst1> __last1,
122+
__bit_iterator<_Cp, _IsConst2> __first2) {
123+
using _It = __bit_iterator<_Cp, _IsConst1>;
124+
using difference_type = typename _It::difference_type;
125+
using __storage_type = typename _It::__storage_type;
126+
127+
const int __bits_per_word = _It::__bits_per_word;
128+
// __n: number of bits still to be compared.
difference_type __n = __last1 - __first1;
129+
if (__n > 0) {
130+
// do first word
131+
if (__first1.__ctz_ != 0) {
132+
// __clz: bits from the starting position to the end of the word; __dn of them are compared here.
unsigned __clz = __bits_per_word - __first1.__ctz_;
133+
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
134+
__n -= __dn;
135+
// One mask covering bits [__ctz_, __ctz_ + __dn) is applied to the first word of both ranges.
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
136+
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
137+
return false;
138+
++__first2.__seg_;
139+
++__first1.__seg_;
140+
// __first1.__ctz_ = 0;
141+
// __first2.__ctz_ = 0;
142+
}
143+
// __first1.__ctz_ == 0;
144+
// __first2.__ctz_ == 0;
145+
// do middle words
146+
// Whole words are compared directly, without masking.
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
147+
if (*__first2.__seg_ != *__first1.__seg_)
148+
return false;
149+
// do last word
150+
// Only the low __n bits of the final word take part in the comparison.
if (__n > 0) {
151+
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
152+
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
153+
return false;
154+
}
155+
}
156+
return true;
157+
}
158+
159+
// Overload of __equal_iter_impl for a pair of __bit_iterator ranges over the same _Cp.
// It takes part in overload resolution only when
// __desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool> holds; the predicate argument is
// unnamed and never invoked.
// Compares [__first1, __last1) with the range starting at __first2 and returns the result of
// the word-wise helper chosen below.
template <class _Cp,
160+
bool _IsConst1,
161+
bool _IsConst2,
162+
class _BinaryPredicate,
163+
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> = 0>
164+
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
165+
__bit_iterator<_Cp, _IsConst1> __first1,
166+
__bit_iterator<_Cp, _IsConst1> __last1,
167+
__bit_iterator<_Cp, _IsConst2> __first2,
168+
_BinaryPredicate) {
169+
// Same starting bit position in both words: the aligned helper can use one mask for both ranges.
if (__first1.__ctz_ == __first2.__ctz_)
170+
return std::__equal_aligned(__first1, __last1, __first2);
171+
// Different starting bit positions: use the helper that shifts words before comparing.
return std::__equal_unaligned(__first1, __last1, __first2);
172+
}
173+
36174
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
37175
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
38176
_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
@@ -94,6 +232,28 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
94232
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
95233
}
96234

235+
// Overload of the four-iterator __equal_impl for a pair of __bit_iterator ranges over the
// same _Cp. It takes part in overload resolution only when _Pred satisfies
// __desugars_to_v<__equal_tag, _Pred, bool, bool> and both projections satisfy __is_identity;
// the predicate and projection arguments are unnamed and never invoked.
// The end iterator of the second range is also unnamed and ignored: the number of bits
// compared is taken from [__first1, __last1) alone.
// NOTE(review): presumably callers have already established that both ranges have the same
// length before reaching this overload -- confirm against the call sites.
template <class _Cp,
236+
bool _IsConst1,
237+
bool _IsConst2,
238+
class _Pred,
239+
class _Proj1,
240+
class _Proj2,
241+
__enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
242+
__is_identity<_Proj2>::value,
243+
int> = 0>
244+
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
245+
__bit_iterator<_Cp, _IsConst1> __first1,
246+
__bit_iterator<_Cp, _IsConst1> __last1,
247+
__bit_iterator<_Cp, _IsConst2> __first2,
248+
__bit_iterator<_Cp, _IsConst2>,
249+
_Pred&,
250+
_Proj1&,
251+
_Proj2&) {
252+
// Same starting bit position in both words: the aligned helper can use one mask for both ranges.
if (__first1.__ctz_ == __first2.__ctz_)
253+
return std::__equal_aligned(__first1, __last1, __first2);
254+
// Different starting bit positions: use the helper that shifts words before comparing.
return std::__equal_unaligned(__first1, __last1, __first2);
255+
}
256+
97257
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
98258
[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
99259
equal(_InputIterator1 __first1,

libcxx/include/__bit_reference

Lines changed: 32 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/comp.h>
1314
#include <__algorithm/copy.h>
1415
#include <__algorithm/copy_backward.h>
1516
#include <__algorithm/copy_n.h>
17+
#include <__algorithm/equal.h>
1618
#include <__algorithm/min.h>
1719
#include <__assert>
1820
#include <__bit/countr.h>
@@ -25,8 +27,10 @@
2527
#include <__memory/construct_at.h>
2628
#include <__memory/pointer_traits.h>
2729
#include <__type_traits/conditional.h>
30+
#include <__type_traits/desugars_to.h>
2831
#include <__type_traits/enable_if.h>
2932
#include <__type_traits/is_constant_evaluated.h>
33+
#include <__type_traits/is_same.h>
3034
#include <__type_traits/is_unsigned.h>
3135
#include <__type_traits/void_t.h>
3236
#include <__utility/pair.h>
@@ -444,127 +448,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle,
444448
return __r;
445449
}
446450

447-
// equal
448-
449-
// Word-at-a-time comparison of the bit range [__first1, __last1) with the equally long range
// that begins at __first2, for ranges that start at different bit positions inside their
// storage words (the `equal` overload further down in this hunk calls it only when
// __first1.__ctz_ != __first2.__ctz_). Each word read from range 1 is shifted to line up with
// range 2 and compared under a mask.
// Returns false at the first masked mismatch, true otherwise (also for an empty range).
template <class _Cp, bool _IC1, bool _IC2>
450-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned(
451-
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
452-
using _It = __bit_iterator<_Cp, _IC1>;
453-
using difference_type = typename _It::difference_type;
454-
using __storage_type = typename _It::__storage_type;
455-
456-
const int __bits_per_word = _It::__bits_per_word;
457-
// __n: number of bits still to be compared.
difference_type __n = __last1 - __first1;
458-
if (__n > 0) {
459-
// do first word
460-
if (__first1.__ctz_ != 0) {
461-
unsigned __clz_f = __bits_per_word - __first1.__ctz_;
462-
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
463-
__n -= __dn;
464-
// Keep only bits [__ctz_, __ctz_ + __dn) of the first word of range 1.
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
465-
__storage_type __b = *__first1.__seg_ & __m;
466-
// __ddn of the __dn bits fit into what is left of the current word of range 2.
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
467-
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
468-
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
469-
// Shift __b so that its first bit sits at __first2.__ctz_, then compare under the mask.
if (__first2.__ctz_ > __first1.__ctz_) {
470-
if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_)))
471-
return false;
472-
} else {
473-
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_)))
474-
return false;
475-
}
476-
__first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word;
477-
__first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word);
478-
__dn -= __ddn;
479-
// Whatever is left of __dn is compared against the low bits of the next word of range 2.
if (__dn > 0) {
480-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
481-
if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn)))
482-
return false;
483-
__first2.__ctz_ = static_cast<unsigned>(__dn);
484-
}
485-
++__first1.__seg_;
486-
// __first1.__ctz_ = 0;
487-
}
488-
// __first1.__ctz_ == 0;
489-
// do middle words
490-
unsigned __clz_r = __bits_per_word - __first2.__ctz_;
491-
__storage_type __m = ~__storage_type(0) << __first2.__ctz_;
492-
// Each full word of range 1 is compared against parts of two consecutive words of range 2.
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) {
493-
__storage_type __b = *__first1.__seg_;
494-
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
495-
return false;
496-
++__first2.__seg_;
497-
if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r))
498-
return false;
499-
}
500-
// do last word
501-
// Trailing __n (< __bits_per_word) bits: masked and, if needed, split over two words of range 2.
if (__n > 0) {
502-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
503-
__storage_type __b = *__first1.__seg_ & __m;
504-
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
505-
__m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
506-
if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_))
507-
return false;
508-
__first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word;
509-
__first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word);
510-
__n -= __dn;
511-
if (__n > 0) {
512-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
513-
if ((*__first2.__seg_ & __m) != (__b >> __dn))
514-
return false;
515-
}
516-
}
517-
}
518-
return true;
519-
}
520-
521-
// Word-at-a-time comparison of the bit range [__first1, __last1) with the equally long range
// that begins at __first2. The masks are built from __first1.__ctz_ only and applied to the
// words of both ranges, so both ranges must start at the same bit position inside their words
// (the `equal` overload further down in this hunk calls it only in that case).
// Returns false at the first mismatch, true otherwise (also for an empty range).
template <class _Cp, bool _IC1, bool _IC2>
522-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned(
523-
__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
524-
using _It = __bit_iterator<_Cp, _IC1>;
525-
using difference_type = typename _It::difference_type;
526-
using __storage_type = typename _It::__storage_type;
527-
528-
const int __bits_per_word = _It::__bits_per_word;
529-
// __n: number of bits still to be compared.
difference_type __n = __last1 - __first1;
530-
if (__n > 0) {
531-
// do first word
532-
if (__first1.__ctz_ != 0) {
533-
unsigned __clz = __bits_per_word - __first1.__ctz_;
534-
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
535-
__n -= __dn;
536-
// One mask covering bits [__ctz_, __ctz_ + __dn) is applied to the first word of both ranges.
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
537-
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
538-
return false;
539-
++__first2.__seg_;
540-
++__first1.__seg_;
541-
// __first1.__ctz_ = 0;
542-
// __first2.__ctz_ = 0;
543-
}
544-
// __first1.__ctz_ == 0;
545-
// __first2.__ctz_ == 0;
546-
// do middle words
547-
// Whole words are compared directly, without masking.
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_)
548-
if (*__first2.__seg_ != *__first1.__seg_)
549-
return false;
550-
// do last word
551-
// Only the low __n bits of the final word take part in the comparison.
if (__n > 0) {
552-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
553-
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
554-
return false;
555-
}
556-
}
557-
return true;
558-
}
559-
560-
// Three-iterator `equal` overload for __bit_iterator ranges over the same _Cp: compares
// [__first1, __last1) with the range starting at __first2 by forwarding to one of the two
// word-wise helpers, chosen by whether both ranges start at the same bit position (__ctz_)
// inside their storage words.
template <class _Cp, bool _IC1, bool _IC2>
561-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
562-
equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) {
563-
// Same starting bit position: one mask serves both ranges.
if (__first1.__ctz_ == __first2.__ctz_)
564-
return std::__equal_aligned(__first1, __last1, __first2);
565-
// Different starting bit positions: words must be shifted before comparing.
return std::__equal_unaligned(__first1, __last1, __first2);
566-
}
567-
568451
template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
569452
class __bit_iterator {
570453
public:
@@ -787,15 +670,36 @@ private:
787670
template <class _Dp>
788671
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
789672
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
790-
template <class _Dp, bool _IC1, bool _IC2>
791-
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
792-
__equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
793-
template <class _Dp, bool _IC1, bool _IC2>
673+
template <class _Dp, bool _IsConst1, bool _IsConst2>
794674
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
795-
__equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
796-
template <class _Dp, bool _IC1, bool _IC2>
675+
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
676+
template <class _Dp, bool _IsConst1, bool _IsConst2>
797677
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
798-
equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>);
678+
__equal_unaligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
679+
template <class _Dp,
680+
bool _IsConst1,
681+
bool _IsConst2,
682+
class _BinaryPredicate,
683+
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> >
684+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
685+
__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate);
686+
template <class _Dp,
687+
bool _IsConst1,
688+
bool _IsConst2,
689+
class _Pred,
690+
class _Proj1,
691+
class _Proj2,
692+
__enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
693+
__is_identity<_Proj2>::value,
694+
int> >
695+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_impl(
696+
__bit_iterator<_Dp, _IsConst1> __first1,
697+
__bit_iterator<_Dp, _IsConst1> __last1,
698+
__bit_iterator<_Dp, _IsConst2> __first2,
699+
__bit_iterator<_Dp, _IsConst2>,
700+
_Pred&,
701+
_Proj1&,
702+
_Proj2&);
799703
template <bool _ToFind, class _Dp, bool _IC>
800704
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
801705
__find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type);

0 commit comments

Comments
 (0)