Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ Improvements and New Features
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
improvement of up to 188x.

- The ``std::ranges::swap_ranges`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a
performance improvement of up to 611x.

- Updated formatting library to Unicode 16.0.0.

Deprecations and Removals
Expand Down
162 changes: 162 additions & 0 deletions libcxx/include/__algorithm/swap_ranges.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@
#define _LIBCPP___ALGORITHM_SWAP_RANGES_H

#include <__algorithm/iterator_operations.h>
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <__utility/swap.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand All @@ -23,6 +26,165 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

// Swaps the bits of [__first, __last) with the equally long run of bits starting at __result, for the case
// where both runs begin at the same bit offset inside their storage words. Because the offsets agree, the
// same mask selects the affected bits on both sides and whole words in the middle can be exchanged directly.
//
// Returns an iterator one past the last swapped bit of the run that started at __result.
template <class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned(
    __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
  using _Iter           = __bit_iterator<_Cl, false>;
  using difference_type = typename _Iter::difference_type;
  using __storage_type  = typename _Iter::__storage_type;

  const int __bits_per_word   = _Iter::__bits_per_word;
  difference_type __remaining = __last - __first;
  if (__remaining <= 0)
    return __result;

  // Leading partial word: only present when the run starts in the middle of a word.
  if (__first.__ctz_ != 0) {
    unsigned __avail        = __bits_per_word - __first.__ctz_;
    difference_type __count = std::min(static_cast<difference_type>(__avail), __remaining);
    __remaining -= __count;
    // Bits [__ctz_, __ctz_ + __count) of the word.
    __storage_type __mask = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__avail - __count));
    __storage_type __lhs  = *__first.__seg_ & __mask;
    *__first.__seg_ &= ~__mask;
    __storage_type __rhs = *__result.__seg_ & __mask;
    *__result.__seg_ &= ~__mask;
    *__result.__seg_ |= __lhs;
    *__first.__seg_ |= __rhs;
    __result.__seg_ += (__count + __result.__ctz_) / __bits_per_word;
    __result.__ctz_ = static_cast<unsigned>((__count + __result.__ctz_) % __bits_per_word);
    ++__first.__seg_;
    // __first is treated as word-aligned from here on; its __ctz_ is not read again.
  }

  // Full words in the middle are exchanged wholesale.
  while (__remaining >= __bits_per_word) {
    swap(*__first.__seg_, *__result.__seg_);
    __remaining -= __bits_per_word;
    ++__first.__seg_;
    ++__result.__seg_;
  }

  // Trailing partial word: the low __remaining bits.
  if (__remaining > 0) {
    __storage_type __mask = ~__storage_type(0) >> (__bits_per_word - __remaining);
    __storage_type __lhs  = *__first.__seg_ & __mask;
    *__first.__seg_ &= ~__mask;
    __storage_type __rhs = *__result.__seg_ & __mask;
    *__result.__seg_ &= ~__mask;
    *__result.__seg_ |= __lhs;
    *__first.__seg_ |= __rhs;
    __result.__ctz_ = static_cast<unsigned>(__remaining);
  }
  return __result;
}

// Swaps the bits of [__first, __last) with the equally long run of bits that starts at __result, for the case
// where the two runs begin at different bit offsets inside their storage words (the 3-argument bit-iterator
// __swap_ranges calls this when __first.__ctz_ != __result.__ctz_). Each source word is therefore exchanged
// with a bit run that can straddle two destination words.
//
// Returns an iterator one past the last swapped bit of the run that started at __result.
template <class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned(
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
using _I1 = __bit_iterator<_Cl, false>;
using difference_type = typename _I1::difference_type;
using __storage_type = typename _I1::__storage_type;

const int __bits_per_word = _I1::__bits_per_word;
// __n counts the bits that still have to be swapped.
difference_type __n = __last - __first;
if (__n > 0) {
// do first word
// Only needed when __first starts in the middle of a word.
if (__first.__ctz_ != 0) {
// __clz_f: bits from __first's offset to the end of its word; __dn: how many of them are in range.
unsigned __clz_f = __bits_per_word - __first.__ctz_;
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
__n -= __dn;
// Select bits [__ctz_, __ctz_ + __dn) of the source word, save them in __b1 and clear them in place.
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __b1 = *__first.__seg_ & __m;
*__first.__seg_ &= ~__m;
// __ddn: the part of those __dn bits that fits into what is left of __result's current word.
unsigned __clz_r = __bits_per_word - __result.__ctz_;
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
// Same select/save/clear step on the destination word; the saved bits go to __b2.
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
// Shift each saved run by the offset difference so it lines up with the other side, then merge it in.
if (__result.__ctz_ > __first.__ctz_) {
unsigned __s = __result.__ctz_ - __first.__ctz_;
*__result.__seg_ |= __b1 << __s;
*__first.__seg_ |= __b2 >> __s;
} else {
unsigned __s = __first.__ctz_ - __result.__ctz_;
*__result.__seg_ |= __b1 >> __s;
*__first.__seg_ |= __b2 << __s;
}
// Advance __result by the __ddn bits just exchanged (this may step to the next word).
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
__dn -= __ddn;
// Source bits that did not fit are exchanged with the low __dn bits of the next destination word.
if (__dn > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
__b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
// __s: position inside the source word where the leftover bits begin.
unsigned __s = __first.__ctz_ + __ddn;
*__result.__seg_ |= __b1 >> __s;
*__first.__seg_ |= __b2 << __s;
__result.__ctz_ = static_cast<unsigned>(__dn);
}
++__first.__seg_;
// __first.__ctz_ = 0;
}
// __first.__ctz_ == 0;
// do middle words
// __m covers the bits of a destination word from __result.__ctz_ upwards; __clz_r is the number of such bits.
// NOTE(review): the shifts by __clz_r in the loop need __result.__ctz_ != 0 (so __clz_r < __bits_per_word);
// this appears to follow from the callers only using this function for differing offsets - confirm.
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
unsigned __clz_r = __bits_per_word - __result.__ctz_;
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
// __b1 is one whole source word.
__storage_type __b1 = *__first.__seg_;
// Exchange its low __clz_r bits with the upper part of the current destination word.
// The plain assignment to the source word overwrites all of it; its high bits are filled in below.
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 << __result.__ctz_;
*__first.__seg_ = __b2 >> __result.__ctz_;
// Exchange its remaining high bits with the low __result.__ctz_ bits of the next destination word.
++__result.__seg_;
__b2 = *__result.__seg_ & ~__m;
*__result.__seg_ &= __m;
*__result.__seg_ |= __b1 >> __clz_r;
*__first.__seg_ |= __b2 << __clz_r;
}
// do last word
if (__n > 0) {
// Save and clear the low __n bits of the final source word.
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __b1 = *__first.__seg_ & __m;
*__first.__seg_ &= ~__m;
// __dn: how many of those bits fit into what is left of the current destination word.
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 << __result.__ctz_;
*__first.__seg_ |= __b2 >> __result.__ctz_;
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
__n -= __dn;
// Whatever is left is exchanged with the low __n bits of the next destination word.
if (__n > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 >> __dn;
*__first.__seg_ |= __b2 << __dn;
__result.__ctz_ = static_cast<unsigned>(__n);
}
}
}
return __result;
}

// 2+1 iterators: size2 >= size1; used by std::swap_ranges.
// Picks the word-aligned or the unaligned bit-swapping routine depending on whether both runs start at the
// same bit offset, and returns {__last1, end of the swapped run in the second range}.
template <class, class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false> __first1,
              __bit_iterator<_Cl, false> __last1,
              __bit_iterator<_Cr, false> __first2) {
  __bit_iterator<_Cr, false> __end2 =
      __first1.__ctz_ == __first2.__ctz_
          ? std::__swap_ranges_aligned(__first1, __last1, __first2)
          : std::__swap_ranges_unaligned(__first1, __last1, __first2);
  return std::make_pair(__last1, __end2);
}

// 2+2 iterators: used by std::ranges::swap_ranges.
// Swaps as many bits as the shorter of the two ranges holds by forwarding to the 2+1 overload with the
// shorter range in the leading position, and returns the stopping point in each range.
template <class _AlgPolicy, class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false> __first1,
              __bit_iterator<_Cl, false> __last1,
              __bit_iterator<_Cr, false> __first2,
              __bit_iterator<_Cr, false> __last2) {
  if (__last1 - __first1 < __last2 - __first2) {
    // Range 1 is strictly shorter: it is consumed completely.
    __bit_iterator<_Cr, false> __stop2 = std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second;
    return std::make_pair(__last1, __stop2);
  }
  // Range 2 is not longer than range 1: drive the swap from range 2 instead.
  __bit_iterator<_Cl, false> __stop1 = std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second;
  return std::make_pair(__stop1, __last2);
}

// 2+2 iterators: the shorter size will be used.
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _Sentinel2>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2>
Expand Down
157 changes: 6 additions & 151 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
#include <__algorithm/swap_ranges.h>
#include <__assert>
#include <__bit/countr.h>
#include <__compare/ordering.h>
Expand Down Expand Up @@ -215,152 +216,6 @@ private:
__mask_(__m) {}
};

// swap_ranges

// Swaps the bits of [__first, __last) with the equally long run of bits starting at __result, for the case
// where both runs begin at the same bit offset inside their storage words. Because the offsets agree, the
// same mask selects the affected bits on both sides and whole words in the middle can be exchanged directly.
//
// Returns an iterator one past the last swapped bit of the run that started at __result.
template <class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
    __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
  using _Iter           = __bit_iterator<_Cl, false>;
  using difference_type = typename _Iter::difference_type;
  using __storage_type  = typename _Iter::__storage_type;

  const int __bits_per_word   = _Iter::__bits_per_word;
  difference_type __remaining = __last - __first;
  if (__remaining <= 0)
    return __result;

  // Leading partial word: only present when the run starts in the middle of a word.
  if (__first.__ctz_ != 0) {
    unsigned __avail        = __bits_per_word - __first.__ctz_;
    difference_type __count = std::min(static_cast<difference_type>(__avail), __remaining);
    __remaining -= __count;
    // Bits [__ctz_, __ctz_ + __count) of the word.
    __storage_type __mask = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__avail - __count));
    __storage_type __lhs  = *__first.__seg_ & __mask;
    *__first.__seg_ &= ~__mask;
    __storage_type __rhs = *__result.__seg_ & __mask;
    *__result.__seg_ &= ~__mask;
    *__result.__seg_ |= __lhs;
    *__first.__seg_ |= __rhs;
    __result.__seg_ += (__count + __result.__ctz_) / __bits_per_word;
    __result.__ctz_ = static_cast<unsigned>((__count + __result.__ctz_) % __bits_per_word);
    ++__first.__seg_;
    // __first is treated as word-aligned from here on; its __ctz_ is not read again.
  }

  // Full words in the middle are exchanged wholesale.
  while (__remaining >= __bits_per_word) {
    swap(*__first.__seg_, *__result.__seg_);
    __remaining -= __bits_per_word;
    ++__first.__seg_;
    ++__result.__seg_;
  }

  // Trailing partial word: the low __remaining bits.
  if (__remaining > 0) {
    __storage_type __mask = ~__storage_type(0) >> (__bits_per_word - __remaining);
    __storage_type __lhs  = *__first.__seg_ & __mask;
    *__first.__seg_ &= ~__mask;
    __storage_type __rhs = *__result.__seg_ & __mask;
    *__result.__seg_ &= ~__mask;
    *__result.__seg_ |= __lhs;
    *__first.__seg_ |= __rhs;
    __result.__ctz_ = static_cast<unsigned>(__remaining);
  }
  return __result;
}

// Swaps the bits of [__first, __last) with the equally long run of bits that starts at __result, for the case
// where the two runs begin at different bit offsets inside their storage words (the bit-iterator swap_ranges
// overload calls this when __first1.__ctz_ != __first2.__ctz_). Each source word is therefore exchanged with
// a bit run that can straddle two destination words.
//
// Returns an iterator one past the last swapped bit of the run that started at __result.
template <class _Cl, class _Cr>
_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
__bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
using _I1 = __bit_iterator<_Cl, false>;
using difference_type = typename _I1::difference_type;
using __storage_type = typename _I1::__storage_type;

const int __bits_per_word = _I1::__bits_per_word;
// __n counts the bits that still have to be swapped.
difference_type __n = __last - __first;
if (__n > 0) {
// do first word
// Only needed when __first starts in the middle of a word.
if (__first.__ctz_ != 0) {
// __clz_f: bits from __first's offset to the end of its word; __dn: how many of them are in range.
unsigned __clz_f = __bits_per_word - __first.__ctz_;
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
__n -= __dn;
// Select bits [__ctz_, __ctz_ + __dn) of the source word, save them in __b1 and clear them in place.
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
__storage_type __b1 = *__first.__seg_ & __m;
*__first.__seg_ &= ~__m;
// __ddn: the part of those __dn bits that fits into what is left of __result's current word.
unsigned __clz_r = __bits_per_word - __result.__ctz_;
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
// Same select/save/clear step on the destination word; the saved bits go to __b2.
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
// Shift each saved run by the offset difference so it lines up with the other side, then merge it in.
if (__result.__ctz_ > __first.__ctz_) {
unsigned __s = __result.__ctz_ - __first.__ctz_;
*__result.__seg_ |= __b1 << __s;
*__first.__seg_ |= __b2 >> __s;
} else {
unsigned __s = __first.__ctz_ - __result.__ctz_;
*__result.__seg_ |= __b1 >> __s;
*__first.__seg_ |= __b2 << __s;
}
// Advance __result by the __ddn bits just exchanged (this may step to the next word).
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
__dn -= __ddn;
// Source bits that did not fit are exchanged with the low __dn bits of the next destination word.
if (__dn > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
__b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
// __s: position inside the source word where the leftover bits begin.
unsigned __s = __first.__ctz_ + __ddn;
*__result.__seg_ |= __b1 >> __s;
*__first.__seg_ |= __b2 << __s;
__result.__ctz_ = static_cast<unsigned>(__dn);
}
++__first.__seg_;
// __first.__ctz_ = 0;
}
// __first.__ctz_ == 0;
// do middle words
// __m covers the bits of a destination word from __result.__ctz_ upwards; __clz_r is the number of such bits.
// NOTE(review): the shifts by __clz_r in the loop need __result.__ctz_ != 0 (so __clz_r < __bits_per_word);
// this appears to follow from the caller only using this function for differing offsets - confirm.
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
unsigned __clz_r = __bits_per_word - __result.__ctz_;
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
// __b1 is one whole source word.
__storage_type __b1 = *__first.__seg_;
// Exchange its low __clz_r bits with the upper part of the current destination word.
// The plain assignment to the source word overwrites all of it; its high bits are filled in below.
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 << __result.__ctz_;
*__first.__seg_ = __b2 >> __result.__ctz_;
// Exchange its remaining high bits with the low __result.__ctz_ bits of the next destination word.
++__result.__seg_;
__b2 = *__result.__seg_ & ~__m;
*__result.__seg_ &= __m;
*__result.__seg_ |= __b1 >> __clz_r;
*__first.__seg_ |= __b2 << __clz_r;
}
// do last word
if (__n > 0) {
// Save and clear the low __n bits of the final source word.
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __b1 = *__first.__seg_ & __m;
*__first.__seg_ &= ~__m;
// __dn: how many of those bits fit into what is left of the current destination word.
__storage_type __dn = std::min<__storage_type>(__n, __clz_r);
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
__storage_type __b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 << __result.__ctz_;
*__first.__seg_ |= __b2 >> __result.__ctz_;
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
__n -= __dn;
// Whatever is left is exchanged with the low __n bits of the next destination word.
if (__n > 0) {
__m = ~__storage_type(0) >> (__bits_per_word - __n);
__b2 = *__result.__seg_ & __m;
*__result.__seg_ &= ~__m;
*__result.__seg_ |= __b1 >> __dn;
*__first.__seg_ |= __b2 << __dn;
__result.__ctz_ = static_cast<unsigned>(__n);
}
}
}
return __result;
}

// Bit-iterator overload of swap_ranges: swaps [__first1, __last1) with the bits starting at __first2 and
// returns the end of the swapped run in the second range. The word-aligned routine is used when both runs
// start at the same bit offset, the unaligned one otherwise.
template <class _Cl, class _Cr>
inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
    __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
  return __first1.__ctz_ == __first2.__ctz_
           ? std::__swap_ranges_aligned(__first1, __last1, __first2)
           : std::__swap_ranges_unaligned(__first1, __last1, __first2);
}

// rotate

template <class _Cp>
Expand Down Expand Up @@ -644,14 +499,14 @@ private:
template <class _AlgPolicy>
friend struct __copy_backward_impl;
// Friend declarations for the bit-iterator swap helpers. __swap_ranges_aligned, __swap_ranges_unaligned and
// the 3-argument __swap_ranges access the iterator's __seg_ and __ctz_ members directly.
// NOTE(review): this span comes from a rendered diff hunk, so the pre-change declarations (non-constexpr,
// plus the old swap_ranges overload) appear next to their post-change replacements; the enclosing class is
// not visible here - presumably __bit_iterator, confirm.
template <class _Cl, class _Cr>
friend __bit_iterator<_Cr, false>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
friend __bit_iterator<_Cr, false>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
friend __bit_iterator<_Cr, false>
swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class, class _Cl, class _Cr>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
Expand Down
Loading