From 9a3be694a8f231e7aba8525e1187b86b0d3bcee0 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Mon, 23 Dec 2024 08:05:53 -0500 Subject: [PATCH 1/4] Optimize ranges::copy{, _n} for vector::iterator --- libcxx/include/__algorithm/copy.h | 134 +++++++++++++++++- libcxx/include/__bit_reference | 131 +---------------- libcxx/include/bitset | 2 + .../test/benchmarks/algorithms/copy.bench.cpp | 89 ++++++++++++ .../alg.copy/copy.pass.cpp | 33 ++++- .../alg.copy/copy_n.pass.cpp | 126 +++++++++------- .../alg.copy/ranges.copy.pass.cpp | 35 +++++ .../alg.copy/ranges.copy_n.pass.cpp | 52 ++++++- 8 files changed, 417 insertions(+), 185 deletions(-) create mode 100644 libcxx/test/benchmarks/algorithms/copy.bench.cpp diff --git a/libcxx/include/__algorithm/copy.h b/libcxx/include/__algorithm/copy.h index 962aa90059d57..e2e46674d798a 100644 --- a/libcxx/include/__algorithm/copy.h +++ b/libcxx/include/__algorithm/copy.h @@ -13,8 +13,10 @@ #include <__algorithm/for_each_segment.h> #include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> +#include <__memory/pointer_traits.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__utility/move.h> @@ -29,9 +31,129 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result); + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = 
typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __nw = __n / __bits_per_word; + std::copy(std::__to_address(__first.__seg_), + std::__to_address(__first.__seg_ + __nw), + std::__to_address(__result.__seg_)); + __n -= __nw * __bits_per_word; + __result.__seg_ += __nw; + // do last word + if (__n > 0) { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(__n); + } + } + return __result; +} + +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = 
(~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); + else + *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { + __storage_type __b = *__first.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + ++__result.__seg_; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b >> __clz_r; + } + // do last word + if (__n > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + __storage_type __dn = std::min(__n, static_cast(__clz_r)); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + 
*__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __dn; + __result.__ctz_ = static_cast(__n); + } + } + } + return __result; +} + struct __copy_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> @@ -95,6 +217,16 @@ struct __copy_impl { } } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + if (__first.__ctz_ == __result.__ctz_) + return std::make_pair(__last, std::__copy_aligned(__first, __last, __result)); + return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result)); + } + // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -110,7 +242,7 @@ __copy(_InIter __first, _Sent __last, _OutIter __result) { } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { return std::__copy(__first, __last, __result).second; } diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 67abb023122ed..15e76ac8daf8e 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -10,6 +10,7 @@ #ifndef _LIBCPP___BIT_REFERENCE #define _LIBCPP___BIT_REFERENCE +#include <__algorithm/copy.h> #include <__algorithm/copy_n.h> #include <__algorithm/min.h> #include <__bit/countr.h> @@ -24,6 +25,7 @@ #include <__type_traits/conditional.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/void_t.h> +#include <__utility/pair.h> #include <__utility/swap.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -183,130 
+185,6 @@ private: __mask_(__m) {} }; -// copy - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __nw = __n / __bits_per_word; - std::copy_n(std::__to_address(__first.__seg_), __nw, std::__to_address(__result.__seg_)); - __n -= __nw * __bits_per_word; - __result.__seg_ += __nw; - // do last word - if (__n > 0) { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(__n); - } - } - return __result; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using 
__storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); - else - *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { - __storage_type __b = *__first.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - ++__result.__seg_; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b >> __clz_r; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - __storage_type __dn = std::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << 
__result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __dn; - __result.__ctz_ = static_cast(__n); - } - } - } - return __result; -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> -copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__first.__ctz_ == __result.__ctz_) - return std::__copy_aligned(__first, __last, __result); - return std::__copy_unaligned(__first, __last, __result); -} - // copy_backward template @@ -989,8 +867,9 @@ private: _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> - copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> > + __copy_impl::operator()( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); diff --git a/libcxx/include/bitset b/libcxx/include/bitset index 10576eb80bf2e..a20842985b3d5 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -129,6 +129,8 @@ template struct hash>; #if __cplusplus < 201103L && 
defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) # include <__cxx03/bitset> #else +# include <__algorithm/copy.h> +# include <__algorithm/copy_backward.h> # include <__algorithm/count.h> # include <__algorithm/fill.h> # include <__algorithm/fill_n.h> diff --git a/libcxx/test/benchmarks/algorithms/copy.bench.cpp b/libcxx/test/benchmarks/algorithms/copy.bench.cpp new file mode 100644 index 0000000000000..c09c243161763 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/copy.bench.cpp @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +#include +#include +#include + +static void bm_ranges_copy(benchmark::State& state, bool aligned) { + auto n = state.range(); + std::vector in(n, true); + std::vector out(aligned ? n : n + 8); + benchmark::DoNotOptimize(&in); + auto dst = aligned ? out.begin() : out.begin() + 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::ranges::copy(in, dst)); + benchmark::DoNotOptimize(&out); + } +} + +static void bm_ranges_copy_n(benchmark::State& state, bool aligned) { + auto n = state.range(); + std::vector in(n, true); + std::vector out(aligned ? n : n + 8); + benchmark::DoNotOptimize(&in); + auto src = in.begin(); + auto dst = aligned ? out.begin() : out.begin() + 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::ranges::copy_n(src, n, dst)); + benchmark::DoNotOptimize(&out); + } +} + +static void bm_copy(benchmark::State& state, bool aligned) { + auto n = state.range(); + std::vector in(n, true); + std::vector out(aligned ? n : n + 8); + benchmark::DoNotOptimize(&in); + auto beg = in.begin(); + auto end = in.end(); + auto dst = aligned ? 
out.begin() : out.begin() + 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::copy(beg, end, dst)); + benchmark::DoNotOptimize(&out); + } +} + +static void bm_copy_n(benchmark::State& state, bool aligned) { + auto n = state.range(); + std::vector in(n, true); + std::vector out(aligned ? n : n + 8); + benchmark::DoNotOptimize(&in); + auto src = in.begin(); + auto dst = aligned ? out.begin() : out.begin() + 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::copy_n(src, n, dst)); + benchmark::DoNotOptimize(&out); + } +} + +static void bm_ranges_copy_aligned(benchmark::State& state) { bm_ranges_copy(state, true); } +static void bm_ranges_copy_unaligned(benchmark::State& state) { bm_ranges_copy(state, false); } +static void bm_ranges_copy_n_aligned(benchmark::State& state) { bm_ranges_copy_n(state, true); } +static void bm_ranges_copy_n_unaligned(benchmark::State& state) { bm_ranges_copy_n(state, false); } + +static void bm_copy_aligned(benchmark::State& state) { bm_copy(state, true); } +static void bm_copy_unaligned(benchmark::State& state) { bm_copy(state, false); } +static void bm_copy_n_aligned(benchmark::State& state) { bm_copy_n(state, true); } +static void bm_copy_n_unaligned(benchmark::State& state) { bm_copy_n(state, false); } + +// Test std::ranges::copy for vector::iterator +BENCHMARK(bm_ranges_copy_aligned)->Range(8, 1 << 16)->DenseRange(102400, 204800, 4096); +BENCHMARK(bm_ranges_copy_n_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_copy_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_copy_n_unaligned)->Range(8, 1 << 20); + +// Test std::copy for vector::iterator +BENCHMARK(bm_copy_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_n_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_n_unaligned)->Range(8, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp 
b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp index b5f0a32b986a0..f3f02d76df635 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp @@ -14,6 +14,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" @@ -59,6 +60,28 @@ struct TestInIters { } }; +template +struct TestBitIter { + std::vector in; + TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + } + TEST_CONSTEXPR_CXX20 void operator()() { + { // Test copy with aligned bytes + std::vector out(N); + std::copy(in.begin(), in.end(), out.begin()); + assert(in == out); + } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::copy(in.begin(), in.end(), out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); + } + } +}; + TEST_CONSTEXPR_CXX20 bool test() { types::for_each(types::cpp17_input_iterator_list(), TestInIters()); @@ -78,13 +101,21 @@ TEST_CONSTEXPR_CXX20 bool test() { assert(std::equal(a, a + 10, expected)); } + { // Test vector::iterator optimization + TestBitIter<8>()(); + TestBitIter<16>()(); + TestBitIter<32>()(); + TestBitIter<64>()(); + TestBitIter<1024>()(); + } + return true; } int main(int, char**) { test(); -#if TEST_STD_VER > 17 +#if TEST_STD_VER >= 20 static_assert(test()); #endif diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp index b0acc1060101c..bd9799e5e769c 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp @@ -14,6 +14,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" @@ -37,20 +38,18 @@ class Derived : public PaddedBase { }; template 
-TEST_CONSTEXPR_CXX20 void -test_copy_n() -{ +TEST_CONSTEXPR_CXX20 void test_copy_n() { { const unsigned N = 1000; - int ia[N] = {}; + int ia[N] = {}; for (unsigned i = 0; i < N; ++i) - ia[i] = i; + ia[i] = i; int ib[N] = {0}; - OutIter r = std::copy_n(InIter(ia), UDI(N/2), OutIter(ib)); - assert(base(r) == ib+N/2); - for (unsigned i = 0; i < N/2; ++i) - assert(ia[i] == ib[i]); + OutIter r = std::copy_n(InIter(ia), UDI(N / 2), OutIter(ib)); + assert(base(r) == ib + N / 2); + for (unsigned i = 0; i < N / 2; ++i) + assert(ia[i] == ib[i]); } { // Make sure that padding bits aren't copied @@ -70,53 +69,80 @@ test_copy_n() } } -TEST_CONSTEXPR_CXX20 bool -test() -{ - test_copy_n, cpp17_output_iterator >(); - test_copy_n, cpp17_input_iterator >(); - test_copy_n, forward_iterator >(); - test_copy_n, bidirectional_iterator >(); - test_copy_n, random_access_iterator >(); - test_copy_n, int*>(); - - test_copy_n, cpp17_output_iterator >(); - test_copy_n, cpp17_input_iterator >(); - test_copy_n, forward_iterator >(); - test_copy_n, bidirectional_iterator >(); - test_copy_n, random_access_iterator >(); - test_copy_n, int*>(); - - test_copy_n, cpp17_output_iterator >(); - test_copy_n, cpp17_input_iterator >(); - test_copy_n, forward_iterator >(); - test_copy_n, bidirectional_iterator >(); - test_copy_n, random_access_iterator >(); - test_copy_n, int*>(); - - test_copy_n, cpp17_output_iterator >(); - test_copy_n, cpp17_input_iterator >(); - test_copy_n, forward_iterator >(); - test_copy_n, bidirectional_iterator >(); - test_copy_n, random_access_iterator >(); - test_copy_n, int*>(); - - test_copy_n >(); - test_copy_n >(); - test_copy_n >(); - test_copy_n >(); - test_copy_n >(); - test_copy_n(); +template +struct TestBitIter { + std::vector in; + TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + } + TEST_CONSTEXPR_CXX20 void operator()() { + { // Test copy with aligned bytes + std::vector out(N); + std::copy_n(in.begin(), 
N, out.begin()); + assert(in == out); + } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::copy_n(in.begin(), N, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); + } + } +}; + +TEST_CONSTEXPR_CXX20 bool test() { + test_copy_n, cpp17_output_iterator >(); + test_copy_n, cpp17_input_iterator >(); + test_copy_n, forward_iterator >(); + test_copy_n, bidirectional_iterator >(); + test_copy_n, random_access_iterator >(); + test_copy_n, int*>(); + + test_copy_n, cpp17_output_iterator >(); + test_copy_n, cpp17_input_iterator >(); + test_copy_n, forward_iterator >(); + test_copy_n, bidirectional_iterator >(); + test_copy_n, random_access_iterator >(); + test_copy_n, int*>(); + + test_copy_n, cpp17_output_iterator >(); + test_copy_n, cpp17_input_iterator >(); + test_copy_n, forward_iterator >(); + test_copy_n, bidirectional_iterator >(); + test_copy_n, random_access_iterator >(); + test_copy_n, int*>(); + + test_copy_n, cpp17_output_iterator >(); + test_copy_n, cpp17_input_iterator >(); + test_copy_n, forward_iterator >(); + test_copy_n, bidirectional_iterator >(); + test_copy_n, random_access_iterator >(); + test_copy_n, int*>(); + + test_copy_n >(); + test_copy_n >(); + test_copy_n >(); + test_copy_n >(); + test_copy_n >(); + test_copy_n(); + + { // Test vector::iterator optimization + TestBitIter<8>()(); + TestBitIter<16>()(); + TestBitIter<32>()(); + TestBitIter<64>()(); + TestBitIter<1024>()(); + } return true; } -int main(int, char**) -{ - test(); +int main(int, char**) { + test(); #if TEST_STD_VER > 17 - static_assert(test()); + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp index 2507e594fe944..4924fa3f39bc1 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp +++ 
b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp @@ -26,6 +26,7 @@ #include "almost_satisfies_types.h" #include "test_iterators.h" +#include "test_macros.h" #include "type_algorithms.h" template > @@ -99,6 +100,30 @@ constexpr void test_iterators() { } // clang-format on +#if TEST_STD_VER >= 23 +template +struct TestBitIter { + std::vector in; + TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + } + TEST_CONSTEXPR_CXX20 void operator()() { + { // Test copy with aligned bytes + std::vector out(N); + std::ranges::copy(in, out.begin()); + assert(in == out); + } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::ranges::copy(in, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); + } + } +}; +#endif + constexpr bool test() { types::for_each(types::forward_iterator_list{}, []() { test_iterators, Out, sentinel_wrapper>>(); @@ -204,6 +229,16 @@ constexpr bool test() { } } +#if TEST_STD_VER >= 23 + { // Test vector::iterator optimization + TestBitIter<8>()(); + TestBitIter<16>()(); + TestBitIter<32>()(); + TestBitIter<64>()(); + TestBitIter<1024>()(); + } +#endif + return true; } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp index d2a2b7c488830..36cd575cb9614 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp @@ -19,8 +19,10 @@ #include #include #include +#include #include "almost_satisfies_types.h" +#include "test_macros.h" #include "test_iterators.h" template @@ -41,10 +43,10 @@ static_assert(std::is_same_v, std::ranges::i template constexpr void test_iterators() { { // simple test - std::array in {1, 2, 3, 4}; + std::array in{1, 2, 3, 4}; std::array out; 
std::same_as> auto ret = - std::ranges::copy_n(In(in.data()), in.size(), Out(out.data())); + std::ranges::copy_n(In(in.data()), in.size(), Out(out.data())); assert(in == out); assert(base(ret.in) == in.data() + in.size()); assert(base(ret.out) == out.data() + out.size()); @@ -70,13 +72,39 @@ constexpr void test_in_iterators() { template constexpr void test_proxy_in_iterators() { - test_iterators>, Out, sentinel_wrapper>>>(); + test_iterators>, + Out, + sentinel_wrapper>>>(); test_iterators>, Out>(); test_iterators>, Out>(); test_iterators>, Out>(); test_iterators>, Out>(); } +#if TEST_STD_VER >= 23 +template +struct TestBitIter { + std::vector in; + TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + } + TEST_CONSTEXPR_CXX20 void operator()() { + { // Test copy with aligned bytes + std::vector out(N); + std::ranges::copy_n(in.begin(), N, out.begin()); + assert(in == out); + } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::ranges::copy_n(in.begin(), N, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); + } + } +}; +#endif + constexpr bool test() { test_in_iterators>(); test_in_iterators>(); @@ -92,8 +120,8 @@ constexpr bool test() { { // check that every element is copied exactly once struct CopyOnce { - bool copied = false; - constexpr CopyOnce() = default; + bool copied = false; + constexpr CopyOnce() = default; constexpr CopyOnce(const CopyOnce& other) = delete; constexpr CopyOnce& operator=(const CopyOnce& other) { assert(!other.copied); @@ -101,14 +129,24 @@ constexpr bool test() { return *this; } }; - std::array in {}; - std::array out {}; + std::array in{}; + std::array out{}; auto ret = std::ranges::copy_n(in.begin(), in.size(), out.begin()); assert(ret.in == in.end()); assert(ret.out == out.end()); assert(std::all_of(out.begin(), out.end(), [](const auto& e) { return e.copied; })); } +#if TEST_STD_VER >= 23 + { // Test vector::iterator 
optimization + TestBitIter<8>()(); + TestBitIter<16>()(); + TestBitIter<32>()(); + TestBitIter<64>()(); + TestBitIter<1024>()(); + } +#endif + return true; } From 7d3334b8b6b6954d73ea6e37470cd3efada23ae2 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Mon, 20 Jan 2025 20:35:43 -0500 Subject: [PATCH 2/4] Apply ldionne's suggestions --- libcxx/include/__algorithm/copy.h | 2 +- libcxx/include/__bit_reference | 2 +- libcxx/include/bitset | 1 - .../test/benchmarks/algorithms/copy.bench.cpp | 40 ++++++++-------- .../alg.copy/copy.pass.cpp | 46 +++++++++---------- .../alg.copy/copy_n.pass.cpp | 46 +++++++++---------- .../alg.copy/ranges.copy.pass.cpp | 46 +++++++++---------- .../alg.copy/ranges.copy_n.pass.cpp | 44 +++++++++--------- 8 files changed, 109 insertions(+), 118 deletions(-) diff --git a/libcxx/include/__algorithm/copy.h b/libcxx/include/__algorithm/copy.h index e2e46674d798a..7454c874a4d93 100644 --- a/libcxx/include/__algorithm/copy.h +++ b/libcxx/include/__algorithm/copy.h @@ -221,7 +221,7 @@ struct __copy_impl { _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > operator()(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) { + __bit_iterator<_Cp, false> __result) const { if (__first.__ctz_ == __result.__ctz_) return std::make_pair(__last, std::__copy_aligned(__first, __last, __result)); return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result)); diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 15e76ac8daf8e..46d2b2f7ed948 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -869,7 +869,7 @@ private: template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> > __copy_impl::operator()( - __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + 
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result) const; template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); diff --git a/libcxx/include/bitset b/libcxx/include/bitset index a20842985b3d5..2914dee3d5292 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -130,7 +130,6 @@ template struct hash>; # include <__cxx03/bitset> #else # include <__algorithm/copy.h> -# include <__algorithm/copy_backward.h> # include <__algorithm/count.h> # include <__algorithm/fill.h> # include <__algorithm/fill_n.h> diff --git a/libcxx/test/benchmarks/algorithms/copy.bench.cpp b/libcxx/test/benchmarks/algorithms/copy.bench.cpp index c09c243161763..b6f0f15eb7703 100644 --- a/libcxx/test/benchmarks/algorithms/copy.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/copy.bench.cpp @@ -12,7 +12,7 @@ #include #include -static void bm_ranges_copy(benchmark::State& state, bool aligned) { +static void bm_ranges_copy_vb(benchmark::State& state, bool aligned) { auto n = state.range(); std::vector in(n, true); std::vector out(aligned ? n : n + 8); @@ -24,7 +24,7 @@ static void bm_ranges_copy(benchmark::State& state, bool aligned) { } } -static void bm_ranges_copy_n(benchmark::State& state, bool aligned) { +static void bm_ranges_copy_n_vb(benchmark::State& state, bool aligned) { auto n = state.range(); std::vector in(n, true); std::vector out(aligned ? n : n + 8); @@ -37,7 +37,7 @@ static void bm_ranges_copy_n(benchmark::State& state, bool aligned) { } } -static void bm_copy(benchmark::State& state, bool aligned) { +static void bm_copy_vb(benchmark::State& state, bool aligned) { auto n = state.range(); std::vector in(n, true); std::vector out(aligned ? 
n : n + 8); @@ -51,7 +51,7 @@ static void bm_copy(benchmark::State& state, bool aligned) { } } -static void bm_copy_n(benchmark::State& state, bool aligned) { +static void bm_copy_n_vb(benchmark::State& state, bool aligned) { auto n = state.range(); std::vector in(n, true); std::vector out(aligned ? n : n + 8); @@ -64,26 +64,26 @@ static void bm_copy_n(benchmark::State& state, bool aligned) { } } -static void bm_ranges_copy_aligned(benchmark::State& state) { bm_ranges_copy(state, true); } -static void bm_ranges_copy_unaligned(benchmark::State& state) { bm_ranges_copy(state, false); } -static void bm_ranges_copy_n_aligned(benchmark::State& state) { bm_ranges_copy_n(state, true); } -static void bm_ranges_copy_n_unaligned(benchmark::State& state) { bm_ranges_copy_n(state, false); } +static void bm_ranges_copy_vb_aligned(benchmark::State& state) { bm_ranges_copy_vb(state, true); } +static void bm_ranges_copy_vb_unaligned(benchmark::State& state) { bm_ranges_copy_vb(state, false); } +static void bm_ranges_copy_n_vb_aligned(benchmark::State& state) { bm_ranges_copy_n_vb(state, true); } +static void bm_ranges_copy_n_vb_unaligned(benchmark::State& state) { bm_ranges_copy_n_vb(state, false); } -static void bm_copy_aligned(benchmark::State& state) { bm_copy(state, true); } -static void bm_copy_unaligned(benchmark::State& state) { bm_copy(state, false); } -static void bm_copy_n_aligned(benchmark::State& state) { bm_copy_n(state, true); } -static void bm_copy_n_unaligned(benchmark::State& state) { bm_copy_n(state, false); } +static void bm_copy_vb_aligned(benchmark::State& state) { bm_copy_vb(state, true); } +static void bm_copy_vb_unaligned(benchmark::State& state) { bm_copy_vb(state, false); } +static void bm_copy_n_vb_aligned(benchmark::State& state) { bm_copy_n_vb(state, true); } +static void bm_copy_n_vb_unaligned(benchmark::State& state) { bm_copy_n_vb(state, false); } // Test std::ranges::copy for vector::iterator -BENCHMARK(bm_ranges_copy_aligned)->Range(8, 1 << 
16)->DenseRange(102400, 204800, 4096); -BENCHMARK(bm_ranges_copy_n_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_ranges_copy_unaligned)->Range(8, 1 << 20); -BENCHMARK(bm_ranges_copy_n_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_copy_vb_aligned)->Range(8, 1 << 16)->DenseRange(102400, 204800, 4096); +BENCHMARK(bm_ranges_copy_n_vb_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_copy_vb_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_copy_n_vb_unaligned)->Range(8, 1 << 20); // Test std::copy for vector::iterator -BENCHMARK(bm_copy_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_n_aligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_unaligned)->Range(8, 1 << 20); -BENCHMARK(bm_copy_n_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_vb_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_n_vb_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_vb_unaligned)->Range(8, 1 << 20); +BENCHMARK(bm_copy_n_vb_unaligned)->Range(8, 1 << 20); BENCHMARK_MAIN(); diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp index f3f02d76df635..1149f4a0134e1 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp @@ -60,27 +60,25 @@ struct TestInIters { } }; -template -struct TestBitIter { - std::vector in; - TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { - for (std::size_t i = 0; i < N; i += 2) - in[i] = true; +TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) { + std::vector in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + + { // Test copy with aligned bytes + std::vector out(N); + std::copy(in.begin(), in.end(), out.begin()); + assert(in == out); } - TEST_CONSTEXPR_CXX20 void operator()() { - { // Test copy with aligned bytes - std::vector out(N); - std::copy(in.begin(), in.end(), out.begin()); - assert(in == out); - } - { // Test copy 
with unaligned bytes - std::vector out(N + 8); - std::copy(in.begin(), in.end(), out.begin() + 4); - for (std::size_t i = 0; i < N; ++i) - assert(out[i + 4] == in[i]); - } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::copy(in.begin(), in.end(), out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); } -}; + + return true; +} TEST_CONSTEXPR_CXX20 bool test() { types::for_each(types::cpp17_input_iterator_list(), TestInIters()); @@ -102,11 +100,11 @@ TEST_CONSTEXPR_CXX20 bool test() { } { // Test vector::iterator optimization - TestBitIter<8>()(); - TestBitIter<16>()(); - TestBitIter<32>()(); - TestBitIter<64>()(); - TestBitIter<1024>()(); + assert(test_vector_bool(8)); + assert(test_vector_bool(16)); + assert(test_vector_bool(32)); + assert(test_vector_bool(64)); + assert(test_vector_bool(256)); } return true; diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp index bd9799e5e769c..496e7bab66d6b 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp @@ -69,27 +69,25 @@ TEST_CONSTEXPR_CXX20 void test_copy_n() { } } -template -struct TestBitIter { - std::vector in; - TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { - for (std::size_t i = 0; i < N; i += 2) - in[i] = true; +TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) { + std::vector in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + + { // Test copy with aligned bytes + std::vector out(N); + std::copy_n(in.begin(), N, out.begin()); + assert(in == out); } - TEST_CONSTEXPR_CXX20 void operator()() { - { // Test copy with aligned bytes - std::vector out(N); - std::copy_n(in.begin(), N, out.begin()); - assert(in == out); - } - { // Test copy with unaligned bytes - std::vector out(N + 8); - 
std::copy_n(in.begin(), N, out.begin() + 4); - for (std::size_t i = 0; i < N; ++i) - assert(out[i + 4] == in[i]); - } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::copy_n(in.begin(), N, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); } -}; + + return true; +} TEST_CONSTEXPR_CXX20 bool test() { test_copy_n, cpp17_output_iterator >(); @@ -128,11 +126,11 @@ TEST_CONSTEXPR_CXX20 bool test() { test_copy_n(); { // Test vector::iterator optimization - TestBitIter<8>()(); - TestBitIter<16>()(); - TestBitIter<32>()(); - TestBitIter<64>()(); - TestBitIter<1024>()(); + assert(test_vector_bool(8)); + assert(test_vector_bool(16)); + assert(test_vector_bool(32)); + assert(test_vector_bool(64)); + assert(test_vector_bool(256)); } return true; diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp index 4924fa3f39bc1..5ec18398afaf8 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp @@ -101,27 +101,25 @@ constexpr void test_iterators() { // clang-format on #if TEST_STD_VER >= 23 -template -struct TestBitIter { - std::vector in; - TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { - for (std::size_t i = 0; i < N; i += 2) - in[i] = true; +constexpr bool test_vector_bool(std::size_t N) { + std::vector in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + + { // Test copy with aligned bytes + std::vector out(N); + std::ranges::copy(in, out.begin()); + assert(in == out); } - TEST_CONSTEXPR_CXX20 void operator()() { - { // Test copy with aligned bytes - std::vector out(N); - std::ranges::copy(in, out.begin()); - assert(in == out); - } - { // Test copy with unaligned bytes - std::vector out(N + 8); - std::ranges::copy(in, out.begin() + 4); - for 
(std::size_t i = 0; i < N; ++i) - assert(out[i + 4] == in[i]); - } + { // Test copy with unaligned bytes + std::vector out(N + 8); + std::ranges::copy(in, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); } -}; + + return true; +} #endif constexpr bool test() { @@ -231,11 +229,11 @@ constexpr bool test() { #if TEST_STD_VER >= 23 { // Test vector::iterator optimization - TestBitIter<8>()(); - TestBitIter<16>()(); - TestBitIter<32>()(); - TestBitIter<64>()(); - TestBitIter<1024>()(); + assert(test_vector_bool(8)); + assert(test_vector_bool(16)); + assert(test_vector_bool(32)); + assert(test_vector_bool(64)); + assert(test_vector_bool(256)); } #endif diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp index 36cd575cb9614..7fce5b694f415 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp @@ -82,26 +82,24 @@ constexpr void test_proxy_in_iterators() { } #if TEST_STD_VER >= 23 -template -struct TestBitIter { - std::vector in; - TEST_CONSTEXPR_CXX20 TestBitIter() : in(N, false) { - for (std::size_t i = 0; i < N; i += 2) - in[i] = true; +constexpr bool test_vector_bool(std::size_t N) { + std::vector in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + + { // Test copy with aligned bytes + std::vector out(N); + std::ranges::copy_n(in.begin(), N, out.begin()); + assert(in == out); } - TEST_CONSTEXPR_CXX20 void operator()() { - { // Test copy with aligned bytes - std::vector out(N); - std::ranges::copy_n(in.begin(), N, out.begin()); - assert(in == out); - } - { // Test copy with unaligned bytes - std::vector out(N + 8); - std::ranges::copy_n(in.begin(), N, out.begin() + 4); - for (std::size_t i = 0; i < N; ++i) - assert(out[i + 4] == in[i]); - } + { // Test 
copy with unaligned bytes + std::vector out(N + 8); + std::ranges::copy_n(in.begin(), N, out.begin() + 4); + for (std::size_t i = 0; i < N; ++i) + assert(out[i + 4] == in[i]); } + + return true; }; #endif @@ -139,11 +137,11 @@ constexpr bool test() { #if TEST_STD_VER >= 23 { // Test vector::iterator optimization - TestBitIter<8>()(); - TestBitIter<16>()(); - TestBitIter<32>()(); - TestBitIter<64>()(); - TestBitIter<1024>()(); + assert(test_vector_bool(8)); + assert(test_vector_bool(16)); + assert(test_vector_bool(32)); + assert(test_vector_bool(64)); + assert(test_vector_bool(256)); } #endif From 25bb4363cf81a0d95cc9b14d509b20b4a9351156 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 29 Jan 2025 22:41:20 -0500 Subject: [PATCH 3/4] Add test cases for odd-sized vector --- .../alg.modifying.operations/alg.copy/copy.pass.cpp | 4 +++- .../alg.modifying.operations/alg.copy/copy_n.pass.cpp | 4 +++- .../alg.modifying.operations/alg.copy/ranges.copy.pass.cpp | 4 +++- .../alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp index 1149f4a0134e1..1ca397c92a334 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp @@ -101,9 +101,11 @@ TEST_CONSTEXPR_CXX20 bool test() { { // Test vector::iterator optimization assert(test_vector_bool(8)); - assert(test_vector_bool(16)); + assert(test_vector_bool(19)); assert(test_vector_bool(32)); + assert(test_vector_bool(49)); assert(test_vector_bool(64)); + assert(test_vector_bool(199)); assert(test_vector_bool(256)); } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp index 
496e7bab66d6b..889e71f4eceb9 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy_n.pass.cpp @@ -127,9 +127,11 @@ TEST_CONSTEXPR_CXX20 bool test() { { // Test vector::iterator optimization assert(test_vector_bool(8)); - assert(test_vector_bool(16)); + assert(test_vector_bool(19)); assert(test_vector_bool(32)); + assert(test_vector_bool(49)); assert(test_vector_bool(64)); + assert(test_vector_bool(199)); assert(test_vector_bool(256)); } diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp index 5ec18398afaf8..68356c80ba7f6 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy.pass.cpp @@ -230,9 +230,11 @@ constexpr bool test() { #if TEST_STD_VER >= 23 { // Test vector::iterator optimization assert(test_vector_bool(8)); - assert(test_vector_bool(16)); + assert(test_vector_bool(19)); assert(test_vector_bool(32)); + assert(test_vector_bool(49)); assert(test_vector_bool(64)); + assert(test_vector_bool(199)); assert(test_vector_bool(256)); } #endif diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp index 7fce5b694f415..c7031f63a02f6 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp @@ -138,9 +138,11 @@ constexpr bool test() { #if TEST_STD_VER >= 23 { // Test vector::iterator optimization assert(test_vector_bool(8)); - assert(test_vector_bool(16)); + assert(test_vector_bool(19)); assert(test_vector_bool(32)); + assert(test_vector_bool(49)); 
assert(test_vector_bool(64)); + assert(test_vector_bool(199)); assert(test_vector_bool(256)); } #endif From 7cc72afc64fc73f04b11c6db1929476824512787 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 29 Jan 2025 22:47:00 -0500 Subject: [PATCH 4/4] Update release note in 21.rst --- libcxx/docs/ReleaseNotes/21.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index e746244b653d8..8a400065376f4 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -44,7 +44,8 @@ Implemented Papers Improvements and New Features ----------------------------- -- TODO +- The ``std::ranges::copy`` and ``std::ranges::copy_n`` algorithms have been optimized for ``std::vector<bool>::iterator``\s, + resulting in a performance improvement of up to 2000x. Deprecations and Removals