diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 2101f9c71788c..89082aa63c97d 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -197,6 +197,7 @@ set(BENCHMARK_TESTS algorithms/ranges_sort.bench.cpp algorithms/ranges_sort_heap.bench.cpp algorithms/ranges_stable_sort.bench.cpp + algorithms/set_intersection.bench.cpp algorithms/sort.bench.cpp algorithms/sort_heap.bench.cpp algorithms/stable_sort.bench.cpp diff --git a/libcxx/benchmarks/algorithms/set_intersection.bench.cpp b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp new file mode 100644 index 0000000000000..b3fb15fc77b31 --- /dev/null +++ b/libcxx/benchmarks/algorithms/set_intersection.bench.cpp @@ -0,0 +1,184 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "common.h" +#include "test_iterators.h" + +namespace { + +// types of containers we'll want to test, covering interesting iterator types +struct VectorContainer { + template + using type = std::vector; + + static constexpr const char* Name = "Vector"; +}; + +struct SetContainer { + template + using type = std::set; + + static constexpr const char* Name = "Set"; +}; + +using AllContainerTypes = std::tuple; + +// set_intersection performance may depend on where matching values lie +enum class OverlapPosition { + None, + Front, + // performance-wise, matches at the back are identical to ones at the front + Interlaced, +}; + +struct AllOverlapPositions : EnumValuesAsTuple { + static constexpr const char* Names[] = {"None", "Front", "Interlaced"}; +}; + +// forward_iterator wrapping which, for 
each increment, moves the underlying iterator forward Stride elements +template +struct StridedFwdIt { + Wrapped base_; + unsigned stride_; + + using iterator_category = std::forward_iterator_tag; + using difference_type = typename Wrapped::difference_type; + using value_type = typename Wrapped::value_type; + using pointer = typename Wrapped::pointer; + using reference = typename Wrapped::reference; + + StridedFwdIt(Wrapped base, unsigned stride) : base_(base), stride_(stride) { assert(stride_ != 0); } + + StridedFwdIt operator++() { + for (unsigned i = 0; i < stride_; ++i) + ++base_; + return *this; + } + StridedFwdIt operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + value_type& operator*() { return *base_; } + const value_type& operator*() const { return *base_; } + value_type& operator->() { return *base_; } + const value_type& operator->() const { return *base_; } + bool operator==(const StridedFwdIt& o) const { return base_ == o.base_; } + bool operator!=(const StridedFwdIt& o) const { return !operator==(o); } +}; +template +StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt; + +template +std::vector getVectorOfRandom(size_t N) { + std::vector v; + fillValues(v, N, Order::Random); + sortValues(v, Order::Random); + return std::vector(v); +} + +// Realistically, data won't all be nicely contiguous in a container, +// we'll go through some effort to ensure that it's shuffled through memory +// this is especially important for containers with non-contiguous element +// storage, but it will affect even a std::vector, because when you copy a +// std::vector the underlying data storage position for the char +// arrays of the copy are likely to have high locality +template +std::pair genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) { + using ValueType = typename Container::value_type; + auto move_into = [](auto first, auto last) { + Container out; + std::move(first, last, std::inserter(out, out.begin())); + return out; + }; + 
const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2); + std::vector src = getVectorOfRandom(src_size); + + if (pos == OverlapPosition::None) { + std::sort(src.begin(), src.end()); + return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end())); + } + + // All other overlap types will have to copy some part of the data, but if + // we copy after sorting it will likely have high locality, so we sort + // each copy separately + auto copy = src; + std::sort(src.begin(), src.end()); + std::sort(copy.begin(), copy.end()); + + switch (pos) { + case OverlapPosition::None: + // we like -Wswitch :) + break; + + case OverlapPosition::Front: + return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2)); + + case OverlapPosition::Interlaced: + const auto stride1 = size1 < size2 ? size2 / size1 : 1; + const auto stride2 = size2 < size1 ? size1 / size2 : 1; + return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)), + move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2))); + } + std::abort(); // would be std::unreachable() if it could + return std::pair(); +} + +template +struct SetIntersection { + using ContainerType = typename Container::template type>; + size_t size1_; + size_t size2_; + + SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {} + + bool skip() const noexcept { + // let's save some time and skip symmetrical runs + return size1_ < size2_; + } + + void run(benchmark::State& state) const { + auto input = genCacheUnfriendlyData(size1_, size2_, Overlap()); + std::vector> out(std::min(size1_, size2_)); + + const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_)); + for (const auto& _ : state) { + while (state.KeepRunningBatch(BATCH_SIZE)) { + for (unsigned i = 0; i < BATCH_SIZE; ++i) { + const auto& [c1, 
c2] = input; + auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin()); + benchmark::DoNotOptimize(res); + } + } + } + } + + std::string name() const { + return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name + ValueType::name() + '_' + + std::to_string(size1_) + '_' + std::to_string(size2_); + } +}; + +} // namespace + +int main(int argc, char** argv) { /**/ + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + + makeCartesianProductBenchmark( + Quantities, Quantities); + benchmark::RunSpecifiedBenchmarks(); + return 0; +} diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index e6d8acb74aeb2..4bbe99f418c6d 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -69,6 +69,10 @@ Improvements and New Features - The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of up to 100x. +- The ``std::set_intersection`` and ``std::ranges::set_intersection`` algorithms have been optimized to fast-forward over + contiguous ranges of non-matching values, reducing the number of comparisons from linear to + logarithmic growth with the number of elements in best-case scenarios. + - The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ```` available. 
- The ``_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT`` macro has been added to make the declarations in ```` diff --git a/libcxx/include/__algorithm/iterator_operations.h b/libcxx/include/__algorithm/iterator_operations.h index 5cf13f0a3f292..8ced989233bc4 100644 --- a/libcxx/include/__algorithm/iterator_operations.h +++ b/libcxx/include/__algorithm/iterator_operations.h @@ -11,6 +11,7 @@ #include <__algorithm/iter_swap.h> #include <__algorithm/ranges_iterator_concept.h> +#include <__assert> #include <__config> #include <__iterator/advance.h> #include <__iterator/distance.h> @@ -160,6 +161,59 @@ struct _IterOps<_ClassicAlgPolicy> { _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX14 void __advance_to(_Iter& __first, _Iter __last) { __first = __last; } + + // advance with sentinel, a la std::ranges::advance + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_Iter> + __advance_to(_Iter& __iter, __difference_type<_Iter> __count, const _Iter& __sentinel) { + return _IterOps::__advance_to(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category()); + } + +private: + // advance with sentinel, a la std::ranges::advance -- InputIterator specialization + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_InputIter> __advance_to( + _InputIter& __iter, __difference_type<_InputIter> __count, const _InputIter& __sentinel, input_iterator_tag) { + __difference_type<_InputIter> __dist = 0; + for (; __dist < __count && __iter != __sentinel; ++__dist) + ++__iter; + return __count - __dist; + } + + // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_BiDirIter> + __advance_to(_BiDirIter& __iter, + __difference_type<_BiDirIter> __count, + const _BiDirIter& __sentinel, + bidirectional_iterator_tag) { + __difference_type<_BiDirIter> __dist = 0; + if 
(__count >= 0) + for (; __dist < __count && __iter != __sentinel; ++__dist) + ++__iter; + else + for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist) + --__iter; + return __count - __dist; + } + + // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static __difference_type<_RandIter> + __advance_to(_RandIter& __iter, + __difference_type<_RandIter> __count, + const _RandIter& __sentinel, + random_access_iterator_tag) { + auto __dist = _IterOps::distance(__iter, __sentinel); + _LIBCPP_ASSERT_VALID_INPUT_RANGE( + __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count < 0"); + if (__count < 0) + __dist = __dist > __count ? __dist : __count; + else + __dist = __dist < __count ? __dist : __count; + __iter += __dist; + return __count - __dist; + } }; _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h index 8fd355a7cfc4a..c417d84835497 100644 --- a/libcxx/include/__algorithm/lower_bound.h +++ b/libcxx/include/__algorithm/lower_bound.h @@ -27,11 +27,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter -__lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { - auto __len = _IterOps<_AlgPolicy>::distance(__first, __last); - +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting( + _Iter __first, + const _Type& __value, + typename iterator_traits<_Iter>::difference_type __len, + _Comp& __comp, + _Proj& __proj) { while (__len != 0) { auto __l2 = std::__half_positive(__len); _Iter __m = __first; @@ -46,6 +48,48 @@ __lower_bound(_Iter __first, _Sent __last, const _Type& __value, _Comp& __comp, return __first; } +// One-sided binary search, aka meta binary search, has been in the public domain for decades, and 
has the general +// advantage of being \Omega(1) rather than the classic algorithm's \Omega(log(n)), with the downside of executing at +// most 2*log(n) comparisons vs the classic algorithm's exact log(n). There are two scenarios in which it really shines: +// the first one is when operating over non-random-access iterators, because the classic algorithm requires knowing the +// container's size upfront, which adds \Omega(n) iterator increments to the complexity. The second one is when you're +// traversing the container in order, trying to fast-forward to the next value: in that case, the classic algorithm +// would yield \Omega(n*log(n)) comparisons and, for non-random-access iterators, \Omega(n^2) iterator increments, +// whereas the one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of +// comparisons. +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +__lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { + // step = 0, ensuring we can always short-circuit when distance is 1 later on + if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value)) + return __first; + + using _Distance = typename iterator_traits<_ForwardIterator>::difference_type; + for (_Distance __step = 1; __first != __last; __step <<= 1) { + auto __it = __first; + auto __dist = __step - _IterOps<_AlgPolicy>::__advance_to(__it, __step, __last); + // once we reach the last range where needle can be we must start + // looking inwards, bisecting that range + if (__it == __last || !std::__invoke(__comp, std::__invoke(__proj, *__it), __value)) { + // we've already checked the previous value and it was less, we can save + // one comparison by skipping bisection + if (__dist == 1) + return __it; + return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj); + } + // range not found, 
move forward! + __first = __it; + } + return __first; +} + +template +_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +__lower_bound(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { + const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last); + return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj); +} + template _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h index 73d888d1b0384..bb0d86cd0f58d 100644 --- a/libcxx/include/__algorithm/set_intersection.h +++ b/libcxx/include/__algorithm/set_intersection.h @@ -12,10 +12,15 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/lower_bound.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__iterator/next.h> +#include <__type_traits/is_same.h> +#include <__utility/exchange.h> #include <__utility/move.h> +#include <__utility/swap.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -38,10 +43,103 @@ struct __set_intersection_result { : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} }; -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter> +// Helper for __set_intersection() with one-sided binary search: populate result and advance input iterators if they +// are found to potentially contain the same value in two consecutive calls. 
This function is very intimately related to +// the way it is used and doesn't attempt to abstract that, it's not appropriate for general usage outside of its +// context. +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_intersection_add_output_if_equal( + bool __may_be_equal, + _InForwardIter1& __first1, + _InForwardIter2& __first2, + _OutIter& __result, + bool& __prev_may_be_equal) { + if (__may_be_equal && __prev_may_be_equal) { + *__result = *__first1; + ++__result; + ++__first1; + ++__first2; + __prev_may_be_equal = false; + } else { + __prev_may_be_equal = __may_be_equal; + } +} + +// With forward iterators we can make multiple passes over the data, allowing the use of one-sided binary search to +// reduce best-case complexity to log(N). Understanding how we can use binary search and still respect complexity +// guarantees is _not_ straightforward: the guarantee is "at most 2*(N+M)-1 comparisons", and one-sided binary search +// will necessarily overshoot depending on the position of the needle in the haystack -- for instance, if we're +// searching for 3 in (1, 2, 3, 4), we'll check if 3<1, then 3<2, then 3<4, and, finally, 3<3, for a total of 4 +// comparisons, when linear search would have yielded 3. However, because we won't need to perform the intervening +// reciprocal comparisons (ie 1<3, 2<3, 4<3), that extra comparison doesn't run afoul of the guarantee. Additionally, +// this type of scenario can only happen for match distances of up to 5 elements, because 2*log2(8) is 6, and we'll +// still be worse-off at position 5 of an 8-element set. From then onwards these scenarios can't happen. TL;DR: we'll be +// 1 comparison worse-off compared to the classic linear-searching algorithm if matching position 3 of a set with 4 +// elements, or position 5 if the set has 7 or 8 elements, but we'll never exceed the complexity guarantees from the +// standard. 
+template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter> __set_intersection( - _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { + _InForwardIter1 __first1, + _Sent1 __last1, + _InForwardIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Compare&& __comp, + std::forward_iterator_tag, + std::forward_iterator_tag) { + _LIBCPP_CONSTEXPR std::__identity __proj; + bool __prev_may_be_equal = false; + + while (__first2 != __last2) { + _InForwardIter1 __first1_next = + std::__lower_bound_onesided<_AlgPolicy>(__first1, __last1, *__first2, __comp, __proj); + std::swap(__first1_next, __first1); + // keeping in mind that a==b iff !(a(__first2, __last2, *__first1, __comp, __proj); + std::swap(__first2_next, __first2); + std::__set_intersection_add_output_if_equal( + __first2 == __first2_next, __first1, __first2, __result, __prev_may_be_equal); + } + return __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter>( + _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)), + _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)), + std::move(__result)); +} + +// input iterators are not suitable for multipass algorithms, so we stick to the classic single-pass version +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter> +__set_intersection( + _InInputIter1 __first1, + _Sent1 __last1, + _InInputIter2 __first2, + _Sent2 __last2, + _OutIter __result, + _Compare&& __comp, + std::input_iterator_tag, + std::input_iterator_tag) { while (__first1 != __last1 && __first2 != __last2) { if (__comp(*__first1, *__first2)) ++__first1; @@ -55,12 +153,28 @@ __set_intersection( } } - return __set_intersection_result<_InIter1, _InIter2, _OutIter>( + return __set_intersection_result<_InInputIter1, _InInputIter2, 
_OutIter>( _IterOps<_AlgPolicy>::next(std::move(__first1), std::move(__last1)), _IterOps<_AlgPolicy>::next(std::move(__first2), std::move(__last2)), std::move(__result)); } +template +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +_LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter> +__set_intersection( + _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { + return std::__set_intersection<_AlgPolicy>( + std::move(__first1), + std::move(__last1), + std::move(__first2), + std::move(__last2), + std::move(__result), + std::forward<_Compare>(__comp), + typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter1>(), + typename std::_IterOps<_AlgPolicy>::template __iterator_category<_InIter2>()); +} + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_intersection( _InputIterator1 __first1, diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp index 5323bb1bc1193..f7870485cfefc 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/ranges_set_intersection.pass.cpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include "almost_satisfies_types.h" #include "MoveOnly.h" @@ -463,75 +465,6 @@ constexpr bool test() { } } - // Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1 comparisons and applications of each projection. 
- { - std::array r1{{{1}, {3}, {5}, {7}, {9}}}; - std::array r2{{{2}, {4}, {6}, {8}, {10}}}; - std::array expected{}; - - const std::size_t maxOperation = 2 * (r1.size() + r2.size()) - 1; - - // iterator overload - { - std::array out{}; - std::size_t numberOfComp = 0; - std::size_t numberOfProj1 = 0; - std::size_t numberOfProj2 = 0; - - const auto comp = [&numberOfComp](int x, int y) { - ++numberOfComp; - return x < y; - }; - - const auto proj1 = [&numberOfProj1](const Data& d) { - ++numberOfProj1; - return d.data; - }; - - const auto proj2 = [&numberOfProj2](const Data& d) { - ++numberOfProj2; - return d.data; - }; - - std::ranges::set_intersection(r1.begin(), r1.end(), r2.begin(), r2.end(), out.data(), comp, proj1, proj2); - - assert(std::ranges::equal(out, expected, {}, &Data::data)); - assert(numberOfComp < maxOperation); - assert(numberOfProj1 < maxOperation); - assert(numberOfProj2 < maxOperation); - } - - // range overload - { - std::array out{}; - std::size_t numberOfComp = 0; - std::size_t numberOfProj1 = 0; - std::size_t numberOfProj2 = 0; - - const auto comp = [&numberOfComp](int x, int y) { - ++numberOfComp; - return x < y; - }; - - const auto proj1 = [&numberOfProj1](const Data& d) { - ++numberOfProj1; - return d.data; - }; - - const auto proj2 = [&numberOfProj2](const Data& d) { - ++numberOfProj2; - return d.data; - }; - - std::ranges::set_intersection(r1, r2, out.data(), comp, proj1, proj2); - - assert(std::ranges::equal(out, expected, {}, &Data::data)); - assert(numberOfComp < maxOperation); - assert(numberOfProj1 < maxOperation); - assert(numberOfProj2 < maxOperation); - } - } - // Comparator convertible to bool { struct ConvertibleToBool { diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp new file mode 100644 index 0000000000000..ddf4087ddd6cd --- /dev/null +++ 
b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.intersection/set_intersection_complexity.pass.cpp @@ -0,0 +1,404 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// Algorithmic complexity tests for both std::set_intersection and std::ranges::set_intersection + +// template +// requires OutputIterator +// && OutputIterator +// && HasLess +// && HasLess +// constexpr OutIter // constexpr after C++17 +// set_intersection(InIter1 first1, InIter1 last1, InIter2 first2, InIter2 last2, +// OutIter result); +// +// template S1, input_iterator I2, sentinel_for S2, +// weakly_incrementable O, class Comp = ranges::less, +// class Proj1 = identity, class Proj2 = identity> +// requires mergeable +// constexpr set_intersection_result +// set_intersection(I1 first1, S1 last1, I2 first2, S2 last2, O result, +// Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 +// +// template +// requires mergeable, iterator_t, O, Comp, Proj1, Proj2> +// constexpr set_intersection_result, borrowed_iterator_t, O> +// set_intersection(R1&& r1, R2&& r2, O result, +// Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}); // since C++20 + +#include +#include +#include +#include + +#include "test_iterators.h" + +namespace { + +// __debug_less will perform an additional comparison in an assertion +static constexpr unsigned std_less_comparison_count_multiplier() noexcept { +#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG + return 2; +#else + return 1; +#endif +} + +struct [[nodiscard]] OperationCounts { + std::size_t comparisons{}; + struct PerInput { + std::size_t proj{}; + 
IteratorOpCounts iterops; + + [[nodiscard]] constexpr bool isNotBetterThan(const PerInput& other) { + return proj >= other.proj && iterops.increments + iterops.decrements + iterops.zero_moves >= + other.iterops.increments + other.iterops.decrements + other.iterops.zero_moves; + } + }; + std::array in; + + [[nodiscard]] constexpr bool isNotBetterThan(const OperationCounts& expect) { + return std_less_comparison_count_multiplier() * comparisons >= expect.comparisons && + in[0].isNotBetterThan(expect.in[0]) && in[1].isNotBetterThan(expect.in[1]); + } +}; + +template +struct counted_set_intersection_result { + std::array result; + OperationCounts opcounts; + + constexpr counted_set_intersection_result() = default; + + constexpr explicit counted_set_intersection_result(std::array&& contents) : result{contents} {} + + constexpr void assertNotBetterThan(const counted_set_intersection_result& other) { + assert(result == other.result); + assert(opcounts.isNotBetterThan(other.opcounts)); + } +}; + +template +counted_set_intersection_result(std::array) -> counted_set_intersection_result; + +template