-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[libc++] Speed up set_intersection() by fast-forwarding over ranges of non-matching elements with one-sided binary search. #75230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
b65415f
f6bcf27
36bb63e
c23272c
0b57ea0
08af548
7aa3927
c44c2a2
46cc95f
450f5ce
d0c5f2b
faa3115
995d04b
d568d49
6ba7061
76c33ca
bb872e0
a1cd8ff
24d1d5b
f17fa58
4b73773
65bd9b7
d0facc5
a12aa37
69dba78
fe1fe8c
bb2c758
c6b895c
31321b9
6c88549
3805e95
090df86
cb92d3c
f4a6f36
3f9cfec
1afb99d
613e64a
4588447
2af9a6f
4f05ded
161d81c
3c9f800
4aa4a82
8307b2d
be6c5c8
62a6010
e2af5cc
89201ea
5f6e7fe
109e5a4
cc95b51
91e4e51
c977bb7
b4fad5b
87f12c2
505c004
95b118a
b1bfa0f
f501bdc
c5df570
6189e95
6eacf2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,207 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include <algorithm> | ||
| #include <iterator> | ||
| #include <set> | ||
| #include <vector> | ||
|
|
||
| #include "common.h" | ||
|
|
||
| namespace { | ||
|
|
||
| // types of containers we'll want to test, covering interesting iterator types | ||
// Container selector for the benchmark: resolves to std::vector.
struct VectorContainer {
  // Label spliced into the generated benchmark name.
  static constexpr const char* Name = "Vector";

  // Concrete container type for the given template arguments.
  template <class... Ts>
  using type = std::vector<Ts...>;
};
|
|
||
// Container selector for the benchmark: resolves to std::set.
struct SetContainer {
  // Label spliced into the generated benchmark name.
  static constexpr const char* Name = "Set";

  // Concrete container type for the given template arguments.
  template <class... Ts>
  using type = std::set<Ts...>;
};
|
|
||
| using AllContainerTypes = std::tuple<VectorContainer, SetContainer>; | ||
|
|
||
// set_intersection performance may depend on where matching values lie, so the
// benchmark data is generated for each of these layouts.
enum class OverlapPosition {
  // the two ranges are built from disjoint slices of the source data
  None = 0,
  // both ranges are prefixes of the same sorted data
  // (performance-wise, matches at the back are identical to ones at the front,
  // so there is no separate "Back" layout)
  Front = 1,
  // both ranges are strided selections of the same sorted data
  Interlaced = 2,
};
|
|
||
| struct AllOverlapPositions : EnumValuesAsTuple<AllOverlapPositions, OverlapPosition, 3> { | ||
| static constexpr const char* Names[] = {"None", "Front", "Interlaced"}; | ||
| }; | ||
|
|
||
// Functor that moves the elements of an iterator range into a new Container
// instance.
template <typename Container>
struct MoveInto {
  // Moves every element of [first, last) into a freshly constructed Container
  // and returns it. The source elements are left in a moved-from state.
  //
  // This is a const member rather than a `static operator()`: static call
  // operators are a C++23 feature, while everything else in this file only
  // needs C++17. Calls through a (const) MoveInto object work unchanged.
  template <class It>
  [[nodiscard]] Container operator()(It first, It last) const {
    Container out;
    std::move(first, last, std::inserter(out, out.begin()));
    return out;
  }
};
|
||
|
|
||
| // lightweight wrapper around fillValues() which puts a little effort into | ||
| // making values that would be contiguous when sorted non-contiguous in memory | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| template <typename T> | ||
| std::vector<T> getVectorOfRandom(size_t N) { | ||
| std::vector<T> v; | ||
| fillValues(v, N, Order::Random); | ||
| sortValues(v, Order::Random); | ||
| return std::vector<T>(v); | ||
| } | ||
|
|
||
// Forward-iterator adaptor: each increment moves the wrapped iterator forward
// by `stride_` elements, so walking a range visits every stride_-th element.
//
// Precondition when used as a [first, last) pair: the distance between the two
// wrapped iterators must be a multiple of the stride. Increments never look
// for the end and equality only compares the wrapped iterators, so an end
// position that is not a multiple of the stride away is stepped over.
template <typename Wrapped>
struct StridedFwdIt {
  Wrapped base_{};
  unsigned stride_ = 1;

  using iterator_category = std::forward_iterator_tag;
  using difference_type   = typename Wrapped::difference_type;
  using value_type        = typename Wrapped::value_type;
  using pointer           = typename Wrapped::pointer;
  using reference         = typename Wrapped::reference;

  // Forward iterators are required to be default constructible.
  StridedFwdIt() = default;

  // `stride` must be non-zero, otherwise incrementing would never advance.
  StridedFwdIt(Wrapped base, unsigned stride) : base_(base), stride_(stride) { assert(stride_ != 0); }

  // Pre-increment returns a reference to this iterator (it used to return a
  // copy, which the iterator requirements do not allow).
  StridedFwdIt& operator++() {
    for (unsigned i = 0; i < stride_; ++i)
      ++base_;
    return *this;
  }
  // Post-increment returns the position held before the step.
  StridedFwdIt operator++(int) {
    auto tmp = *this;
    ++*this;
    return tmp;
  }
  // Dereferencing yields exactly what the wrapped iterator yields, so the
  // adaptor also works over iterators whose `reference` is const-qualified.
  reference operator*() const { return *base_; }
  // Returns a pointer so that `it->member` works (it used to return a
  // reference, which cannot be used with `->`).
  pointer operator->() const { return &*base_; }
  // Two adaptors are equal when they wrap the same position; the stride is
  // not part of the comparison.
  bool operator==(const StridedFwdIt& o) const { return base_ == o.base_; }
  bool operator!=(const StridedFwdIt& o) const { return !operator==(o); }
};
template <typename Wrapped>
StridedFwdIt(Wrapped, unsigned) -> StridedFwdIt<Wrapped>;
|
|
||
| // realistically, data won't all be nicely contiguous in a container | ||
| // we'll go through some effort to ensure that it's shuffled through memory | ||
| template <class Container> | ||
| std::pair<Container, Container> genCacheUnfriendlyData(size_t size1, size_t size2, OverlapPosition pos) { | ||
| using ValueType = typename Container::value_type; | ||
| const MoveInto<Container> move_into; | ||
| const auto src_size = pos == OverlapPosition::None ? size1 + size2 : std::max(size1, size2); | ||
| std::vector<ValueType> src = getVectorOfRandom<ValueType>(src_size); | ||
|
|
||
| if (pos == OverlapPosition::None) { | ||
| std::sort(src.begin(), src.end()); | ||
| return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(src.begin() + size1, src.end())); | ||
| } | ||
|
|
||
| // all other overlap types will have to copy some part of the data, but if | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // we copy after sorting it will likely have high cache locality, so we sort | ||
| // each copy separately | ||
| auto copy = src; | ||
| std::sort(src.begin(), src.end()); | ||
| std::sort(copy.begin(), copy.end()); | ||
|
|
||
| switch (pos) { | ||
| case OverlapPosition::None: | ||
| break; | ||
|
|
||
| case OverlapPosition::Front: | ||
| return std::make_pair(move_into(src.begin(), src.begin() + size1), move_into(copy.begin(), copy.begin() + size2)); | ||
|
|
||
| case OverlapPosition::Interlaced: | ||
| const auto stride1 = size1 < size2 ? size2 / size1 : 1; | ||
| const auto stride2 = size2 < size1 ? size1 / size2 : 1; | ||
| return std::make_pair(move_into(StridedFwdIt(src.begin(), stride1), StridedFwdIt(src.end(), stride1)), | ||
| move_into(StridedFwdIt(copy.begin(), stride2), StridedFwdIt(copy.end(), stride2))); | ||
| } | ||
| abort(); | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return std::pair<Container, Container>(); | ||
| } | ||
|
|
||
| template <class ValueType, class Container, class Overlap> | ||
| struct SetIntersection { | ||
| using ContainerType = typename Container::template type<Value<ValueType>>; | ||
| size_t size1_; | ||
| size_t size2_; | ||
|
|
||
| SetIntersection(size_t size1, size_t size2) : size1_(size1), size2_(size2) {} | ||
|
|
||
| bool skip() const noexcept { | ||
| // let's save some time and skip simmetrical runs | ||
| return size1_ <= size2_; | ||
| } | ||
|
|
||
| void run(benchmark::State& state) const { | ||
| state.PauseTiming(); | ||
| auto input = genCacheUnfriendlyData<ContainerType>(size1_, size2_, Overlap()); | ||
| std::vector<Value<ValueType>> out(std::min(size1_, size2_)); | ||
|
|
||
| size_t cmp; | ||
| auto tracking_less = [&cmp](const Value<ValueType>& lhs, const Value<ValueType>& rhs) { | ||
| ++cmp; | ||
| return std::less<Value<ValueType>>{}(lhs, rhs); | ||
| }; | ||
|
|
||
| const auto BATCH_SIZE = std::max(size_t{512}, (2 * TestSetElements) / (size1_ + size2_)); | ||
| state.ResumeTiming(); | ||
|
|
||
| for (const auto& _ : state) { | ||
| while (state.KeepRunningBatch(BATCH_SIZE)) { | ||
| for (unsigned i = 0; i < BATCH_SIZE; ++i) { | ||
| cmp = 0; | ||
| const auto& [c1, c2] = input; | ||
| auto res = std::set_intersection(c1.begin(), c1.end(), c2.begin(), c2.end(), out.begin(), tracking_less); | ||
| benchmark::DoNotOptimize(res); | ||
| state.counters["Comparisons"] = cmp; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| std::string name() const { | ||
| return std::string("SetIntersection") + Overlap::name() + '_' + Container::Name + ValueType::name() + '_' + | ||
| std::to_string(size1_) + '_' + std::to_string(size2_); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace | ||
|
|
||
| int main(int argc, char** argv) { /**/ | ||
| benchmark::Initialize(&argc, argv); | ||
| if (benchmark::ReportUnrecognizedArguments(argc, argv)) | ||
| return 1; | ||
| const std::vector<size_t> Quantities = { | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| 1 << 0, | ||
| 1 << 4, | ||
| 1 << 8, | ||
| 1 << 14, | ||
| // Running each benchmark in parallel consumes too much memory with MSAN | ||
| // and can lead to the test process being killed. | ||
| #if !TEST_HAS_FEATURE(memory_sanitizer) | ||
| 1 << 18 | ||
| #endif | ||
| }; | ||
|
|
||
| makeCartesianProductBenchmark<SetIntersection, AllValueTypes, AllContainerTypes, AllOverlapPositions>( | ||
| Quantities, Quantities); | ||
| benchmark::RunSpecifiedBenchmarks(); | ||
| } | ||
ichaer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,6 +87,54 @@ struct _IterOps<_ClassicAlgPolicy> { | |
| std::advance(__iter, __count); | ||
| } | ||
|
|
||
| // advance with sentinel, a la std::ranges::advance | ||
|
||
| // it's unclear whether _Iter has a difference_type and whether that's signed, so we play it safe: | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // use the incoming type for returning and steer clear of negative overflows | ||
ldionne marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| template <class _Iter, class _Distance> | ||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance | ||
| advance(_Iter& __iter, _Distance __count, const _Iter& __sentinel) { | ||
|
||
| return _IterOps::__advance(__iter, __count, __sentinel, typename iterator_traits<_Iter>::iterator_category()); | ||
| } | ||
|
|
||
| // advance with sentinel, a la std::ranges::advance -- InputIterator specialization | ||
| template <class _InputIter, class _Distance> | ||
|
||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance | ||
| __advance(_InputIter& __iter, _Distance __count, const _InputIter& __sentinel, input_iterator_tag) { | ||
| _Distance __dist{}; | ||
| for (; __dist < __count && __iter != __sentinel; ++__dist) | ||
| ++__iter; | ||
| return __count - __dist; | ||
| } | ||
|
|
||
| // advance with sentinel, a la std::ranges::advance -- BidirectionalIterator specialization | ||
| template <class _BiDirIter, class _Distance> | ||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static _Distance | ||
| __advance(_BiDirIter& __iter, _Distance __count, const _BiDirIter& __sentinel, bidirectional_iterator_tag) { | ||
| _Distance __dist{}; | ||
| if (__count >= 0) | ||
| for (; __dist < __count && __iter != __sentinel; ++__dist) | ||
| ++__iter; | ||
| else | ||
| for (__count = -__count; __dist < __count && __iter != __sentinel; ++__dist) | ||
| --__iter; | ||
| return __count - __dist; | ||
| } | ||
|
|
||
| // advance with sentinel, a la std::ranges::advance -- RandomIterator specialization | ||
| template <class _RandIter, class _Distance> | ||
| _LIBCPP_HIDE_FROM_ABI constexpr static _Distance | ||
| __advance(_RandIter& __iter, _Distance __count, const _RandIter& __sentinel, random_access_iterator_tag) { | ||
| auto __dist = _IterOps::distance(__iter, __sentinel); | ||
| _LIBCPP_ASSERT_UNCATEGORIZED( | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| __count == 0 || (__dist < 0) == (__count < 0), "__sentinel must precede __iter when __count<0"); | ||
ichaer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (__count < 0) | ||
| __dist = __dist > __count ? __dist : __count; | ||
| else | ||
| __dist = __dist < __count ? __dist : __count; | ||
| __iter += __dist; | ||
| return __count - __dist; | ||
| } | ||
|
|
||
| // distance | ||
| template <class _Iter> | ||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.