-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm #104683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libcxx][algorithm] Optimize std::stable_sort via radix sort algorithm #104683
Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write If you have received no comments on your PR for a week, you can request a review If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-libcxx Author: Дмитрий Изволов (izvolov) ChangesThe radix sort (MSD) algorithm allows to speed up std::stable_sort dramatically in case we sort integers. Patch is 28.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104683.diff 5 Files Affected:
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 32579272858a8e..95e4e3faf88671 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -74,6 +74,7 @@ set(files
__algorithm/prev_permutation.h
__algorithm/pstl.h
__algorithm/push_heap.h
+ __algorithm/radix_sort.h
__algorithm/ranges_adjacent_find.h
__algorithm/ranges_all_of.h
__algorithm/ranges_any_of.h
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
new file mode 100644
index 00000000000000..5e14dec9df0918
--- /dev/null
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -0,0 +1,410 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H
+#define _LIBCPP___ALGORITHM_RADIX_SORT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/for_each.h>
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/move_iterator.h>
+#include <__iterator/next.h>
+#include <__numeric/partial_sum.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_unsigned.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <climits>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+#include <stdexcept>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+inline void __variadic_expansion_dummy(initializer_list<int>) {}
+
+# define EXPAND_VARIADIC(expression) __variadic_expansion_dummy({(expression, 0)...})
+
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+ -> enable_if_t<is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value,
+ move_iterator<_Iterator> > {
+ return make_move_iterator(std::move(__i));
+}
+
+template <typename _Iterator>
+constexpr auto __move_assign_please(_Iterator __i)
+ -> enable_if_t<not is_move_assignable<typename iterator_traits<_Iterator>::value_type>::value, _Iterator> {
+ return __i;
+}
+
+template <typename _Integer>
+constexpr _Integer __intlog2_impl(_Integer __integer) {
+ auto __degree = _Integer{0};
+
+ while ((__integer >>= 1) > 0) {
+ ++__degree;
+ }
+
+ return __degree;
+}
+
+template <typename _Integer>
+constexpr _Integer __intlog2(_Integer __integer) {
+ static_assert(is_integral<_Integer>::value, "Must be an integral type");
+
+ return __integer > 0 ? __intlog2_impl(__integer)
+ : throw domain_error("The binary logarithm is not defined on non-positive numbers");
+}
+
+template <typename _InputIterator, typename _OutputIterator>
+pair<_OutputIterator, typename iterator_traits<_InputIterator>::value_type>
+__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
+ if (__first == __last)
+ return {__result, 0};
+
+ auto __max = *__first;
+ typename iterator_traits<_InputIterator>::value_type __sum = *__first;
+ *__result = __sum;
+
+ while (++__first != __last) {
+ if (__max < *__first) {
+ __max = *__first;
+ }
+ __sum = std::move(__sum) + *__first;
+ *++__result = __sum;
+ }
+ return {++__result, __max};
+}
+
+template <typename _Value, typename _Map, typename _Radix>
+struct __radix_sort_traits {
+ using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+ static_assert(is_integral<image_type>::value, "");
+ static_assert(is_unsigned<image_type>::value, "");
+
+ using radix_type = decay_t<invoke_result_t<_Radix, image_type> >;
+ static_assert(is_integral<radix_type>::value, "");
+
+ constexpr static auto radix_value_range = numeric_limits<radix_type>::max() + 1;
+ constexpr static auto radix_size = __intlog2<uint64_t>(radix_value_range);
+ constexpr static auto radix_count = sizeof(image_type) * CHAR_BIT / radix_size;
+};
+
+template <typename _Value, typename _Map>
+struct __counting_sort_traits {
+ using image_type = decay_t<invoke_result_t<_Map, _Value> >;
+ static_assert(is_integral<image_type>::value, "");
+ static_assert(is_unsigned<image_type>::value, "");
+
+ constexpr static const auto value_range = numeric_limits<image_type>::max() + 1;
+ constexpr static auto radix_size = __intlog2<uint64_t>(value_range);
+};
+
+template <typename _Radix>
+auto __nth_radix(size_t __radix_number, _Radix __radix) {
+ return [__radix_number, __radix = std::move(__radix)](auto __n) {
+ using value_type = decltype(__n);
+ static_assert(is_integral<value_type>::value, "");
+ static_assert(is_unsigned<value_type>::value, "");
+ using traits = __counting_sort_traits<value_type, _Radix>;
+
+ return __radix(static_cast<value_type>(__n >> traits::radix_size * __radix_number));
+ };
+}
+
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __count(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+ std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; });
+}
+
+template <typename _ForwardIterator, typename _Map, typename _RandomAccessIterator>
+void __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
+ using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+ using traits = __counting_sort_traits<value_type, _Map>;
+
+ __count(__first, __last, __map, __counters);
+
+ const auto __counters_end = __counters + traits::value_range;
+ partial_sum(__counters, __counters_end, __counters);
+}
+
+template <typename _ForwardIterator, typename _RandomAccessIterator1, typename _Map, typename _RandomAccessIterator2>
+void __dispose(_ForwardIterator __first,
+ _ForwardIterator __last,
+ _RandomAccessIterator1 __result,
+ _Map __map,
+ _RandomAccessIterator2 __counters) {
+ std::for_each(__first, __last, [&__result, &__counters, &__map](auto&& __preimage) {
+ auto __index = __counters[__map(__preimage)]++;
+ __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+ });
+}
+
+template <typename _BidirectionalIterator,
+ typename _RandomAccessIterator1,
+ typename _Map,
+ typename _RandomAccessIterator2>
+void dispose_backward(_BidirectionalIterator __first,
+ _BidirectionalIterator __last,
+ _RandomAccessIterator1 __result,
+ _Map __map,
+ _RandomAccessIterator2 __counters) {
+ std::for_each(make_reverse_iterator(__last),
+ make_reverse_iterator(__first),
+ [&__result, &__counters, &__map](auto&& __preimage) {
+ auto __index = --__counters[__map(__preimage)];
+ __result[__index] = std::forward<decltype(__preimage)>(__preimage);
+ });
+}
+
+template <typename _ForwardIterator,
+ typename _Map,
+ typename _Radix,
+ typename _RandomAccessIterator1,
+ typename _RandomAccessIterator2,
+ size_t... _Radices>
+bool __collect_impl(
+ _ForwardIterator __first,
+ _ForwardIterator __last,
+ _Map __map,
+ _Radix __radix,
+ _RandomAccessIterator1 __counters,
+ _RandomAccessIterator2 __maximums,
+ index_sequence<_Radices...>) {
+ using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+ constexpr auto __radix_value_range = __radix_sort_traits<value_type, _Map, _Radix>::radix_value_range;
+
+ auto __previous = numeric_limits<invoke_result_t<_Map, value_type> >::min();
+ auto __is_sorted = true;
+ for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& value) {
+ auto __current = __map(value);
+ __is_sorted &= (__current >= __previous);
+ __previous = __current;
+
+ EXPAND_VARIADIC(++__counters[_Radices][__nth_radix(_Radices, __radix)(__current)]);
+ });
+
+ EXPAND_VARIADIC(
+ __maximums[_Radices] =
+ __partial_sum_max(__counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices])
+ .second);
+
+ return __is_sorted;
+}
+
+template <typename _ForwardIterator,
+ typename _Map,
+ typename _Radix,
+ typename _RandomAccessIterator1,
+ typename _RandomAccessIterator2>
+bool __collect(_ForwardIterator __first,
+ _ForwardIterator __last,
+ _Map __map,
+ _Radix __radix,
+ _RandomAccessIterator1 __counters,
+ _RandomAccessIterator2 __maximums) {
+ using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+ constexpr auto __radix_count = __radix_sort_traits<value_type, _Map, _Radix>::radix_count;
+ return __collect_impl(__first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>());
+}
+
+template <typename _BidirectionalIterator,
+ typename _RandomAccessIterator1,
+ typename _Map,
+ typename _RandomAccessIterator2>
+void __dispose_backward(_BidirectionalIterator __first,
+ _BidirectionalIterator __last,
+ _RandomAccessIterator1 __result,
+ _Map __map,
+ _RandomAccessIterator2 __counters) {
+ for_each(
+ make_reverse_iterator(__last), make_reverse_iterator(__first), [&__result, &__counters, &__map](auto&& preimage) {
+ auto __index = --__counters[__map(preimage)];
+ __result[__index] = std::forward<decltype(preimage)>(preimage);
+ });
+}
+
+template <typename _ForwardIterator, typename _RandomAccessIterator, typename _Map>
+_RandomAccessIterator
+__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
+ using value_type = typename iterator_traits<_ForwardIterator>::value_type;
+ using traits = __counting_sort_traits<value_type, _Map>;
+
+ using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
+ difference_type __counters[traits::value_range + 1] = {0};
+
+ __collect(__first, __last, __map, next(std::begin(__counters)));
+ __dispose(__first, __last, __result, __map, std::begin(__counters));
+
+ return __result + __counters[traits::value_range];
+}
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+ __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count == 1,
+ void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+ _RandomAccessIterator1 __last,
+ _RandomAccessIterator2 buffer,
+ _Map __map,
+ _Radix __radix) {
+ auto __buffer_end = __counting_sort_impl(
+ __move_assign_please(__first), __move_assign_please(__last), buffer, [&__map, &__radix](const auto& value) {
+ return __radix(__map(value));
+ });
+
+ std::copy(__move_assign_please(buffer), __move_assign_please(__buffer_end), __first);
+}
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+typename enable_if<
+ __radix_sort_traits<typename iterator_traits<_RandomAccessIterator1>::value_type, _Map, _Radix>::radix_count % 2 ==
+ 0,
+ void>::type
+__radix_sort_impl(_RandomAccessIterator1 __first,
+ _RandomAccessIterator1 __last,
+ _RandomAccessIterator2 __buffer_begin,
+ _Map __map,
+ _Radix __radix) {
+ using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type;
+ using traits = __radix_sort_traits<value_type, _Map, _Radix>;
+
+ using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
+ difference_type __counters[traits::radix_count][traits::radix_value_range] = {{0}};
+ difference_type __maximums[traits::radix_count] = {0};
+ const auto __is_sorted = __collect(__first, __last, __map, __radix, __counters, __maximums);
+ if (not __is_sorted) {
+ const auto __range_size = distance(__first, __last);
+ auto __buffer_end = __buffer_begin + __range_size;
+ for (size_t __radix_number = 0; __radix_number < traits::radix_count; __radix_number += 2) {
+ const auto __n0th_is_single = __maximums[__radix_number] == __range_size;
+ const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size;
+
+ if (__n0th_is_single && __n1th_is_single) {
+ continue;
+ }
+
+ if (__n0th_is_single) {
+ copy(__move_assign_please(__first), __move_assign_please(__last), __buffer_begin);
+ } else {
+ auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) {
+ return __nth_radix(__radix_number, __radix)(__map(__v));
+ };
+ __dispose_backward(
+ __move_assign_please(__first),
+ __move_assign_please(__last),
+ __buffer_begin,
+ __n0th,
+ __counters[__radix_number]);
+ }
+
+ if (__n1th_is_single) {
+ copy(__move_assign_please(__buffer_begin), __move_assign_please(__buffer_end), __first);
+ } else {
+ auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) {
+ return __nth_radix(__radix_number + 1, __radix)(__map(__v));
+ };
+ __dispose_backward(
+ __move_assign_please(__buffer_begin),
+ __move_assign_please(__buffer_end),
+ __first,
+ __n1th,
+ __counters[__radix_number + 1]);
+ }
+ }
+ }
+}
+
+constexpr auto __to_unsigned(bool __b) { return __b; }
+
+template <typename _Ip>
+constexpr auto __to_unsigned(_Ip __n) {
+ constexpr const auto __min_value = numeric_limits<_Ip>::min();
+ return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
+}
+
+struct __identity_fn {
+ template <typename _Tp>
+ constexpr decltype(auto) operator()(_Tp&& __value) const {
+ return std::forward<_Tp>(__value);
+ }
+};
+
+struct __low_byte_fn {
+ template <typename _Ip>
+ constexpr uint8_t operator()(_Ip __integer) const {
+ static_assert(is_integral<_Ip>::value, "");
+ static_assert(is_unsigned<_Ip>::value, "");
+
+ return static_cast<uint8_t>(__integer & 0xff);
+ }
+};
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Map, typename _Radix>
+void __radix_sort(_RandomAccessIterator1 __first,
+ _RandomAccessIterator1 __last,
+ _RandomAccessIterator2 buffer,
+ _Map __map,
+ _Radix __radix) {
+ auto __map_to_unsigned = [__map = std::move(__map)](const auto& x) { return __to_unsigned(__map(x)); };
+ __radix_sort_impl(__first, __last, buffer, __map_to_unsigned, __radix);
+}
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+void __radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 buffer) {
+ __radix_sort(__first, __last, buffer, __identity_fn{}, __low_byte_fn{});
+}
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(
+ _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 buffer, _BoolConstant<true>) {
+ __radix_sort(__first, __last, buffer, __identity_fn{}, __low_byte_fn{});
+ return true;
+}
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+bool __radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<false>) {
+ return false;
+}
+
+# undef EXPAND_VARIADIC
+
+#else // _LIBCPP_STD_VER > 14
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, bool _B>
+bool __radix_sort(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BoolConstant<_B>) {
+ return false;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H
diff --git a/libcxx/include/__algorithm/ranges_stable_sort.h b/libcxx/include/__algorithm/ranges_stable_sort.h
index 9c7df80ae98722..96d84b208687fc 100644
--- a/libcxx/include/__algorithm/ranges_stable_sort.h
+++ b/libcxx/include/__algorithm/ranges_stable_sort.h
@@ -24,6 +24,8 @@
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__utility/forward.h>
#include <__utility/move.h>
@@ -45,7 +47,18 @@ struct __stable_sort {
auto __last_iter = ranges::next(__first, __last);
auto&& __projected_comp = std::__make_projected(__comp, __proj);
- std::__stable_sort_impl<_RangeAlgPolicy>(std::move(__first), __last_iter, __projected_comp);
+ constexpr auto __default_comp = is_same_v<_Comp, ranges::less>;
+ constexpr auto __default_proj = is_same_v<_Proj, identity>;
+ constexpr auto __integral_value = is_integral_v<iter_value_t<_Iter>>;
+ constexpr auto __integral_projection = __default_proj && __integral_value;
+ // constexpr auto __integral_projection = is_integral_v<remove_reference_t<invoke_result_t<_Proj&,
+ // iter_value_t<_Iter>>>>;
+ // TODO: Support projection in stable_sort
+ std::__stable_sort_impl<_RangeAlgPolicy>(
+ std::move(__first),
+ __last_iter,
+ __projected_comp,
+ _BoolConstant < __default_comp && __integral_projection > {});
return __last_iter;
}
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 726e7e16b3564a..f8624726a4e323 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -13,6 +13,7 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/inplace_merge.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/radix_sort.h>
#include <__algorithm/sort.h>
#include <__config>
#include <__debug_utils/strict_weak_ordering_check.h>
@@ -20,6 +21,9 @@
#include <__memory/destruct_n.h>
#include <__memory/temporary_buffer.h>
#include <__memory/unique_ptr.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -133,20 +137,24 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_assign(
*__result = _Ops::__iter_move(__first2);
}
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Compare __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
- ptrdiff_t __buff_size);
+template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _EnableRadixSort>
+void __stable_sort(
+ _RandomAccessIterator __first,
+ _RandomAccessIterator __last,
+ _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __len,
+ typename iterator_traits<_RandomAccessIterator>::value_type* __buff,
+ ptrdiff_t __buff_size,
+ _BoolConstant<_EnableRadixSort>);
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
-void __stable_sort_move(_RandomAccessIterator ...
[truncated]
|
|
Couldn't we simply forward to |
|
Radix sort is much faster than comparison sorts (including |
If that is the case, why wouldn't we use it in |
|
Oh, I didn't understand the question first time. The key moment is that radix sort needs a temporary buffer, and |
c401e86 to
671ac8f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can reuse __to_unsigned_like in <__type_traits/make_unsigned.h>.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can't. As I can see, __to_unsigned_like is a static cast to corresponding unsigned. But it's not just enough to cast to unsigned. We need to shift the value range from [0, 2^n) to [-2^(n-1), 2^(n-1)), so that if x is less than y in the old range, then x will also be less than y in the new range.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to shift the value range from
[0, 2^n)to[-2^(n-1), 2^(n-1)), so that ifxis less thanyin the old range, thenxwill also be less thanyin the new range.
Hmm, I think this function actually shifts from [-2^(n-1), 2^(n-1)) to [0, 2^n), which should be intended. How about changing the name to __shift_to_unsiged or __plus_to_unsigned as a plain to has closer meaning to casting in the standard library?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you're right, I wanted to write that values are shifted from [-2^(n-1), 2^(n-1)) to [0, 2^n).
__shift_to_unsigned is a good name, thanks. Changed it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| typename iterator_traits<_RandomAccessIterator>::difference_type __len, | |
| __iter_diff_t<_RandomAccessIterator> __len, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the code I didn't changed, just reformatted. Should I change to __iter_diff_t and __iter_value_type anyway?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. This code should already be formatted correctly. Do you maybe have a clang-format that's too old?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm using
$ clang-format --version
clang-format version 18.1.8
and the following command called from the repo root.
git diff -U0 --no-color my/main | clang/tools/clang-format/clang-format-diff.py -style=file:libcxx/.clang-format -p1 -i
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are these values not simply in the function and how did you determine these values?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was an analogy to __stable_sort_switch.
Values were determined empirically based on benchmarks of raw comparison of radix sort vs std::sort vs std::stable_sort, and with respect to special benchmark cases such as ascending, descending, pipe-organ and others.
The raw comparison is here
- https://github.com/izvolov/burst?tab=readme-ov-file#%D1%86%D0%B5%D0%BB%D0%BE%D1%87%D0%B8%D1%81%D0%BB%D0%B5%D0%BD%D0%BD%D1%8B%D0%B5-%D1%81%D0%BE%D1%80%D1%82%D0%B8%D1%80%D0%BE%D0%B2%D0%BA%D0%B8
- https://github.com/izvolov/burst/blob/master/doc/README.md#intsort
Code: - https://github.com/izvolov/burst/blob/master/benchmark/burst/algorithm/integer_sort_comparison.py.in
- https://github.com/izvolov/burst/blob/master/benchmark/burst/algorithm/radix_sort.cpp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd make them constexpr functions instead, since we can.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean something like this?
template <class _Tp>
constexpr unsigned __radix_sort_min_bound () {
static_assert(std::is_integral_v<_Tp>);
if constexpr (sizeof(_Tp) == 1) {
return 1 << 8;
}
return 1 << 10;
}Is it legal to use C++11 and later features in stable_sort.h since there aren't any version guards there?
If it is, I'd also rewrite the code to remove _BoolConstant<_EnableRadixSort>.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
The only my concern is that now we require C++17 to enable radix sort. Is it ok?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we need C++17. if constexpr is supported by both Clang and GCC since C++11. Assuming I've missed something and we actually need C++17 that seems fine though. Most people use C++17 by now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if constexpr is C++17 AFAIK.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but we don't care about pedantic warnings. All compilers we care about support it in C++11 and 14 mode: https://godbolt.org/z/YGrsh9Kh6
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I tried it at first, but got CI errors. So I made a commit with C++17 guard: eb84acf
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, it can be done in this PR. It's simple enough.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You've missed a few enable_ifs here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
1b2dedc to
8b5d583
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // from zero), or moves them back to the initial range (for each odd `i`). It there is only one radix in sorted integers | |
| // from zero), or moves them back to the initial range (for each odd `i`). If there is only one radix in sorted integers |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // (e.g. int8), than sorted values are placed to the buffer, and then moved back to the initial range. | |
| // (e.g. int8), the sorted values are placed to the buffer, and then moved back to the initial range. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. This code should already be formatted correctly. Do you maybe have a clang-format that's too old?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can simply reduce the requirements (e.g. static_assert them). Also no need to consolidate it with other functions, that was simply a comment with no action requested.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we do the dispatching in __stable_sort_impl instead we don't need to do the dispatching twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd make them constexpr functions instead, since we can.
c9192ec to
8e8e007
Compare
12aa59c to
dd3581e
Compare
|
@philnik777 ping |
philnik777
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the slow review. I've taken a look and it looks mostly fine, but I think the CI will find a few problems. Please check and ping me again once it's green (or I have to approve a run again).
| std::__radix_sort(__first, __last, __buffer, __identity{}, __low_byte_fn{}); | ||
| } | ||
|
|
||
| #endif // _LIBCPP_STD_VER > 14 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| #endif // _LIBCPP_STD_VER > 14 | |
| #endif // _LIBCPP_STD_VER >= 14 |
Same below.
|
@philnik777 I've fixed all recent issues, please approve a run. |
|
@philnik777 The latest CI run has failed, but I can't find out if the reason of the fail: if it is an error in my PR, or it's just a CI flap. Could you help, please? |
|
The latest CI failures were unrelated to this PR. The one of (generic-hardening-mode-fast-with-abi-breaks, libcxx-runners-set) is being fixed by #115271. |
Thanks for making it clear. Should I wait for that PR merged? |
5af8bc7 to
48969e2
Compare
philnik777
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This Looks Good to me except for a few nits.
@ldionne would you also like to take a look?
| template <class _Tp> | ||
| constexpr unsigned __radix_sort_max_bound() { | ||
| static_assert(is_integral<_Tp>::value); | ||
| if constexpr (sizeof(_Tp) == 8) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be sizeof(_Tp) >= 8 instead? We support __int128.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, thank you.
Done.
libcxx/include/__bit/bit_log2.h
Outdated
| template <class _Tp> | ||
| _LIBCPP_HIDE_FROM_ABI constexpr _Tp __bit_log2(_Tp __t) noexcept { | ||
| return numeric_limits<_Tp>::digits - 1 - std::countl_zero(__t); | ||
| static_assert(is_unsigned<_Tp>::value, "__bit_log2 requires an unsigned integer type"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we want to use __libcpp_is_unsigned_integer here instead just to avoid any behaviour change in this patch. (I'm not sure whether using is_unsigned results in one)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we need C++17. if constexpr is supported by both Clang and GCC since C++11. Assuming I've missed something and we actually need C++17 that seems fine though. Most people use C++17 by now.
| constexpr static auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; | ||
| constexpr static auto __radix_size = std::__bit_log2<uint64_t>(__radix_value_range); | ||
| constexpr static auto __radix_count = sizeof(__image_type) * CHAR_BIT / __radix_size; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| constexpr static auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; | |
| constexpr static auto __radix_size = std::__bit_log2<uint64_t>(__radix_value_range); | |
| constexpr static auto __radix_count = sizeof(__image_type) * CHAR_BIT / __radix_size; | |
| static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; | |
| static constexpr auto __radix_size = std::__bit_log2<uint64_t>(__radix_value_range); | |
| static constexpr auto __radix_count = sizeof(__image_type) * CHAR_BIT / __radix_size; |
I think we usually write it this way around.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
| template <class _Value, class _Map> | ||
| struct __counting_sort_traits { | ||
| using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>; | ||
| static_assert(is_unsigned<__image_type>::value, ""); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| static_assert(is_unsigned<__image_type>::value, ""); | |
| static_assert(is_unsigned<__image_type>::value); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is C++17 feature also.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also available in C++14.
|
CI flapped again. |
|
I feel uncomfortable bothering you, but I believe, CI flapped again. |
Don't worry. I'm more annoyed at the CI than anything else. No idea why this is so problematic here. I haven't seen it nearly this much on other patches. |
|
@philnik777 @ldionne ping |
ldionne
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The CI issues have to do with our spurious-failure restarter not working 100% correctly. I'm trying to fix that but it's taking longer than I'd like.
Thanks for the patch and thanks to reviewers for the multiple iterations on this. I'm deferring to other reviewers for the algorithm itself, but I have a comment about testing that I'd like to be addressed. I'll let @philnik777 give the final thumbs up.
|
@philnik777 ping |
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
Should code in |
Nearby tests are almost formatted correctly, so I reformatted the test. |
|
@philnik777 Approve CI run, please. |
|
I'll try to give it a last pass next week. |
|
@philnik777 Could you rerun failed jobs, please? |
|
@philnik777 ping |
philnik777
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits addressed.
| template <class _Value, class _Map> | ||
| struct __counting_sort_traits { | ||
| using __image_type = decay_t<typename __invoke_of<_Map, _Value>::type>; | ||
| static_assert(is_unsigned<__image_type>::value, ""); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also available in C++14.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| using traits = __counting_sort_traits<_Integer, _Radix>; | |
| using __traits = __counting_sort_traits<_Integer, _Radix>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| using value_type = __iter_value_type<_RandomAccessIterator1>; | |
| using traits = __radix_sort_traits<value_type, _Map, _Radix>; | |
| using __value_type = __iter_value_type<_RandomAccessIterator1>; | |
| using __traits = __radix_sort_traits<value_type, _Map, _Radix>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (not __is_sorted) { | |
| if (!__is_sorted) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does this get a __map parameter? AFAICT this is always __identity{}?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At this point, yes. But the algorithm is more general, for example, we can extend the functionality to sort floating point types, and this can be done by adding the appropriate __map parameter. I'm going to work on it later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| using value_type = __iter_value_type<_ForwardIterator>; | |
| using __value_type = __iter_value_type<_ForwardIterator>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| typedef typename std::iterator_traits<RI>::value_type value_type; | |
| using value_type = typename std::iterator_traits<RI>::value_type; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
d7b8d00 to
75155ae
Compare
|
Squashed the commits and rebased to the latest |
|
Finally all green :) . |
|
@izvolov Thanks for the patch again! Sorry that it took so long. No, there is nothing else to do. |
|
@izvolov Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR. Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues. How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again. If you don't get any reports, no action is required from you. Your changes are working as expected, well done! |
|
@philnik777 thank you! |
… floating-point types (#129452) These changes speed up `std::stable_sort` in the case of sorting floating-point types. This applies only to IEEE 754 floats. The speedup is similar to that achieved for integers in PR #104683 (see benchmarks below). Why does this worth doing? Previously, `std::stable_sort` had almost no chance of beating `std::sort`. Now there are cases when `std::stable_sort` is preferrable, and the difference is significant. ``` --------------------------------------------------------------------------- Benchmark | std::stable_sort | std::sort | std::stable_sort | without radix_sort | | with radix_sort --------------------------------------------------------------------------- float_Random_1 | 1.62 ns | 2.15 ns | 1.61 ns float_Random_4 | 18.0 ns | 2.71 ns | 16.3 ns float_Random_16 | 118 ns | 113 ns | 112 ns float_Random_64 | 751 ns | 647 ns | 730 ns float_Random_256 | 4715 ns | 2937 ns | 4669 ns float_Random_1024 | 25713 ns | 13172 ns | 5959 ns <-- float_Random_4096 | 131307 ns | 56870 ns | 19294 ns <-- float_Random_16384 | 624996 ns | 242953 ns | 64264 ns <-- float_Random_65536 | 2895661 ns | 1027279 ns | 288553 ns <-- float_Random_262144 | 13285372 ns | 4342593 ns | 3022377 ns <-- float_Random_1048576 | 60595871 ns | 19087591 ns | 18690457 ns <-- float_Random_2097152 | 131336117 ns | 38800396 ns | 52325016 ns float_Random_4194304 | 270043042 ns | 79978019 ns | 102907726 ns double_Random_1 | 1.60 ns | 2.15 ns | 1.61 ns double_Random_4 | 15.2 ns | 2.70 ns | 16.9 ns double_Random_16 | 104 ns | 112 ns | 119 ns double_Random_64 | 712 ns | 614 ns | 755 ns double_Random_256 | 4496 ns | 2966 ns | 4820 ns double_Random_1024 | 24722 ns | 12679 ns | 6189 ns <-- double_Random_4096 | 126075 ns | 54484 ns | 20999 ns <-- double_Random_16384 | 613782 ns | 232557 ns | 110276 ns <-- double_Random_65536 | 2894972 ns | 988531 ns | 774302 ns <-- double_Random_262144 | 13460273 ns | 4278059 ns | 5115123 ns double_Random_1048576 | 61119996 ns | 18408462 ns | 27166574 ns double_Random_2097152 | 132511525 ns | 37986158 ns | 54423869 ns double_Random_4194304 | 272949862 ns | 77912616 ns | 147670834 ns ``` Comparison for only `std::stable_sort`: ``` Benchmark Time Time Old Time New -------------------------------------------------------------------------------------------------- BM_StableSort_float_Random_1024 -0.7997 25438 5096 BM_StableSort_float_Random_4096 -0.8731 128157 16260 BM_StableSort_float_Random_16384 -0.9024 621271 60623 BM_StableSort_float_Random_65536 -0.9081 2922413 268619 BM_StableSort_float_Random_262144 -0.7766 13386345 2990408 BM_StableSort_float_Random_1048576 -0.6954 60673010 18481751 BM_StableSort_float_Random_2097152 -0.6026 130977358 52052182 BM_StableSort_float_Random_4194304 -0.6252 271556583 101770500 BM_StableSort_float_Ascending_1024 -0.6430 6711 2396 BM_StableSort_float_Ascending_4096 -0.7979 38460 7773 BM_StableSort_float_Ascending_16384 -0.8471 191069 29222 BM_StableSort_float_Ascending_65536 -0.8683 882321 116194 BM_StableSort_float_Ascending_262144 -0.8346 3868552 639937 BM_StableSort_float_Ascending_1048576 -0.7460 16521233 4195953 BM_StableSort_float_Ascending_2097152 -0.5439 21757532 9922776 BM_StableSort_float_Ascending_4194304 -0.7525 67847496 16791582 BM_StableSort_float_Descending_1024 -0.6359 15038 5475 BM_StableSort_float_Descending_4096 -0.7090 62810 18278 BM_StableSort_float_Descending_16384 -0.7763 311844 69750 BM_StableSort_float_Descending_65536 -0.7228 1270513 352202 BM_StableSort_float_Descending_262144 -0.6785 5484173 1763045 BM_StableSort_float_Descending_1048576 -0.5084 20223149 9941852 BM_StableSort_float_Descending_2097152 -0.7646 60523254 14247014 BM_StableSort_float_Descending_4194304 -0.5638 95706839 41748858 BM_StableSort_float_SingleElement_1024 +0.3715 1732 2375 BM_StableSort_float_SingleElement_4096 -0.1685 9357 7781 BM_StableSort_float_SingleElement_16384 -0.3793 47307 29362 BM_StableSort_float_SingleElement_65536 -0.4925 227666 115536 BM_StableSort_float_SingleElement_262144 -0.4271 1075853 616387 BM_StableSort_float_SingleElement_1048576 -0.3736 5097599 3193279 BM_StableSort_float_SingleElement_2097152 -0.2470 9854161 7420158 BM_StableSort_float_SingleElement_4194304 -0.3384 22175964 14670720 BM_StableSort_float_PipeOrgan_1024 -0.4885 10664 5455 BM_StableSort_float_PipeOrgan_4096 -0.6340 50095 18337 BM_StableSort_float_PipeOrgan_16384 -0.7078 238700 69739 BM_StableSort_float_PipeOrgan_65536 -0.6740 1102419 359378 BM_StableSort_float_PipeOrgan_262144 -0.7460 4698739 1193511 BM_StableSort_float_PipeOrgan_1048576 -0.5657 18493972 8032392 BM_StableSort_float_PipeOrgan_2097152 -0.7116 41089206 11850349 BM_StableSort_float_PipeOrgan_4194304 -0.6650 83445011 27955737 BM_StableSort_float_QuickSortAdversary_1024 -0.6863 17402 5460 BM_StableSort_float_QuickSortAdversary_4096 -0.7715 79864 18247 BM_StableSort_float_QuickSortAdversary_16384 -0.7800 317480 69839 BM_StableSort_float_QuickSortAdversary_65536 -0.7400 1357601 352967 BM_StableSort_float_QuickSortAdversary_262144 -0.6450 5662094 2009769 BM_StableSort_float_QuickSortAdversary_1048576 -0.5092 21173627 10392107 BM_StableSort_float_QuickSortAdversary_2097152 -0.7333 61748178 16469993 BM_StableSort_float_QuickSortAdversary_4194304 -0.5607 98459863 43250182 BM_StableSort_double_Random_1024 -0.7657 24769 5802 BM_StableSort_double_Random_4096 -0.8441 126449 19717 BM_StableSort_double_Random_16384 -0.8269 614910 106447 BM_StableSort_double_Random_65536 -0.7413 2905000 751427 BM_StableSort_double_Random_262144 -0.6287 13449514 4994348 BM_StableSort_double_Random_1048576 -0.5635 60863246 26568349 BM_StableSort_double_Random_2097152 -0.5959 130293892 52654532 BM_StableSort_double_Random_4194304 -0.4772 272616445 142526267 BM_StableSort_double_Ascending_1024 -0.4870 6757 3466 BM_StableSort_double_Ascending_4096 -0.7360 37592 9923 BM_StableSort_double_Ascending_16384 -0.7971 183967 37324 BM_StableSort_double_Ascending_65536 -0.7465 897116 227398 BM_StableSort_double_Ascending_262144 -0.6764 4020980 1301033 BM_StableSort_double_Ascending_1048576 -0.6407 16421799 5900751 BM_StableSort_double_Ascending_2097152 -0.6380 29347139 10622419 BM_StableSort_double_Ascending_4194304 -0.5934 70439925 28644185 BM_StableSort_double_Descending_1024 -0.5988 15216 6105 BM_StableSort_double_Descending_4096 -0.6857 65069 20449 BM_StableSort_double_Descending_16384 -0.6922 329321 101381 BM_StableSort_double_Descending_65536 -0.7038 1367970 405242 BM_StableSort_double_Descending_262144 -0.6472 5361644 1891429 BM_StableSort_double_Descending_1048576 -0.6656 22031404 7366459 BM_StableSort_double_Descending_2097152 -0.7593 68922467 16591242 BM_StableSort_double_Descending_4194304 -0.6392 96283643 34743223 BM_StableSort_double_SingleElement_1024 +0.9128 1895 3625 BM_StableSort_double_SingleElement_4096 +0.1475 10013 11490 BM_StableSort_double_SingleElement_16384 -0.1901 52382 42424 BM_StableSort_double_SingleElement_65536 -0.2096 254698 201313 BM_StableSort_double_SingleElement_262144 -0.1833 1248478 1019648 BM_StableSort_double_SingleElement_1048576 -0.1741 5703397 4710603 BM_StableSort_double_SingleElement_2097152 -0.1751 10922197 9009835 BM_StableSort_double_SingleElement_4194304 -0.1538 26571923 22485137 BM_StableSort_double_PipeOrgan_1024 -0.4406 10752 6014 BM_StableSort_double_PipeOrgan_4096 -0.5917 49456 20195 BM_StableSort_double_PipeOrgan_16384 -0.6258 270515 101221 BM_StableSort_double_PipeOrgan_65536 -0.7098 1159462 336457 BM_StableSort_double_PipeOrgan_262144 -0.6591 4735711 1614433 BM_StableSort_double_PipeOrgan_1048576 -0.6620 19353110 6541172 BM_StableSort_double_PipeOrgan_2097152 -0.7288 49131812 13323391 BM_StableSort_double_PipeOrgan_4194304 -0.5988 81958974 32878171 BM_StableSort_double_QuickSortAdversary_1024 -0.6516 17948 6254 BM_StableSort_double_QuickSortAdversary_4096 -0.7527 82359 20363 BM_StableSort_double_QuickSortAdversary_16384 -0.7009 340410 101811 BM_StableSort_double_QuickSortAdversary_65536 -0.6854 1487480 467928 BM_StableSort_double_QuickSortAdversary_262144 -0.6386 5648460 2041377 BM_StableSort_double_QuickSortAdversary_1048576 -0.6127 22859142 8852587 BM_StableSort_double_QuickSortAdversary_2097152 -0.7161 68693975 19499381 BM_StableSort_double_QuickSortAdversary_4194304 -0.5909 95532179 39077491 OVERALL_GEOMEAN -0.6472 0 0 ```
The radix sort (LSD) algorithm allows to speed up std::stable_sort dramatically in case we sort integers.
The speed up varies from a relatively small to x10 times, depending on type of sorted elements and the initial state of the sorted array.