Skip to content

Commit 57b9d7a

Browse files
committed
optimize minmax_element
1 parent 28f7ed6 commit 57b9d7a

File tree

1 file changed

+112
-20
lines changed

1 file changed

+112
-20
lines changed

libcxx/include/__algorithm/minmax_element.h

Lines changed: 112 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#define _LIBCPP___ALGORITHM_MINMAX_ELEMENT_H
1111

1212
#include <__algorithm/comp.h>
13+
#include <__algorithm/simd_utils.h>
14+
#include <__algorithm/unwrap_iter.h>
1315
#include <__config>
1416
#include <__functional/identity.h>
1517
#include <__iterator/iterator_traits.h>
@@ -81,43 +83,133 @@ __minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
8183
}
8284

8385

84-
// template<class _Tp>
85-
// typename std::iterator_traits<_Iter>::value_type
86-
// __minmax_element_vectorized(_Tp __first, _Tp __last) {
86+
template<class _Iter>
87+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
88+
__minmax_element_vectorized(_Iter __first, _Iter __last) {
89+
using __value_type = __iter_value_type<_Iter>;
90+
constexpr size_t __unroll_count = 4;
91+
constexpr size_t __vec_size = __native_vector_size<__value_type>;
92+
using __vec_type = __simd_vector<__value_type, __vec_size>;
93+
if (__last == __first) [[__unlikely__]] {
94+
return {__first, __first};
95+
}
8796

88-
// }
97+
__value_type __min_element = *__first;
98+
__value_type __max_element = *__first;
99+
100+
_Iter __min_block_start = __first;
101+
_Iter __min_block_end = __last + 1;
102+
_Iter __max_block_start = __first;
103+
_Iter __max_block_end = __last + 1;
104+
105+
while(static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__likely__]]{
106+
__vec_type __vec[__unroll_count];
107+
for(size_t __i = 0; __i < __unroll_count; ++__i) {
108+
__vec[__i] = std::__load_vector<__vec_type>(__first + __i * __vec_size);
109+
// min
110+
auto __block_min_element = __builtin_reduce_min(__vec[__i]);
111+
if (__block_min_element < __min_element) {
112+
__min_element = __block_min_element;
113+
__min_block_start = __first + __i * __vec_size;
114+
__min_block_start = __first + (__i + 1) * __vec_size;
115+
}
116+
// max
117+
auto __block_max_element = __builtin_reduce_max(__vec[__i]);
118+
if (__block_max_element > __max_element) {
119+
__max_element = __block_max_element;
120+
__max_block_start = __first + __i * __vec_size;
121+
__max_block_start = __first + (__i + 1) * __vec_size;
122+
}
123+
}
124+
__first += __unroll_count * __vec_size;
125+
}
89126

127+
// remaining vectors
128+
while(static_cast<size_t>(__last - __first) >= __vec_size) {
129+
__vec_type __vec = std::__load_vector<__vec_type>(__first + __vec_size);
130+
auto __block_min_element = __builtin_reduce_min(__vec);
131+
if (__block_min_element < __min_element) {
132+
__min_element = __block_min_element;
133+
__min_block_start = __first + __i * __vec_size;
134+
__min_block_start = __first + (__i + 1) * __vec_size;
135+
}
136+
// max
137+
auto __block_max_element = __builtin_reduce_max(__vec);
138+
if (__block_max_element > __max_element) {
139+
__max_element = __block_max_element;
140+
__max_block_start = __first + __i * __vec_size;
141+
__max_block_start = __first + (__i + 1) * __vec_size;
142+
}
143+
__first += __vec_size;
144+
}
90145

91-
template <class _Iter, class _Proj, class _Comp,
92-
__enable_if_t<is_integral_v<typename std::iterator_traits<_Iter>::value_type>
93-
&& __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
94-
int> = 0
95-
>
96-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
97-
__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
98-
if (__libcpp_is_constant_evaluated()) {
99-
return __minmax_element_loop(__first, __last, __comp, __proj);
100-
} else {
146+
if (__last > __first) {
147+
__less_tag __pred;
148+
__identity __proj;
149+
auto __epilogue = std::__minmax_element_loop(__first, __last, __pred, __proj);
150+
auto __epilogue_min_element = *__epilogue.first;
151+
auto __epilogue_max_element = *__epilogue.second;
152+
if (__epilogue_min_element < __min_element && __epilogue_max_element > __max_element) {
153+
return __epilogue;
154+
} else if (__epilogue_min_element < __min_element) {
155+
__min_element = __epilogue_min_element;
156+
__min_block_start = __first;
157+
__min_block_end = __last;
158+
} else {
159+
__max_element = __epilogue_max_element;
160+
__max_block_start = __first;
161+
__max_block_end = __last;
162+
}
163+
}
101164

165+
// locate min
166+
for(; __min_block_start != __min_block_end; ++__min_block_start) {
167+
if (*__min_block_start == __min_element)
168+
break;
102169
}
170+
171+
for(; __max_block_start != __max_block_end; ++__max_block_start) {
172+
if (*__max_block_start == __max_element)
173+
break;
174+
}
175+
176+
return {__min_block_start, __max_block_start};
103177
}
104178

105179
template <class _Iter, class _Proj, class _Comp,
106-
__enable_if_t<!is_integral_v<typename std::iterator_traits<_Iter>::value_type>
107-
&& __can_map_to_integer_v<typename std::iterator_traits<_Iter>::value_type>
108-
&& __libcpp_is_trivially_equality_comparable<typename std::iterator_traits<_Iter>::value_type, typename std::iterator_traits<_Iter>::value_type>::value
109-
&& __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
180+
__enable_if_t
181+
<is_integral_v<__iter_value_type<_Iter>>
182+
&& is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
183+
&& __is_identity<_Proj>::value
184+
&& __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
110185
int> = 0
111186
>
112187
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
113188
__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
114189
if (__libcpp_is_constant_evaluated()) {
115190
return __minmax_element_loop(__first, __last, __comp, __proj);
116191
} else {
117-
192+
auto __res = std::__minmax_element_vectorized(std::__unwrap_iter(__first), std::__unwrap_iter(__last));
193+
return {std::__rewrap_iter(__first, __res.first), std::__rewrap_iter(__first, __res.second)};
118194
}
119195
}
120-
196+
// template <class _Iter, class _Proj, class _Comp,
197+
// __enable_if_t
198+
// <!is_integral_v<__iter_value_type<_Iter>>
199+
// && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
200+
// && __can_map_to_integer_v<__iter_value_type<_Iter>>
201+
// && __libcpp_is_trivially_equality_comparable<__iter_value_type<_Iter>, __iter_value_type<_Iter>>::value
202+
// && __is_identity<_Proj>::value
203+
// && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
204+
// int> = 0
205+
// >
206+
// _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
207+
// __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
208+
// if (__libcpp_is_constant_evaluated()) {
209+
// return __minmax_element_loop(__first, __last, __comp, __proj);
210+
// } else {
211+
// }
212+
// }
121213
template <class _Iter, class _Sent, class _Proj, class _Comp>
122214
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
123215
__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {

0 commit comments

Comments
 (0)