|
10 | 10 | #define _LIBCPP___ALGORITHM_MINMAX_ELEMENT_H |
11 | 11 |
|
12 | 12 | #include <__algorithm/comp.h> |
| 13 | +#include <__algorithm/simd_utils.h> |
| 14 | +#include <__algorithm/unwrap_iter.h> |
13 | 15 | #include <__config> |
14 | 16 | #include <__functional/identity.h> |
15 | 17 | #include <__iterator/iterator_traits.h> |
@@ -81,43 +83,133 @@ __minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) |
81 | 83 | } |
82 | 84 |
|
83 | 85 |
|
84 | | -// template<class _Tp> |
85 | | -// typename std::iterator_traits<_Iter>::value_type |
86 | | -// __minmax_element_vectorized(_Tp __first, _Tp __last) { |
| 86 | +template<class _Iter> |
| 87 | +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> |
| 88 | +__minmax_element_vectorized(_Iter __first, _Iter __last) { |
| 89 | + using __value_type = __iter_value_type<_Iter>; |
| 90 | + constexpr size_t __unroll_count = 4; |
| 91 | + constexpr size_t __vec_size = __native_vector_size<__value_type>; |
| 92 | + using __vec_type = __simd_vector<__value_type, __vec_size>; |
| 93 | + if (__last == __first) [[__unlikely__]] { |
| 94 | + return {__first, __first}; |
| 95 | + } |
87 | 96 |
|
88 | | -// } |
| 97 | + __value_type __min_element = *__first; |
| 98 | + __value_type __max_element = *__first; |
| 99 | + |
| 100 | + _Iter __min_block_start = __first; |
| 101 | + _Iter __min_block_end = __last + 1; |
| 102 | + _Iter __max_block_start = __first; |
| 103 | + _Iter __max_block_end = __last + 1; |
| 104 | + |
| 105 | + while(static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__likely__]]{ |
| 106 | + __vec_type __vec[__unroll_count]; |
| 107 | + for(size_t __i = 0; __i < __unroll_count; ++__i) { |
| 108 | + __vec[__i] = std::__load_vector<__vec_type>(__first + __i * __vec_size); |
| 109 | + // min |
| 110 | + auto __block_min_element = __builtin_reduce_min(__vec[__i]); |
| 111 | + if (__block_min_element < __min_element) { |
| 112 | + __min_element = __block_min_element; |
| 113 | + __min_block_start = __first + __i * __vec_size; |
| 114 | + __min_block_start = __first + (__i + 1) * __vec_size; |
| 115 | + } |
| 116 | + // max |
| 117 | + auto __block_max_element = __builtin_reduce_max(__vec[__i]); |
| 118 | + if (__block_max_element > __max_element) { |
| 119 | + __max_element = __block_max_element; |
| 120 | + __max_block_start = __first + __i * __vec_size; |
| 121 | + __max_block_start = __first + (__i + 1) * __vec_size; |
| 122 | + } |
| 123 | + } |
| 124 | + __first += __unroll_count * __vec_size; |
| 125 | + } |
89 | 126 |
|
| 127 | + // remaining vectors |
| 128 | + while(static_cast<size_t>(__last - __first) >= __vec_size) { |
| 129 | + __vec_type __vec = std::__load_vector<__vec_type>(__first + __vec_size); |
| 130 | + auto __block_min_element = __builtin_reduce_min(__vec); |
| 131 | + if (__block_min_element < __min_element) { |
| 132 | + __min_element = __block_min_element; |
| 133 | + __min_block_start = __first + __i * __vec_size; |
| 134 | + __min_block_start = __first + (__i + 1) * __vec_size; |
| 135 | + } |
| 136 | + // max |
| 137 | + auto __block_max_element = __builtin_reduce_max(__vec); |
| 138 | + if (__block_max_element > __max_element) { |
| 139 | + __max_element = __block_max_element; |
| 140 | + __max_block_start = __first + __i * __vec_size; |
| 141 | + __max_block_start = __first + (__i + 1) * __vec_size; |
| 142 | + } |
| 143 | + __first += __vec_size; |
| 144 | + } |
90 | 145 |
|
91 | | -template <class _Iter, class _Proj, class _Comp, |
92 | | - __enable_if_t<is_integral_v<typename std::iterator_traits<_Iter>::value_type> |
93 | | - && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>, |
94 | | - int> = 0 |
95 | | - > |
96 | | -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> |
97 | | -__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) { |
98 | | - if (__libcpp_is_constant_evaluated()) { |
99 | | - return __minmax_element_loop(__first, __last, __comp, __proj); |
100 | | - } else { |
| 146 | + if (__last > __first) { |
| 147 | + __less_tag __pred; |
| 148 | + __identity __proj; |
| 149 | + auto __epilogue = std::__minmax_element_loop(__first, __last, __pred, __proj); |
| 150 | + auto __epilogue_min_element = *__epilogue.first; |
| 151 | + auto __epilogue_max_element = *__epilogue.second; |
| 152 | + if (__epilogue_min_element < __min_element && __epilogue_max_element > __max_element) { |
| 153 | + return __epilogue; |
| 154 | + } else if (__epilogue_min_element < __min_element) { |
| 155 | + __min_element = __epilogue_min_element; |
| 156 | + __min_block_start = __first; |
| 157 | + __min_block_end = __last; |
| 158 | + } else { |
| 159 | + __max_element = __epilogue_max_element; |
| 160 | + __max_block_start = __first; |
| 161 | + __max_block_end = __last; |
| 162 | + } |
| 163 | + } |
101 | 164 |
|
| 165 | + // locate min |
| 166 | + for(; __min_block_start != __min_block_end; ++__min_block_start) { |
| 167 | + if (*__min_block_start == __min_element) |
| 168 | + break; |
102 | 169 | } |
| 170 | + |
| 171 | + for(; __max_block_start != __max_block_end; ++__max_block_start) { |
| 172 | + if (*__max_block_start == __max_element) |
| 173 | + break; |
| 174 | + } |
| 175 | + |
| 176 | + return {__min_block_start, __max_block_start}; |
103 | 177 | } |
104 | 178 |
|
105 | 179 | template <class _Iter, class _Proj, class _Comp, |
106 | | - __enable_if_t<!is_integral_v<typename std::iterator_traits<_Iter>::value_type> |
107 | | - && __can_map_to_integer_v<typename std::iterator_traits<_Iter>::value_type> |
108 | | - && __libcpp_is_trivially_equality_comparable<typename std::iterator_traits<_Iter>::value_type, typename std::iterator_traits<_Iter>::value_type>::value |
109 | | - && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>, |
| 180 | + __enable_if_t |
| 181 | + <is_integral_v<__iter_value_type<_Iter>> |
| 182 | + && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag> |
| 183 | + && __is_identity<_Proj>::value |
| 184 | + && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>, |
110 | 185 | int> = 0 |
111 | 186 | > |
112 | 187 | _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> |
113 | 188 | __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) { |
114 | 189 | if (__libcpp_is_constant_evaluated()) { |
115 | 190 | return __minmax_element_loop(__first, __last, __comp, __proj); |
116 | 191 | } else { |
117 | | - |
| 192 | + auto __res = std::__minmax_element_vectorized(std::__unwrap_iter(__first), std::__unwrap_iter(__last)); |
| 193 | + return {std::__rewrap_iter(__first, __res.first), std::__rewrap_iter(__first, __res.second)}; |
118 | 194 | } |
119 | 195 | } |
120 | | - |
| 196 | +// template <class _Iter, class _Proj, class _Comp, |
| 197 | +// __enable_if_t |
| 198 | +// <!is_integral_v<__iter_value_type<_Iter>> |
| 199 | +// && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag> |
| 200 | +// && __can_map_to_integer_v<__iter_value_type<_Iter>> |
| 201 | +// && __libcpp_is_trivially_equality_comparable<__iter_value_type<_Iter>, __iter_value_type<_Iter>>::value |
| 202 | +// && __is_identity<_Proj>::value |
| 203 | +// && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>, |
| 204 | +// int> = 0 |
| 205 | +// > |
| 206 | +// _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> |
| 207 | +// __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) { |
| 208 | +// if (__libcpp_is_constant_evaluated()) { |
| 209 | +// return __minmax_element_loop(__first, __last, __comp, __proj); |
| 210 | +// } else { |
| 211 | +// } |
| 212 | +// } |
121 | 213 | template <class _Iter, class _Sent, class _Proj, class _Comp> |
122 | 214 | _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> |
123 | 215 | __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { |
|
0 commit comments