Skip to content

Commit 01c71b4

Browse files
committed
[libc++] Optimize std::min_element
[libc++] Fix formatting and move min_element.bench.cpp; [libc++] Use __invoke instead of invoke; [libc++] Fix build issues with find
1 parent f1b76c5 commit 01c71b4

File tree

4 files changed

+133
-6
lines changed

4 files changed

+133
-6
lines changed

libcxx/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ set(BENCHMARK_TESTS
121121
algorithms/make_heap_then_sort_heap.bench.cpp
122122
algorithms/min.bench.cpp
123123
algorithms/minmax.bench.cpp
124+
algorithms/min_element.bench.cpp
124125
algorithms/min_max_element.bench.cpp
125126
algorithms/mismatch.bench.cpp
126127
algorithms/pop_heap.bench.cpp
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#include <algorithm>
2+
#include <limits>
3+
#include <vector>
4+
5+
#include <benchmark/benchmark.h>
6+
#include <random>
7+
8+
// Benchmark: std::min_element over a std::vector<T> whose contents count down.
//
// The vector has state.range(0) elements. Element 0 holds
// std::numeric_limits<T>::max() and each following element is one smaller,
// so the smallest value sits at the back of the countdown.
//
// Once the countdown reaches std::numeric_limits<T>::min() it stops
// decrementing and every remaining element is filled with that minimum. For a
// narrow T and a large size, the tail of the vector is therefore a constant run
// rather than strictly decreasing.
//
// T is expected to be an integer type (it is decremented with `--` and
// compared with `!=`).
template <typename T>
9+
static void BM_stdmin_element_decreasing(benchmark::State& state) {
10+
std::vector<T> v(state.range(0));
11+
T start = std::numeric_limits<T>::max();
12+
T end = std::numeric_limits<T>::min();
13+
14+
// Fill with max, max-1, ...; the `start != end` guard keeps `start--` from
// ever being applied to the minimum value (no wrap-around / signed overflow).
for (size_t i = 0; i < v.size(); i++)
15+
v[i] = ((start != end) ? start-- : end);
16+
17+
// Timed region: one std::min_element call per iteration.
for (auto _ : state) {
18+
// NOTE(review): presumably here so the compiler must treat `v` as opaque
// on every iteration -- confirm against the Google Benchmark docs.
benchmark::DoNotOptimize(v);
19+
// The result is handed to DoNotOptimize as well; presumably this keeps the
// otherwise-unused search from being discarded.
benchmark::DoNotOptimize(std::min_element(v.begin(), v.end()));
20+
}
21+
}
22+
23+
// Register BM_stdmin_element_decreasing for char, short, int and long long,
// followed by the unsigned variant of each. All eight registrations use the
// same chain of argument generators; each generated argument is what the
// benchmark reads back through state.range(0), i.e. the vector length.
//
// The chain mixes dense small sizes (1..8), two Range() sweeps, a dense sweep
// from 5000 to 10000 in steps of 1000, a sweep between 1 << 14 and 1 << 16,
// and a single extra size of 70000. The exact values Range() emits between its
// bounds depend on Google Benchmark's range multiplier -- see its docs.
BENCHMARK(BM_stdmin_element_decreasing<char>)
24+
->DenseRange(1, 8)
25+
->Range(32, 128)
26+
->Range(256, 4096)
27+
->DenseRange(5000, 10000, 1000)
28+
->Range(1 << 14, 1 << 16)
29+
->Arg(70000);
30+
BENCHMARK(BM_stdmin_element_decreasing<short>)
31+
->DenseRange(1, 8)
32+
->Range(32, 128)
33+
->Range(256, 4096)
34+
->DenseRange(5000, 10000, 1000)
35+
->Range(1 << 14, 1 << 16)
36+
->Arg(70000);
37+
BENCHMARK(BM_stdmin_element_decreasing<int>)
38+
->DenseRange(1, 8)
39+
->Range(32, 128)
40+
->Range(256, 4096)
41+
->DenseRange(5000, 10000, 1000)
42+
->Range(1 << 14, 1 << 16)
43+
->Arg(70000);
44+
BENCHMARK(BM_stdmin_element_decreasing<long long>)
45+
->DenseRange(1, 8)
46+
->Range(32, 128)
47+
->Range(256, 4096)
48+
->DenseRange(5000, 10000, 1000)
49+
->Range(1 << 14, 1 << 16)
50+
->Arg(70000);
51+
// Unsigned counterparts of the four registrations above.
BENCHMARK(BM_stdmin_element_decreasing<unsigned char>)
52+
->DenseRange(1, 8)
53+
->Range(32, 128)
54+
->Range(256, 4096)
55+
->DenseRange(5000, 10000, 1000)
56+
->Range(1 << 14, 1 << 16)
57+
->Arg(70000);
58+
BENCHMARK(BM_stdmin_element_decreasing<unsigned short>)
59+
->DenseRange(1, 8)
60+
->Range(32, 128)
61+
->Range(256, 4096)
62+
->DenseRange(5000, 10000, 1000)
63+
->Range(1 << 14, 1 << 16)
64+
->Arg(70000);
65+
BENCHMARK(BM_stdmin_element_decreasing<unsigned int>)
66+
->DenseRange(1, 8)
67+
->Range(32, 128)
68+
->Range(256, 4096)
69+
->DenseRange(5000, 10000, 1000)
70+
->Range(1 << 14, 1 << 16)
71+
->Arg(70000);
72+
BENCHMARK(BM_stdmin_element_decreasing<unsigned long long>)
73+
->DenseRange(1, 8)
74+
->Range(32, 128)
75+
->Range(256, 4096)
76+
->DenseRange(5000, 10000, 1000)
77+
->Range(1 << 14, 1 << 16)
78+
->Arg(70000);
79+
80+
// Google Benchmark macro; presumably supplies the program entry point that
// runs the benchmarks registered above (see the Google Benchmark user guide).
BENCHMARK_MAIN();

libcxx/include/__algorithm/find.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
104104
// do first partial word
105105
if (__first.__ctz_ != 0) {
106106
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
107-
__storage_type __dn = std::min(__clz_f, __n);
107+
__storage_type __dn = (__clz_f < __n) ? __clz_f : __n;
108108
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
109109
__storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
110110
if (__b)

libcxx/include/__algorithm/min_element.h

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
#include <__algorithm/comp.h>
1313
#include <__algorithm/comp_ref_type.h>
14+
#include <__algorithm/find.h>
15+
#include <__algorithm/iterator_operations.h>
1416
#include <__config>
1517
#include <__functional/identity.h>
1618
#include <__functional/invoke.h>
@@ -33,12 +35,56 @@ __min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) {
3335
if (__first == __last)
3436
return __first;
3537

36-
_Iter __i = __first;
37-
while (++__i != __last)
38-
if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
39-
__first = __i;
38+
const size_t __n = static_cast<size_t>(std::distance(__first, __last));
4039

41-
return __first;
40+
if (__n <= 64) {
41+
_Iter __i = __first;
42+
while (++__i != __last)
43+
if (std::__invoke(__comp, std::__invoke(__proj, *__i), std::__invoke(__proj, *__first)))
44+
__first = __i;
45+
return __first;
46+
}
47+
48+
size_t __block_size = 256;
49+
50+
size_t __n_blocked = __n - (__n % __block_size);
51+
_Iter __block_start = __first, __block_end = __first;
52+
53+
typedef typename std::iterator_traits<_Iter>::value_type value_type;
54+
value_type __min_val = std::__invoke(__proj, *__first);
55+
56+
_Iter __curr = __first;
57+
for (size_t __i = 0; __i < __n_blocked; __i += __block_size) {
58+
_Iter __start = __curr;
59+
value_type __block_min = __min_val;
60+
for (size_t j = 0; j < __block_size; j++) {
61+
if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __block_min)) {
62+
__block_min = *__curr;
63+
}
64+
__curr++;
65+
}
66+
if (std::__invoke(__comp, __block_min, __min_val)) {
67+
__min_val = __block_min;
68+
__block_start = __start;
69+
__block_end = __curr;
70+
}
71+
}
72+
73+
value_type __epilogue_min = __min_val;
74+
_Iter __epilogue_start = __curr;
75+
while (__curr != __last) {
76+
if (std::__invoke(__comp, std::__invoke(__proj, *__curr), __epilogue_min)) {
77+
__epilogue_min = *__curr;
78+
}
79+
__curr++;
80+
}
81+
if (std::__invoke(__comp, __epilogue_min, __min_val)) {
82+
__min_val = __epilogue_min;
83+
__block_start = __epilogue_start;
84+
__block_end = __last;
85+
}
86+
87+
return std::__find(__block_start, __block_end, __min_val, __proj);
4288
}
4389

4490
template <class _Comp, class _Iter, class _Sent>

0 commit comments

Comments
 (0)