Skip to content

Commit ea857c7

Browse files
AlexGuteniev, cpplearner, StephanTLavavej
authored
Vectorize find-like algorithms for Clang for more types (#5767)
Co-authored-by: S. B. Tam <[email protected]>
Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent de8c84c commit ea857c7

File tree

4 files changed

+103
-123
lines changed

4 files changed

+103
-123
lines changed

benchmarks/src/find_and_count.cpp

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -25,21 +25,21 @@ enum class Op {
2525

2626
using namespace std;
2727

28-
template <class T, template <class> class Alloc, Op Operation>
28+
template <class T, template <class> class Alloc, Op Operation, T FillVal = T{'0'}, T FoundVal = T{'1'}>
2929
void bm(benchmark::State& state) {
3030
const auto size = static_cast<size_t>(state.range(0));
3131
const auto pos = static_cast<size_t>(state.range(1));
3232

3333
using Container =
3434
conditional_t<Operation >= Op::StringFind, basic_string<T, char_traits<T>, Alloc<T>>, vector<T, Alloc<T>>>;
3535

36-
Container a(size, T{'0'});
36+
Container a(size, FillVal);
3737

3838
if (pos < size) {
3939
if constexpr (Operation == Op::StringRFind || Operation == Op::StringFindNotLastOne) {
40-
a[size - pos - 1] = T{'1'};
40+
a[size - pos - 1] = FoundVal;
4141
} else {
42-
a[pos] = T{'1'};
42+
a[pos] = FoundVal;
4343
}
4444
} else {
4545
if constexpr (Operation == Op::FindUnsized) {
@@ -49,19 +49,19 @@ void bm(benchmark::State& state) {
4949

5050
for (auto _ : state) {
5151
if constexpr (Operation == Op::FindSized) {
52-
benchmark::DoNotOptimize(ranges::find(a.begin(), a.end(), T{'1'}));
52+
benchmark::DoNotOptimize(ranges::find(a.begin(), a.end(), FoundVal));
5353
} else if constexpr (Operation == Op::FindUnsized) {
54-
benchmark::DoNotOptimize(ranges::find(a.begin(), unreachable_sentinel, T{'1'}));
54+
benchmark::DoNotOptimize(ranges::find(a.begin(), unreachable_sentinel, FoundVal));
5555
} else if constexpr (Operation == Op::Count) {
56-
benchmark::DoNotOptimize(ranges::count(a.begin(), a.end(), T{'1'}));
56+
benchmark::DoNotOptimize(ranges::count(a.begin(), a.end(), FoundVal));
5757
} else if constexpr (Operation == Op::StringFind) {
58-
benchmark::DoNotOptimize(a.find(T{'1'}));
58+
benchmark::DoNotOptimize(a.find(FoundVal));
5959
} else if constexpr (Operation == Op::StringRFind) {
60-
benchmark::DoNotOptimize(a.rfind(T{'1'}));
60+
benchmark::DoNotOptimize(a.rfind(FoundVal));
6161
} else if constexpr (Operation == Op::StringFindNotFirstOne) {
62-
benchmark::DoNotOptimize(a.find_first_not_of(T{'0'}));
62+
benchmark::DoNotOptimize(a.find_first_not_of(FillVal));
6363
} else if constexpr (Operation == Op::StringFindNotLastOne) {
64-
benchmark::DoNotOptimize(a.find_last_not_of(T{'0'}));
64+
benchmark::DoNotOptimize(a.find_last_not_of(FillVal));
6565
}
6666
}
6767
}
@@ -72,6 +72,13 @@ void common_args(auto bm) {
7272
bm->Args({63, 62})->Args({31, 30})->Args({15, 14})->Args({7, 6});
7373
}
7474

75+
struct point {
76+
int16_t x;
77+
int16_t y;
78+
79+
bool operator==(const point&) const = default;
80+
};
81+
7582
BENCHMARK(bm<uint8_t, not_highly_aligned_allocator, Op::FindSized>)->Apply(common_args);
7683
BENCHMARK(bm<uint8_t, highly_aligned_allocator, Op::FindSized>)->Apply(common_args);
7784
BENCHMARK(bm<uint8_t, not_highly_aligned_allocator, Op::FindUnsized>)->Apply(common_args);
@@ -99,4 +106,7 @@ BENCHMARK(bm<char32_t, not_highly_aligned_allocator, Op::StringFindNotLastOne>)-
99106
BENCHMARK(bm<uint64_t, not_highly_aligned_allocator, Op::FindSized>)->Apply(common_args);
100107
BENCHMARK(bm<uint64_t, not_highly_aligned_allocator, Op::Count>)->Apply(common_args);
101108

109+
BENCHMARK(bm<point, not_highly_aligned_allocator, Op::FindSized>)->Apply(common_args);
110+
BENCHMARK(bm<point, not_highly_aligned_allocator, Op::Count>)->Apply(common_args);
111+
102112
BENCHMARK_MAIN();

stl/inc/algorithm

Lines changed: 15 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -310,43 +310,31 @@ bool _Includes_vectorized(
310310
template <class _Ty, class _TVal1, class _TVal2>
311311
__declspec(noalias) void _Replace_vectorized(
312312
_Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept {
313-
if constexpr (is_pointer_v<_Ty>) {
314-
#ifdef _WIN64
315-
::__std_replace_8(_First, _Last, reinterpret_cast<uint64_t>(_Old_val), reinterpret_cast<uint64_t>(_New_val));
316-
#else // ^^^ defined(_WIN64) / !defined(_WIN64) vvv
317-
::__std_replace_4(_First, _Last, reinterpret_cast<uint32_t>(_Old_val), reinterpret_cast<uint32_t>(_New_val));
318-
#endif // ^^^ !defined(_WIN64) ^^^
319-
} else if constexpr (sizeof(_Ty) == 4) {
320-
::__std_replace_4(_First, _Last, static_cast<uint32_t>(_Old_val), static_cast<uint32_t>(_New_val));
313+
if constexpr (sizeof(_Ty) == 4) {
314+
::__std_replace_4(
315+
_First, _Last, _STD _Find_arg_cast<uint32_t>(_Old_val), _STD _Find_arg_cast<uint32_t>(_New_val));
321316
} else if constexpr (sizeof(_Ty) == 8) {
322-
::__std_replace_8(_First, _Last, static_cast<uint64_t>(_Old_val), static_cast<uint64_t>(_New_val));
317+
::__std_replace_8(
318+
_First, _Last, _STD _Find_arg_cast<uint64_t>(_Old_val), _STD _Find_arg_cast<uint64_t>(_New_val));
323319
} else {
324320
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
325321
}
326322
}
327323

328324
template <class _Ty, class _TVal>
329325
_Ty* _Search_n_vectorized(_Ty* const _First, _Ty* const _Last, const size_t _Count, const _TVal _Val) noexcept {
330-
if constexpr (is_pointer_v<_Ty>) {
331-
#ifdef _WIN64
332-
return const_cast<_Ty*>(
333-
static_cast<const _Ty*>(::__std_search_n_8(_First, _Last, _Count, reinterpret_cast<uint64_t>(_Val))));
334-
#else // ^^^ defined(_WIN64) / !defined(_WIN64) vvv
335-
return const_cast<_Ty*>(
336-
static_cast<const _Ty*>(::__std_search_n_4(_First, _Last, _Count, reinterpret_cast<uint32_t>(_Val))));
337-
#endif // ^^^ !defined(_WIN64) ^^^
338-
} else if constexpr (sizeof(_Ty) == 1) {
326+
if constexpr (sizeof(_Ty) == 1) {
339327
return const_cast<_Ty*>(
340-
static_cast<const _Ty*>(::__std_search_n_1(_First, _Last, _Count, static_cast<uint8_t>(_Val))));
328+
static_cast<const _Ty*>(::__std_search_n_1(_First, _Last, _Count, _STD _Find_arg_cast<uint8_t>(_Val))));
341329
} else if constexpr (sizeof(_Ty) == 2) {
342330
return const_cast<_Ty*>(
343-
static_cast<const _Ty*>(::__std_search_n_2(_First, _Last, _Count, static_cast<uint16_t>(_Val))));
331+
static_cast<const _Ty*>(::__std_search_n_2(_First, _Last, _Count, _STD _Find_arg_cast<uint16_t>(_Val))));
344332
} else if constexpr (sizeof(_Ty) == 4) {
345333
return const_cast<_Ty*>(
346-
static_cast<const _Ty*>(::__std_search_n_4(_First, _Last, _Count, static_cast<uint32_t>(_Val))));
334+
static_cast<const _Ty*>(::__std_search_n_4(_First, _Last, _Count, _STD _Find_arg_cast<uint32_t>(_Val))));
347335
} else if constexpr (sizeof(_Ty) == 8) {
348336
return const_cast<_Ty*>(
349-
static_cast<const _Ty*>(::__std_search_n_8(_First, _Last, _Count, static_cast<uint64_t>(_Val))));
337+
static_cast<const _Ty*>(::__std_search_n_8(_First, _Last, _Count, _STD _Find_arg_cast<uint64_t>(_Val))));
350338
} else {
351339
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
352340
}
@@ -370,20 +358,14 @@ _Ty* _Unique_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
370358
template <class _Ty, class _TVal>
371359
_Ty* _Remove_copy_vectorized(
372360
const _Ty* const _First, const _Ty* const _Last, _Ty* const _Dest, const _TVal _Val) noexcept {
373-
if constexpr (is_pointer_v<_Ty>) {
374-
#ifdef _WIN64
375-
return reinterpret_cast<_Ty*>(::__std_remove_copy_8(_First, _Last, _Dest, reinterpret_cast<uint64_t>(_Val)));
376-
#else // ^^^ defined(_WIN64) / !defined(_WIN64) vvv
377-
return reinterpret_cast<_Ty*>(::__std_remove_copy_4(_First, _Last, _Dest, reinterpret_cast<uint32_t>(_Val)));
378-
#endif // ^^^ !defined(_WIN64) ^^^
379-
} else if constexpr (sizeof(_Ty) == 1) {
380-
return reinterpret_cast<_Ty*>(::__std_remove_copy_1(_First, _Last, _Dest, static_cast<uint8_t>(_Val)));
361+
if constexpr (sizeof(_Ty) == 1) {
362+
return reinterpret_cast<_Ty*>(::__std_remove_copy_1(_First, _Last, _Dest, _STD _Find_arg_cast<uint8_t>(_Val)));
381363
} else if constexpr (sizeof(_Ty) == 2) {
382-
return reinterpret_cast<_Ty*>(::__std_remove_copy_2(_First, _Last, _Dest, static_cast<uint16_t>(_Val)));
364+
return reinterpret_cast<_Ty*>(::__std_remove_copy_2(_First, _Last, _Dest, _STD _Find_arg_cast<uint16_t>(_Val)));
383365
} else if constexpr (sizeof(_Ty) == 4) {
384-
return reinterpret_cast<_Ty*>(::__std_remove_copy_4(_First, _Last, _Dest, static_cast<uint32_t>(_Val)));
366+
return reinterpret_cast<_Ty*>(::__std_remove_copy_4(_First, _Last, _Dest, _STD _Find_arg_cast<uint32_t>(_Val)));
385367
} else if constexpr (sizeof(_Ty) == 8) {
386-
return reinterpret_cast<_Ty*>(::__std_remove_copy_8(_First, _Last, _Dest, static_cast<uint64_t>(_Val)));
368+
return reinterpret_cast<_Ty*>(::__std_remove_copy_8(_First, _Last, _Dest, _STD _Find_arg_cast<uint64_t>(_Val)));
387369
} else {
388370
_STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size
389371
}

stl/inc/xmemory

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -42,20 +42,14 @@ void* __stdcall __std_remove_8(void* _First, void* _Last, uint64_t _Val) noexcep
4242
_STD_BEGIN
4343
template <class _Ty, class _TVal>
4444
_Ty* _Remove_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept {
45-
if constexpr (is_pointer_v<_Ty>) {
46-
#ifdef _WIN64
47-
return reinterpret_cast<_Ty*>(::__std_remove_8(_First, _Last, reinterpret_cast<uint64_t>(_Val)));
48-
#else
49-
return reinterpret_cast<_Ty*>(::__std_remove_4(_First, _Last, reinterpret_cast<uint32_t>(_Val)));
50-
#endif
51-
} else if constexpr (sizeof(_Ty) == 1) {
52-
return reinterpret_cast<_Ty*>(::__std_remove_1(_First, _Last, static_cast<uint8_t>(_Val)));
45+
if constexpr (sizeof(_Ty) == 1) {
46+
return reinterpret_cast<_Ty*>(::__std_remove_1(_First, _Last, _STD _Find_arg_cast<uint8_t>(_Val)));
5347
} else if constexpr (sizeof(_Ty) == 2) {
54-
return reinterpret_cast<_Ty*>(::__std_remove_2(_First, _Last, static_cast<uint16_t>(_Val)));
48+
return reinterpret_cast<_Ty*>(::__std_remove_2(_First, _Last, _STD _Find_arg_cast<uint16_t>(_Val)));
5549
} else if constexpr (sizeof(_Ty) == 4) {
56-
return reinterpret_cast<_Ty*>(::__std_remove_4(_First, _Last, static_cast<uint32_t>(_Val)));
50+
return reinterpret_cast<_Ty*>(::__std_remove_4(_First, _Last, _STD _Find_arg_cast<uint32_t>(_Val)));
5751
} else if constexpr (sizeof(_Ty) == 8) {
58-
return reinterpret_cast<_Ty*>(::__std_remove_8(_First, _Last, static_cast<uint64_t>(_Val)));
52+
return reinterpret_cast<_Ty*>(::__std_remove_8(_First, _Last, _STD _Find_arg_cast<uint64_t>(_Val)));
5953
} else {
6054
_STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size
6155
}

0 commit comments

Comments
 (0)