Skip to content

Commit 7dff4d7

Browse files
Add algorithm-specific detection macros for vectorization (#5801)
1 parent: e21d834 · commit: 7dff4d7

File tree

8 files changed

+399
-198
lines changed

8 files changed

+399
-198
lines changed

stl/inc/__msvc_string_view.hpp

Lines changed: 47 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ _STL_DISABLE_CLANG_WARNINGS
1919
#pragma push_macro("new")
2020
#undef new
2121

22-
#if _USE_STD_VECTOR_ALGORITHMS
2322
extern "C" {
2423
// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
2524
// won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by
@@ -29,6 +28,7 @@ extern "C" {
2928
// compiler has to assume that the denoted arrays are "globally address taken", and that any later calls to
3029
// unanalyzable routines may modify those arrays.
3130

31+
#if _VECTORIZED_FIND_FIRST_OF
3232
__declspec(noalias) size_t __stdcall __std_find_first_of_trivial_pos_1(
3333
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
3434
__declspec(noalias) size_t __stdcall __std_find_first_of_trivial_pos_2(
@@ -37,17 +37,23 @@ __declspec(noalias) size_t __stdcall __std_find_first_of_trivial_pos_4(
3737
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
3838
__declspec(noalias) size_t __stdcall __std_find_first_of_trivial_pos_8(
3939
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
40+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
4041

42+
#if _VECTORIZED_FIND_LAST_OF
4143
__declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_1(
4244
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
4345
__declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_2(
4446
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
47+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
4548

49+
#if _VECTORIZED_FIND
4650
const void* __stdcall __std_find_not_ch_1(const void* _First, const void* _Last, uint8_t _Val) noexcept;
4751
const void* __stdcall __std_find_not_ch_2(const void* _First, const void* _Last, uint16_t _Val) noexcept;
4852
const void* __stdcall __std_find_not_ch_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
4953
const void* __stdcall __std_find_not_ch_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;
54+
#endif // ^^^ _VECTORIZED_FIND ^^^
5055

56+
#if _VECTORIZED_FIND_LAST
5157
__declspec(noalias) size_t __stdcall __std_find_last_not_ch_pos_1(
5258
const void* _First, const void* _Last, uint8_t _Val) noexcept;
5359
__declspec(noalias) size_t __stdcall __std_find_last_not_ch_pos_2(
@@ -56,21 +62,27 @@ __declspec(noalias) size_t __stdcall __std_find_last_not_ch_pos_4(
5662
const void* _First, const void* _Last, uint32_t _Val) noexcept;
5763
__declspec(noalias) size_t __stdcall __std_find_last_not_ch_pos_8(
5864
const void* _First, const void* _Last, uint64_t _Val) noexcept;
65+
#endif // ^^^ _VECTORIZED_FIND_LAST ^^^
5966

67+
#if _VECTORIZED_FIND_FIRST_OF
6068
__declspec(noalias) size_t __stdcall __std_find_first_not_of_trivial_pos_1(
6169
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
6270
__declspec(noalias) size_t __stdcall __std_find_first_not_of_trivial_pos_2(
6371
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
72+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
6473

74+
#if _VECTORIZED_FIND_LAST_OF
6575
__declspec(noalias) size_t __stdcall __std_find_last_not_of_trivial_pos_1(
6676
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
6777
__declspec(noalias) size_t __stdcall __std_find_last_not_of_trivial_pos_2(
6878
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
79+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
6980

7081
} // extern "C"
7182

7283
_STD_BEGIN
7384

85+
#if _VECTORIZED_FIND_FIRST_OF
7486
template <class _Ty1, class _Ty2>
7587
size_t _Find_first_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
7688
const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
@@ -87,7 +99,9 @@ size_t _Find_first_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _
8799
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
88100
}
89101
}
102+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
90103

104+
#if _VECTORIZED_FIND_LAST_OF
91105
template <class _Ty1, class _Ty2>
92106
size_t _Find_last_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
93107
const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
@@ -100,7 +114,9 @@ size_t _Find_last_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _H
100114
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
101115
}
102116
}
117+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
103118

119+
#if _VECTORIZED_FIND
104120
template <class _Ty>
105121
const _Ty* _Find_not_ch_vectorized(const _Ty* const _First, const _Ty* const _Last, const _Ty _Ch) noexcept {
106122
if constexpr (sizeof(_Ty) == 1) {
@@ -115,7 +131,9 @@ const _Ty* _Find_not_ch_vectorized(const _Ty* const _First, const _Ty* const _La
115131
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
116132
}
117133
}
134+
#endif // ^^^ _VECTORIZED_FIND ^^^
118135

136+
#if _VECTORIZED_FIND_LAST
119137
template <class _Ty>
120138
size_t _Find_last_not_ch_pos_vectorized(const _Ty* const _First, const _Ty* const _Last, const _Ty _Ch) noexcept {
121139
if constexpr (sizeof(_Ty) == 1) {
@@ -130,6 +148,9 @@ size_t _Find_last_not_ch_pos_vectorized(const _Ty* const _First, const _Ty* cons
130148
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
131149
}
132150
}
151+
#endif // ^^^ _VECTORIZED_FIND_LAST ^^^
152+
153+
#if _VECTORIZED_FIND_FIRST_OF
133154
template <class _Ty1, class _Ty2>
134155
size_t _Find_first_not_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
135156
const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
@@ -142,7 +163,9 @@ size_t _Find_first_not_of_pos_vectorized(const _Ty1* const _Haystack, const size
142163
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
143164
}
144165
}
166+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
145167

168+
#if _VECTORIZED_FIND_LAST_OF
146169
template <class _Ty1, class _Ty2>
147170
size_t _Find_last_not_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
148171
const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
@@ -155,12 +178,8 @@ size_t _Find_last_not_of_pos_vectorized(const _Ty1* const _Haystack, const size_
155178
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
156179
}
157180
}
181+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
158182

159-
_STD_END
160-
161-
#endif // _USE_STD_VECTOR_ALGORITHMS
162-
163-
_STD_BEGIN
164183
#ifdef __clang__
165184
#define _HAS_MEMCPY_MEMMOVE_INTRINSICS 1
166185
#else // ^^^ use __builtin_memcpy and __builtin_memmove / use workaround vvv
@@ -360,7 +379,7 @@ struct _WChar_traits : private _Char_traits<_Elem, unsigned short> {
360379
_NODISCARD static _CONSTEXPR17 int compare(_In_reads_(_Count) const _Elem* const _First1,
361380
_In_reads_(_Count) const _Elem* const _First2, const size_t _Count) noexcept /* strengthened */ {
362381
// compare [_First1, _First1 + _Count) with [_First2, ...)
363-
#if _USE_STD_VECTOR_ALGORITHMS
382+
#if _VECTORIZED_MISMATCH
364383
if (!_STD _Is_constant_evaluated()) {
365384
// TRANSITION, GH-2289: Use vectorized algorithms for better performance than __builtin_wmemcmp.
366385
const size_t _Pos = _Mismatch_vectorized<sizeof(_Elem)>(_First1, _First2, _Count);
@@ -370,7 +389,7 @@ struct _WChar_traits : private _Char_traits<_Elem, unsigned short> {
370389
return _First1[_Pos] < _First2[_Pos] ? -1 : +1;
371390
}
372391
}
373-
#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^
392+
#endif // ^^^ _VECTORIZED_MISMATCH ^^^
374393

375394
if constexpr (is_same_v<_Elem, wchar_t>) {
376395
return __builtin_wmemcmp(_First1, _First2, _Count);
@@ -728,7 +747,7 @@ constexpr size_t _Traits_find(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits>
728747
return _Start_at;
729748
}
730749

731-
#if _USE_STD_VECTOR_ALGORITHMS
750+
#if _VECTORIZED_SEARCH
732751
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
733752
if (!_STD _Is_constant_evaluated()) {
734753
const auto _End = _Haystack + _Hay_size;
@@ -741,7 +760,7 @@ constexpr size_t _Traits_find(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits>
741760
}
742761
}
743762
}
744-
#endif // _USE_STD_VECTOR_ALGORITHMS
763+
#endif // ^^^ _VECTORIZED_SEARCH ^^^
745764

746765
const auto _Possible_matches_end = _Haystack + (_Hay_size - _Needle_size) + 1;
747766
for (auto _Match_try = _Haystack + _Start_at;; ++_Match_try) {
@@ -764,7 +783,7 @@ constexpr size_t _Traits_find_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Trai
764783
return static_cast<size_t>(-1); // (npos) no room for match
765784
}
766785

767-
#if _USE_STD_VECTOR_ALGORITHMS
786+
#if _VECTORIZED_FIND
768787
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
769788
if (!_STD _Is_constant_evaluated()) {
770789
const auto _End = _Haystack + _Hay_size;
@@ -777,7 +796,7 @@ constexpr size_t _Traits_find_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Trai
777796
}
778797
}
779798
}
780-
#endif // _USE_STD_VECTOR_ALGORITHMS
799+
#endif // ^^^ _VECTORIZED_FIND ^^^
781800

782801
const auto _Found_at = _Traits::find(_Haystack + _Start_at, _Hay_size - _Start_at, _Ch);
783802
if (_Found_at) {
@@ -802,7 +821,7 @@ constexpr size_t _Traits_rfind(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits
802821

803822
const size_t _Actual_start_at = (_STD min) (_Start_at, _Hay_size - _Needle_size);
804823

805-
#if _USE_STD_VECTOR_ALGORITHMS
824+
#if _VECTORIZED_FIND_END
806825
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
807826
if (!_STD _Is_constant_evaluated()) {
808827
// _Find_end_vectorized takes into account the needle length when locating the search start.
@@ -819,7 +838,7 @@ constexpr size_t _Traits_rfind(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits
819838
}
820839
}
821840
}
822-
#endif // _USE_STD_VECTOR_ALGORITHMS
841+
#endif // ^^^ _VECTORIZED_FIND_END ^^^
823842

824843
for (auto _Match_try = _Haystack + _Actual_start_at;; --_Match_try) {
825844
if (_Traits::eq(*_Match_try, *_Needle) && _Traits::compare(_Match_try, _Needle, _Needle_size) == 0) {
@@ -843,7 +862,7 @@ constexpr size_t _Traits_rfind_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Tra
843862

844863
const size_t _Actual_start_at = (_STD min) (_Start_at, _Hay_size - 1);
845864

846-
#if _USE_STD_VECTOR_ALGORITHMS
865+
#if _VECTORIZED_FIND_LAST
847866
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
848867
if (!_STD _Is_constant_evaluated()) {
849868
const auto _End = _Haystack + _Actual_start_at + 1;
@@ -856,7 +875,7 @@ constexpr size_t _Traits_rfind_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_Tra
856875
}
857876
}
858877
}
859-
#endif // _USE_STD_VECTOR_ALGORITHMS
878+
#endif // ^^^ _VECTORIZED_FIND_LAST ^^^
860879

861880
for (auto _Match_try = _Haystack + _Actual_start_at;; --_Match_try) {
862881
if (_Traits::eq(*_Match_try, _Ch)) {
@@ -934,7 +953,7 @@ constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t
934953
const auto _Hay_end = _Haystack + _Hay_size;
935954

936955
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
937-
#if _USE_STD_VECTOR_ALGORITHMS
956+
#if _VECTORIZED_FIND_FIRST_OF
938957
if (!_STD _Is_constant_evaluated()) {
939958
const size_t _Remaining_size = _Hay_size - _Start_at;
940959
if (_Remaining_size + _Needle_size >= _Threshold_find_first_of) {
@@ -945,7 +964,7 @@ constexpr size_t _Traits_find_first_of(_In_reads_(_Hay_size) const _Traits_ptr_t
945964
return _Pos;
946965
}
947966
}
948-
#endif // _USE_STD_VECTOR_ALGORITHMS
967+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
949968

950969
_String_bitmap<typename _Traits::char_type> _Matches;
951970

@@ -983,7 +1002,7 @@ constexpr size_t _Traits_find_last_of(_In_reads_(_Hay_size) const _Traits_ptr_t<
9831002

9841003
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
9851004
using _Elem = typename _Traits::char_type;
986-
#if _USE_STD_VECTOR_ALGORITHMS
1005+
#if _VECTORIZED_FIND_LAST_OF
9871006
if constexpr (sizeof(_Elem) <= 2) {
9881007
if (!_STD _Is_constant_evaluated()) {
9891008
const size_t _Remaining_size = _Hay_start + 1;
@@ -992,7 +1011,7 @@ constexpr size_t _Traits_find_last_of(_In_reads_(_Hay_size) const _Traits_ptr_t<
9921011
}
9931012
}
9941013
}
995-
#endif // _USE_STD_VECTOR_ALGORITHMS
1014+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
9961015

9971016
_String_bitmap<_Elem> _Matches;
9981017
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
@@ -1035,7 +1054,7 @@ constexpr size_t _Traits_find_first_not_of(_In_reads_(_Hay_size) const _Traits_p
10351054

10361055
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
10371056
using _Elem = typename _Traits::char_type;
1038-
#if _USE_STD_VECTOR_ALGORITHMS
1057+
#if _VECTORIZED_FIND_FIRST_OF
10391058
if constexpr (sizeof(_Elem) <= 2) {
10401059
if (!_STD _Is_constant_evaluated()) {
10411060
const size_t _Remaining_size = _Hay_size - _Start_at;
@@ -1048,7 +1067,7 @@ constexpr size_t _Traits_find_first_not_of(_In_reads_(_Hay_size) const _Traits_p
10481067
}
10491068
}
10501069
}
1051-
#endif // _USE_STD_VECTOR_ALGORITHMS
1070+
#endif // ^^^ _VECTORIZED_FIND_FIRST_OF ^^^
10521071

10531072
_String_bitmap<_Elem> _Matches;
10541073
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
@@ -1082,7 +1101,7 @@ constexpr size_t _Traits_find_not_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_
10821101

10831102
const auto _End = _Haystack + _Hay_size;
10841103

1085-
#if _USE_STD_VECTOR_ALGORITHMS
1104+
#if _VECTORIZED_FIND
10861105
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
10871106
if (!_STD _Is_constant_evaluated()) {
10881107
const auto _Result = _STD _Find_not_ch_vectorized(_Haystack + _Start_at, _End, _Ch);
@@ -1093,7 +1112,7 @@ constexpr size_t _Traits_find_not_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<_
10931112
}
10941113
}
10951114
}
1096-
#endif // _USE_STD_VECTOR_ALGORITHMS
1115+
#endif // ^^^ _VECTORIZED_FIND ^^^
10971116

10981117
for (auto _Match_try = _Haystack + _Start_at; _Match_try < _End; ++_Match_try) {
10991118
if (!_Traits::eq(*_Match_try, _Ch)) {
@@ -1117,7 +1136,7 @@ constexpr size_t _Traits_find_last_not_of(_In_reads_(_Hay_size) const _Traits_pt
11171136

11181137
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
11191138
using _Elem = typename _Traits::char_type;
1120-
#if _USE_STD_VECTOR_ALGORITHMS
1139+
#if _VECTORIZED_FIND_LAST_OF
11211140
if constexpr (sizeof(_Elem) <= 2) {
11221141
if (!_STD _Is_constant_evaluated()) {
11231142
const size_t _Remaining_size = _Hay_start + 1;
@@ -1126,7 +1145,7 @@ constexpr size_t _Traits_find_last_not_of(_In_reads_(_Hay_size) const _Traits_pt
11261145
}
11271146
}
11281147
}
1129-
#endif // _USE_STD_VECTOR_ALGORITHMS
1148+
#endif // ^^^ _VECTORIZED_FIND_LAST_OF ^^^
11301149

11311150
_String_bitmap<_Elem> _Matches;
11321151
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
@@ -1165,13 +1184,13 @@ constexpr size_t _Traits_rfind_not_ch(_In_reads_(_Hay_size) const _Traits_ptr_t<
11651184

11661185
const size_t _Actual_start_at = (_STD min) (_Start_at, _Hay_size - 1);
11671186

1168-
#if _USE_STD_VECTOR_ALGORITHMS
1187+
#if _VECTORIZED_FIND_LAST
11691188
if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
11701189
if (!_STD _Is_constant_evaluated()) {
11711190
return _STD _Find_last_not_ch_pos_vectorized(_Haystack, _Haystack + _Actual_start_at + 1, _Ch);
11721191
}
11731192
}
1174-
#endif // _USE_STD_VECTOR_ALGORITHMS
1193+
#endif // ^^^ _VECTORIZED_FIND_LAST ^^^
11751194

11761195
for (auto _Match_try = _Haystack + _Actual_start_at;; --_Match_try) {
11771196
if (!_Traits::eq(*_Match_try, _Ch)) {

0 commit comments

Comments
 (0)