-
Notifications
You must be signed in to change notification settings - Fork 41
WIP: clang build fixes and workarounds #16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
a364375
f2d541e
166d341
497120c
959de34
d50057f
4b22866
2faf842
62e261f
f27600d
6a484db
c594c4a
dd6f6a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,6 +59,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double; | |
| using __m512i [[__gnu__::__vector_size__(64)]] = long long; | ||
| #endif | ||
|
|
||
| #if __clang__ | ||
| template<typename T> auto __builtin_ia32_ps256_ps (T x) { return __builtin_shufflevector(x, _mm_setzero_ps() , 0, 1, 2, 3, 4, 4, 4, 4); } | ||
| template<typename T> auto __builtin_ia32_ps512_ps (T x) { return __builtin_shufflevector(x, _mm_setzero_ps() , 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4); } | ||
| template<typename T> auto __builtin_ia32_ps512_256ps(T x) { return __builtin_shufflevector(x, _mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8); } | ||
| #endif | ||
|
|
||
| // __next_power_of_2{{{ | ||
| /** | ||
| * \internal | ||
|
|
@@ -178,7 +184,7 @@ using __value_type_or_identity_t | |
| // }}} | ||
| // __is_vectorizable {{{ | ||
| template <typename _Tp> | ||
| struct __is_vectorizable : public std::is_arithmetic<_Tp> | ||
| struct __is_vectorizable : public std::is_arithmetic<std::remove_reference_t<_Tp>> | ||
| { | ||
| }; | ||
| template <> struct __is_vectorizable<bool> : public false_type | ||
|
|
@@ -1039,7 +1045,7 @@ template <size_t _Np, bool _Sanitized> struct _BitMask | |
| "not implemented for bitmasks larger than one ullong"); | ||
| if constexpr (_NewSize == 1) // must sanitize because the return _Tp is bool | ||
| return _SanitizedBitMask<1>{ | ||
| {static_cast<bool>(_M_bits[0] & (_Tp(1) << _DropLsb))}}; | ||
| (static_cast<bool>(_M_bits[0] & (_Tp(1) << _DropLsb)))}; | ||
| else | ||
| return _BitMask<_NewSize, | ||
| ((_NewSize + _DropLsb == sizeof(_Tp) * CHAR_BIT | ||
|
|
@@ -1285,7 +1291,7 @@ struct __vector_type_n<_Tp, _Np, | |
| static constexpr size_t _Bytes = _Np * sizeof(_Tp) < __min_vector_size<_Tp> | ||
| ? __min_vector_size<_Tp> | ||
| : __next_power_of_2(_Np * sizeof(_Tp)); | ||
| using type [[__gnu__::__vector_size__(_Bytes)]] = _Tp; | ||
| using type [[__gnu__::__vector_size__(_Bytes)]] = std::remove_reference_t<_Tp>; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, |
||
| }; | ||
|
|
||
| template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)> | ||
|
|
@@ -2068,7 +2074,7 @@ struct __intrinsic_type< | |
| static constexpr std::size_t _VBytes | ||
| = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64; | ||
| using type [[__gnu__::__vector_size__(_VBytes)]] | ||
| = std::conditional_t<std::is_integral_v<_Tp>, long long int, _Tp>; | ||
| = std::conditional_t<std::is_integral_v<std::remove_reference_t<_Tp>>, long long int, std::remove_reference_t<_Tp>>; | ||
| }; | ||
| #endif // _GLIBCXX_SIMD_HAVE_SSE | ||
|
|
||
|
|
@@ -3559,8 +3565,7 @@ split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x) | |
|
|
||
| // }}} | ||
| // split<_Sizes...>(simd) {{{ | ||
| template <size_t... _Sizes, typename _Tp, typename _Ap, | ||
| typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>> | ||
| template <size_t... _Sizes, typename _Tp, typename _Ap, typename> | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removing the SFINAE condition breaks the spec
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SFINAE is there in declaration. Here it is definition. clang says it is redefinition of default argument
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Declaration is at line 3314
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK thanks. I'll take a look |
||
| _GLIBCXX_SIMD_ALWAYS_INLINE | ||
| std::tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...> | ||
| split(const simd<_Tp, _Ap>& __x) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -624,7 +624,7 @@ __convert_all(_From __v) | |
| return __vector_bitcast<_FromT, decltype(__n)::value>(__vv); | ||
| }; | ||
| [[maybe_unused]] const auto __vi = __to_intrin(__v); | ||
| auto&& __make_array = [](std::initializer_list<auto> __xs) { | ||
| auto&& __make_array = [](auto __xs) { | ||
|
||
| return __call_with_subscripts( | ||
| __xs.begin(), std::make_index_sequence<_Np>(), | ||
| [](auto... __ys) { return _R{__vector_bitcast<_ToT>(__ys)...}; }); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4225,7 +4225,7 @@ struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi> | |
| __m128i __a = {}; | ||
| __builtin_memcpy(&__a, __mem, 16); | ||
| const auto __b = _mm512_cvtepi8_epi32(__a); | ||
| __builtin_memcpy(&__a, __mem + 16, size<_Tp> - 16); | ||
| __builtin_memcpy(&__a, static_cast<const char*>(__mem) + 16, size<_Tp> - 16); | ||
| const auto __c = _mm512_cvtepi8_epi32(__a); | ||
| return _mm512_test_epi32_mask(__b, __b) | ||
| | (_mm512_test_epi32_mask(__c, __c) << 16); | ||
|
|
@@ -4235,21 +4235,21 @@ struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi> | |
| __m128i __a = {}; | ||
| __builtin_memcpy(&__a, __mem, 16); | ||
| const auto __b = _mm512_cvtepi8_epi32(__a); | ||
| __builtin_memcpy(&__a, __mem + 16, 16); | ||
| __builtin_memcpy(&__a, static_cast<const char*>(__mem) + 16, 16); | ||
| const auto __c = _mm512_cvtepi8_epi32(__a); | ||
| if constexpr (size<_Tp> <= 48) | ||
| { | ||
| __builtin_memcpy(&__a, __mem + 32, size<_Tp> - 32); | ||
| __builtin_memcpy(&__a, static_cast<const char*>(__mem) + 32, size<_Tp> - 32); | ||
| const auto __d = _mm512_cvtepi8_epi32(__a); | ||
| return _mm512_test_epi32_mask(__b, __b) | ||
| | (_mm512_test_epi32_mask(__c, __c) << 16) | ||
| | (_ULLong(_mm512_test_epi32_mask(__d, __d)) << 32); | ||
| } | ||
| else | ||
| { | ||
| __builtin_memcpy(&__a, __mem + 16, 32); | ||
| __builtin_memcpy(&__a, static_cast<const char*>(__mem) + 16, 32); | ||
|
||
| const auto __d = _mm512_cvtepi8_epi32(__a); | ||
| __builtin_memcpy(&__a, __mem + 32, size<_Tp> - 48); | ||
| __builtin_memcpy(&__a, static_cast<const char*>(__mem) + 32, size<_Tp> - 48); | ||
| const auto __e = _mm512_cvtepi8_epi32(__a); | ||
| return _mm512_test_epi32_mask(__b, __b) | ||
| | (_mm512_test_epi32_mask(__c, __c) << 16) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this needed?
_Tpshould never be a reference. And references are not vectorizable. (Pointers might be - needs a proposal)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang by some reason returns long long&& out of operator[] in my example:
https://godbolt.org/z/WJN7_M
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then whatever calls
__is_vectorizable<decltype(x[0])>is incorrect.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, thus a number of remove_references will be required to workaround that thing across other places.