-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Clang] Add __builtin_selectvector and use it for AVX512 intrinsics #91306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3019,6 +3019,26 @@ C-style cast applied to each element of the first argument. | |
|
|
||
| Query for this feature with ``__has_builtin(__builtin_convertvector)``. | ||
|
|
||
| ``__builtin_selectvector`` | ||
| -------------------------- | ||
|
|
||
| ``__builtin_selectvector`` is used to express generic vector element selection. | ||
|
|
||
| **Signature**: | ||
|
|
||
| .. code-block:: c++ | ||
|
|
||
| template <class T, size_t N> | ||
| simd_vec<T, N> __builtin_selectvector(simd_vec<T, N> lhs, simd_vec<T, N> rhs, | ||
| simd_vec<bool, N> cond) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe put |
||
|
|
||
| **Description**: | ||
|
|
||
| The returned vector is equivalent to | ||
| ``simd_vec<T, N>{cond[0] ? rhs[0] : lhs[0], ..., cond[N - 1] ? rhs[N - 1] : lhs[N - 1]}``. | ||
|
|
||
| Query for this feature with ``__has_builtin(__builtin_selectvector)``. | ||
|
|
||
| ``__builtin_bitreverse`` | ||
| ------------------------ | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1176,6 +1176,12 @@ def ConvertVector : Builtin { | |
| let Prototype = "void(...)"; | ||
| } | ||
|
|
||
| def SelectVector : Builtin { | ||
| let Spellings = ["__builtin_selectvector"]; | ||
| let Attributes = [NoThrow, Const, CustomTypeChecking]; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this also be constexpr? |
||
| let Prototype = "void(...)"; | ||
| } | ||
|
|
||
| def AllocaUninitialized : Builtin { | ||
| let Spellings = ["__builtin_alloca_uninitialized"]; | ||
| let Attributes = [FunctionWithBuiltinPrefix, NoThrow]; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3744,6 +3744,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, | |
| return RValue::get(Result); | ||
| } | ||
|
|
||
| case Builtin::BI__builtin_selectvector: { | ||
| return RValue::get(Builder.CreateSelect(EmitScalarExpr(E->getArg(2)), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we check all one (or all zero) like we did in |
||
| EmitScalarExpr(E->getArg(0)), | ||
| EmitScalarExpr(E->getArg(1)))); | ||
| } | ||
|
|
||
| case Builtin::BI__builtin_elementwise_abs: { | ||
| Value *Result; | ||
| QualType QT = E->getArg(0)->getType(); | ||
|
|
@@ -15513,31 +15519,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, | |
| case X86::BI__builtin_ia32_prorvq256: | ||
| case X86::BI__builtin_ia32_prorvq512: | ||
| return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); | ||
| case X86::BI__builtin_ia32_selectb_128: | ||
| case X86::BI__builtin_ia32_selectb_256: | ||
| case X86::BI__builtin_ia32_selectb_512: | ||
| case X86::BI__builtin_ia32_selectw_128: | ||
| case X86::BI__builtin_ia32_selectw_256: | ||
| case X86::BI__builtin_ia32_selectw_512: | ||
| case X86::BI__builtin_ia32_selectd_128: | ||
| case X86::BI__builtin_ia32_selectd_256: | ||
| case X86::BI__builtin_ia32_selectd_512: | ||
| case X86::BI__builtin_ia32_selectq_128: | ||
| case X86::BI__builtin_ia32_selectq_256: | ||
| case X86::BI__builtin_ia32_selectq_512: | ||
| case X86::BI__builtin_ia32_selectph_128: | ||
| case X86::BI__builtin_ia32_selectph_256: | ||
| case X86::BI__builtin_ia32_selectph_512: | ||
| case X86::BI__builtin_ia32_selectpbf_128: | ||
| case X86::BI__builtin_ia32_selectpbf_256: | ||
| case X86::BI__builtin_ia32_selectpbf_512: | ||
| case X86::BI__builtin_ia32_selectps_128: | ||
| case X86::BI__builtin_ia32_selectps_256: | ||
| case X86::BI__builtin_ia32_selectps_512: | ||
| case X86::BI__builtin_ia32_selectpd_128: | ||
| case X86::BI__builtin_ia32_selectpd_256: | ||
| case X86::BI__builtin_ia32_selectpd_512: | ||
| return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); | ||
| case X86::BI__builtin_ia32_selectsh_128: | ||
| case X86::BI__builtin_ia32_selectsbf_128: | ||
| case X86::BI__builtin_ia32_selectss_128: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,9 +77,9 @@ _mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) { | |
| /// conversion of __B, and higher 256 bits come from conversion of __A. | ||
| static __inline__ __m512bh __DEFAULT_FN_ATTRS512 | ||
| _mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) { | ||
| return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, | ||
| (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), | ||
| (__v32bf)__W); | ||
| return (__m512bh)__builtin_selectvector( | ||
| (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)__W, | ||
| __builtin_bit_cast(__vecmask32, __U)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use (__vecmask32) dirctly like |
||
| } | ||
|
|
||
| /// Convert Two Packed Single Data to One Packed BF16 Data. | ||
|
|
@@ -99,9 +99,9 @@ _mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) { | |
| /// conversion of __B, and higher 256 bits come from conversion of __A. | ||
| static __inline__ __m512bh __DEFAULT_FN_ATTRS512 | ||
| _mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) { | ||
| return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, | ||
| (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), | ||
| (__v32bf)_mm512_setzero_si512()); | ||
| return (__m512bh)__builtin_selectvector( | ||
| (__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)_mm512_setzero_si512(), | ||
| __builtin_bit_cast(__vecmask32, __U)); | ||
| } | ||
|
|
||
| /// Convert Packed Single Data to Packed BF16 Data. | ||
|
|
@@ -200,9 +200,9 @@ _mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) { | |
| /// __A, __B and __D | ||
| static __inline__ __m512 __DEFAULT_FN_ATTRS512 | ||
| _mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) { | ||
| return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, | ||
| (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), | ||
| (__v16sf)__D); | ||
| return (__m512)__builtin_selectvector( | ||
| (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)__D, | ||
| __builtin_bit_cast(__vecmask16, __U)); | ||
| } | ||
|
|
||
| /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. | ||
|
|
@@ -224,9 +224,9 @@ _mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) { | |
| /// __A, __B and __D | ||
| static __inline__ __m512 __DEFAULT_FN_ATTRS512 | ||
| _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) { | ||
| return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, | ||
| (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), | ||
| (__v16sf)_mm512_setzero_si512()); | ||
| return (__m512)__builtin_selectvector( | ||
| (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)_mm512_setzero_si512(), | ||
| __builtin_bit_cast(__vecmask16, __U)); | ||
| } | ||
|
|
||
| /// Convert Packed BF16 Data to Packed float Data. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extend this description to explicitly describe the input/output types and mechanism - don't just rely on the code snippet (although that's a nice accompaniment): The input must all be vectors of the same same number of elements, the 2 first operands must be the same type etc. etc. (basically everything in SemaChecking).