20 changes: 20 additions & 0 deletions clang/docs/LanguageExtensions.rst
@@ -3019,6 +3019,26 @@ C-style cast applied to each element of the first argument.
Query for this feature with ``__has_builtin(__builtin_convertvector)``.
``__builtin_selectvector``
--------------------------
``__builtin_selectvector`` is used to express generic vector element selection.
Review comment (Collaborator):
Extend this description to explicitly describe the input/output types and mechanism rather than relying only on the code snippet (although that's a nice accompaniment): the inputs must all be vectors with the same number of elements, the first two operands must have the same type, etc. (basically everything in SemaChecking).

**Signature**:
.. code-block:: c++
template <class T, size_t N>
simd_vec<T, N> __builtin_selectvector(simd_vec<T, N> lhs, simd_vec<T, N> rhs,
simd_vec<bool, N> cond)
Review comment (Contributor):
Maybe put cond as the first operand to match select and the old X86 builtins?

**Description**:
The returned vector is equivalent to
``simd_vec<T, N>{cond[0] ? rhs[0] : lhs[0], ..., cond[N - 1] ? rhs[N - 1] : lhs[N - 1]}``.
Query for this feature with ``__has_builtin(__builtin_selectvector)``.
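As an illustrative sketch of these semantics (the ``v4i`` and ``v4b`` typedef names below are hypothetical, not part of the extension):

.. code-block:: c++

  typedef int v4i __attribute__((ext_vector_type(4)));
  typedef bool v4b __attribute__((ext_vector_type(4)));

  v4i blend(v4i lhs, v4i rhs, v4b cond) {
    // Element i of the result is cond[i] ? rhs[i] : lhs[i].
    return __builtin_selectvector(lhs, rhs, cond);
  }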
``__builtin_bitreverse``
------------------------
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
@@ -1176,6 +1176,12 @@ def ConvertVector : Builtin {
let Prototype = "void(...)";
}

def SelectVector : Builtin {
let Spellings = ["__builtin_selectvector"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
Review comment (Collaborator):
Should this also be constexpr?

let Prototype = "void(...)";
}

def AllocaUninitialized : Builtin {
let Spellings = ["__builtin_alloca_uninitialized"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
24 changes: 0 additions & 24 deletions clang/include/clang/Basic/BuiltinsX86.def
@@ -1973,30 +1973,6 @@ TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:",
TARGET_BUILTIN(__builtin_ia32_vfcmulcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16,evex512")

// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cUOiV64cV64c", "ncV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_selectph_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectph_256, "V16xUsV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectph_512, "V32xUiV32xV32x", "ncV:512:", "avx512fp16,evex512")
TARGET_BUILTIN(__builtin_ia32_selectpbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpbf_256, "V16yUsV16yV16y", "ncV:256:", "avx512bf16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpbf_512, "V32yUiV32yV32y", "ncV:512:", "avx512bf16,evex512")
TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2OiUcV2OiV2Oi", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4OiUcV4OiV4Oi", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_selectsh_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_selectsbf_128, "V8yUcV8yV8y", "ncV:128:", "avx512bf16")
TARGET_BUILTIN(__builtin_ia32_selectss_128, "V4fUcV4fV4f", "ncV:128:", "avx512f")
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12035,7 +12035,9 @@ def err_builtin_invalid_arg_type: Error <
"a floating point type|"
"a vector of integers|"
"an unsigned integer|"
"an 'int'}1 (was %2)">;
"an 'int'|"
"a vector of bools"
"}1 (was %2)">;

def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
31 changes: 6 additions & 25 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -3744,6 +3744,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}

case Builtin::BI__builtin_selectvector: {
return RValue::get(Builder.CreateSelect(EmitScalarExpr(E->getArg(2)),
Review comment (Contributor):
Should we check for an all-ones (or all-zeros) mask like we did in EmitX86Select?

EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1))));
}
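For reference on the review question above, the EmitX86Select helper that the old X86 builtins went through short-circuits a constant mask roughly as follows (a paraphrased sketch, not the verbatim helper):

// If the mask is a constant with all bits set, every lane comes from Op0,
// so the select can be skipped entirely.
if (const auto *C = dyn_cast<llvm::Constant>(Mask))
  if (C->isAllOnesValue())
    return Op0;
return Builder.CreateSelect(Mask, Op0, Op1);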

case Builtin::BI__builtin_elementwise_abs: {
Value *Result;
QualType QT = E->getArg(0)->getType();
@@ -15513,31 +15519,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_prorvq256:
case X86::BI__builtin_ia32_prorvq512:
return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
case X86::BI__builtin_ia32_selectw_128:
case X86::BI__builtin_ia32_selectw_256:
case X86::BI__builtin_ia32_selectw_512:
case X86::BI__builtin_ia32_selectd_128:
case X86::BI__builtin_ia32_selectd_256:
case X86::BI__builtin_ia32_selectd_512:
case X86::BI__builtin_ia32_selectq_128:
case X86::BI__builtin_ia32_selectq_256:
case X86::BI__builtin_ia32_selectq_512:
case X86::BI__builtin_ia32_selectph_128:
case X86::BI__builtin_ia32_selectph_256:
case X86::BI__builtin_ia32_selectph_512:
case X86::BI__builtin_ia32_selectpbf_128:
case X86::BI__builtin_ia32_selectpbf_256:
case X86::BI__builtin_ia32_selectpbf_512:
case X86::BI__builtin_ia32_selectps_128:
case X86::BI__builtin_ia32_selectps_256:
case X86::BI__builtin_ia32_selectps_512:
case X86::BI__builtin_ia32_selectpd_128:
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
case X86::BI__builtin_ia32_selectsh_128:
case X86::BI__builtin_ia32_selectsbf_128:
case X86::BI__builtin_ia32_selectss_128:
24 changes: 12 additions & 12 deletions clang/lib/Headers/avx512bf16intrin.h
@@ -77,9 +77,9 @@ _mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) {
/// conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) {
return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
(__v32bf)_mm512_cvtne2ps_pbh(__A, __B),
(__v32bf)__W);
return (__m512bh)__builtin_selectvector(
(__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)__W,
__builtin_bit_cast(__vecmask32, __U));
Review comment (Contributor):
Can we use a (__vecmask32) cast directly, like (__v32bf) etc.?

}
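On the (__vecmask32) question above: for __builtin_bit_cast to be valid here, __vecmask32 is presumably a 32-element bool vector, which Clang packs into 32 bits to match __mmask32. One way such a mask type could be declared, shown only as an assumption since the actual typedef is defined elsewhere in this PR:

// Hypothetical declaration: a packed vector of 32 one-bit bool lanes.
typedef bool __vecmask32 __attribute__((ext_vector_type(32)));
// __builtin_bit_cast(__vecmask32, __U) then maps bit i of the 32-bit
// __mmask32 value to element i of the bool vector.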

/// Convert Two Packed Single Data to One Packed BF16 Data.
@@ -99,9 +99,9 @@ _mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) {
/// conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) {
return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
(__v32bf)_mm512_cvtne2ps_pbh(__A, __B),
(__v32bf)_mm512_setzero_si512());
return (__m512bh)__builtin_selectvector(
(__v32bf)_mm512_cvtne2ps_pbh(__A, __B), (__v32bf)_mm512_setzero_si512(),
__builtin_bit_cast(__vecmask32, __U));
}

/// Convert Packed Single Data to Packed BF16 Data.
@@ -200,9 +200,9 @@ _mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) {
/// __A, __B and __D
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B),
(__v16sf)__D);
return (__m512)__builtin_selectvector(
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)__D,
__builtin_bit_cast(__vecmask16, __U));
}

/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
@@ -224,9 +224,9 @@ _mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) {
/// __A, __B and __D
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B),
(__v16sf)_mm512_setzero_si512());
return (__m512)__builtin_selectvector(
(__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)_mm512_setzero_si512(),
__builtin_bit_cast(__vecmask16, __U));
}

/// Convert Packed BF16 Data to Packed float Data.
12 changes: 6 additions & 6 deletions clang/lib/Headers/avx512bitalgintrin.h
@@ -29,9 +29,9 @@ _mm512_popcnt_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
(__v32hi) _mm512_popcnt_epi16(__B),
(__v32hi) __A);
return (__m512i)__builtin_selectvector((__v32hi)_mm512_popcnt_epi16(__B),
(__v32hi)__A,
__builtin_bit_cast(__vecmask32, __U));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -51,9 +51,9 @@ _mm512_popcnt_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
(__v64qi) _mm512_popcnt_epi8(__B),
(__v64qi) __A);
return (__m512i)__builtin_selectvector((__v64qi)_mm512_popcnt_epi8(__B),
(__v64qi)__A,
__builtin_bit_cast(__vecmask64, __U));
}

static __inline__ __m512i __DEFAULT_FN_ATTRS