Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -4117,15 +4117,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
}

let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsh_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
}

let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512bf16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
Expand Down
29 changes: 29 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2838,6 +2838,30 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
return true;
}

/// Scalar variant of AVX512 predicated select:
/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
/// All other elements are taken from RHS.
static bool interp__builtin_select_scalar(InterpState &S,
const CallExpr *Call) {
unsigned N =
Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();

const Pointer &W = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
APSInt U = popToAPSInt(S, Call->getArg(0));
const Pointer &Dst = S.Stk.peek<Pointer>();

bool TakeA0 = U.getZExtValue() & 1ULL;

for (unsigned I = TakeA0; I != N; ++I)
Dst.elem<Floating>(I) = W.elem<Floating>(I);
if (TakeA0)
Dst.elem<Floating>(0) = A.elem<Floating>(0);

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
APSInt Mask = popToAPSInt(S, Call->getArg(2));
Expand Down Expand Up @@ -4151,6 +4175,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return APInt::getAllOnes(DstBits);
});

case clang::X86::BI__builtin_ia32_selectss_128:
case clang::X86::BI__builtin_ia32_selectsd_128:
case clang::X86::BI__builtin_ia32_selectsh_128:
case clang::X86::BI__builtin_ia32_selectsbf_128:
return interp__builtin_select_scalar(S, Call);
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
Expand Down
25 changes: 25 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12202,6 +12202,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};

auto EvalSelectScalar = [&](unsigned Len) -> bool {
APSInt Mask;
APValue AVal, WVal;
if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
!EvaluateAsRValue(Info, E->getArg(1), AVal) ||
!EvaluateAsRValue(Info, E->getArg(2), WVal))
return false;

bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
SmallVector<APValue, 4> Res;
Res.reserve(Len);
Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
for (unsigned I = 1; I < Len; ++I)
Res.push_back(WVal.getVectorElt(I));
APValue V(Res.data(), Res.size());
return Success(V, E);
};

switch (E->getBuiltinCallee()) {
default:
return false;
Expand Down Expand Up @@ -12505,6 +12523,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
case clang::X86::BI__builtin_ia32_selectss_128:
return EvalSelectScalar(4);
case clang::X86::BI__builtin_ia32_selectsd_128:
return EvalSelectScalar(2);
case clang::X86::BI__builtin_ia32_selectsh_128:
case clang::X86::BI__builtin_ia32_selectsbf_128:
return EvalSelectScalar(8);
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx10_2bf16intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,12 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
return __a;
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS128
static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W);
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS128
static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B),
_mm_setzero_pbh());
Expand Down
64 changes: 32 additions & 32 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1820,14 +1820,14 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -1850,14 +1850,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -1935,14 +1935,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -1964,14 +1964,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -2050,14 +2050,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -2079,14 +2079,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -2165,14 +2165,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -2195,14 +2195,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down
Loading
Loading