Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -4107,15 +4107,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
}

let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsh_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
}

let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512bf16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2419,6 +2419,27 @@ static bool interp__builtin_elementwise_int_unaryop(
return false;
}

static bool interp__builtin_select_scalar(InterpState &S,
const CallExpr *Call) {
unsigned N =
Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();

const Pointer &W = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
APSInt U = popToAPSInt(S, Call->getArg(0));
const Pointer &Dst = S.Stk.peek<Pointer>();

bool TakeA0 = U.getZExtValue() & 1ULL;

for (unsigned I = TakeA0; I != N; ++I)
Dst.elem<Floating>(I) = W.elem<Floating>(I);
if (TakeA0)
Dst.elem<Floating>(0) = A.elem<Floating>(0);

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_elementwise_int_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
Expand Down Expand Up @@ -4121,6 +4142,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return APInt::getAllOnes(DstBits);
});

case clang::X86::BI__builtin_ia32_selectss_128:
case clang::X86::BI__builtin_ia32_selectsd_128:
case clang::X86::BI__builtin_ia32_selectsh_128:
case clang::X86::BI__builtin_ia32_selectsbf_128:
return interp__builtin_select_scalar(S, Call);
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
Expand Down
25 changes: 25 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12202,6 +12202,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};

auto EvalSelectScalar = [&](unsigned Len) -> bool {
APSInt Mask;
APValue AVal, WVal;
if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
!EvaluateAsRValue(Info, E->getArg(1), AVal) ||
!EvaluateAsRValue(Info, E->getArg(2), WVal))
return false;

bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
SmallVector<APValue, 4> Res;
Res.reserve(Len);
Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
for (unsigned i = 1; i < Len; ++i)
Res.push_back(WVal.getVectorElt(i));
APValue V(Res.data(), Res.size());
return Success(V, E);
};

switch (E->getBuiltinCallee()) {
default:
return false;
Expand Down Expand Up @@ -12505,6 +12523,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
case clang::X86::BI__builtin_ia32_selectss_128:
return EvalSelectScalar(4);
case clang::X86::BI__builtin_ia32_selectsd_128:
return EvalSelectScalar(2);
case clang::X86::BI__builtin_ia32_selectsh_128:
case clang::X86::BI__builtin_ia32_selectsbf_128:
return EvalSelectScalar(8);
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx10_2bf16intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,12 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
return __a;
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS128
static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W);
}

static __inline__ __m128bh __DEFAULT_FN_ATTRS128
static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B),
_mm_setzero_pbh());
Expand Down
64 changes: 32 additions & 32 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1834,14 +1834,14 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -1864,14 +1864,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -1949,14 +1949,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -1978,14 +1978,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -2064,14 +2064,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -2093,14 +2093,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down Expand Up @@ -2179,14 +2179,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
Expand All @@ -2209,14 +2209,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
Expand Down
Loading
Loading