Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -323,14 +323,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
"_Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
}

let Features = "sse4.1",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ptestz128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestnzc128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
Expand Down Expand Up @@ -520,8 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
}


let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
Expand All @@ -530,7 +538,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
Expand All @@ -540,6 +549,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
}

let Features = "avx",
Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}
Expand Down
68 changes: 67 additions & 1 deletion clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2756,6 +2756,45 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_ia32_test_op(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();

assert(LHS.getNumElems() == RHS.getNumElems());

unsigned SourceLen = LHS.getNumElems();
QualType ElemQT = getElemType(LHS);
OptPrimType ElemPT = S.getContext().classify(ElemQT);
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);

APInt AWide(LaneWidth * SourceLen, 0);
APInt BWide(LaneWidth * SourceLen, 0);

for (unsigned I = 0; I != SourceLen; ++I) {
APInt ALane;
APInt BLane;

if (ElemQT->isIntegerType()) { // Get value.
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
ALane = LHS.elem<T>(I).toAPSInt();
BLane = RHS.elem<T>(I).toAPSInt();
});
} else if (ElemQT->isFloatingType()) { // Get only sign bit.
using T = PrimConv<PT_Float>::T;
ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
} else { // Must be integer or floating type.
return false;
}
AWide.insertBits(ALane, I * LaneWidth);
BWide.insertBits(BLane, I * LaneWidth);
}
pushInteger(S, Fn(AWide, BWide), Call->getType());
return true;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -3712,7 +3751,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});

case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call,
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call,
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call, [](const APInt &A, const APInt &B) {
return ((A & B) != 0) && ((~A & B) != 0);
});
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
Expand Down
63 changes: 62 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13905,6 +13905,40 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,

bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)>
Fn) {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

unsigned SourceLen = SourceLHS.getVectorLength();
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
QualType ElemQT = VT->getElementType();
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);

APInt AWide(LaneWidth * SourceLen, 0);
APInt BWide(LaneWidth * SourceLen, 0);

for (unsigned I = 0; I != SourceLen; ++I) {
APInt ALane;
APInt BLane;
if (ElemQT->isIntegerType()) { // Get value.
ALane = SourceLHS.getVectorElt(I).getInt();
BLane = SourceRHS.getVectorElt(I).getInt();
} else if (ElemQT->isFloatingType()) { // Get only sign bit.
ALane =
SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
BLane =
SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
} else { // Must be integer or floating type.
return false;
}
AWide.insertBits(ALane, I * LaneWidth);
BWide.insertBits(BLane, I * LaneWidth);
}
return Success(Fn(AWide, BWide), E);
};

auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
Expand Down Expand Up @@ -15018,7 +15052,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}

case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256: {
return EvalTestOp(
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
}
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256: {
return EvalTestOp(
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
}
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256: {
return EvalTestOp([](const APInt &A, const APInt &B) {
return ((A & B) != 0) && ((~A & B) != 0);
});
}
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
75 changes: 30 additions & 45 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the ZF flag in the EFLAGS register.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testz_pd(__m128d __a, __m128d __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
__m128d __b) {
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
}

Expand All @@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the CF flag in the EFLAGS register.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testc_pd(__m128d __a, __m128d __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
__m128d __b) {
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
}

Expand Down Expand Up @@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testnzc_pd(__m128d __a, __m128d __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_testnzc_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
}

Expand All @@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the ZF flag.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testz_ps(__m128 __a, __m128 __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
__m128 __b) {
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
}

Expand All @@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the CF flag.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testc_ps(__m128 __a, __m128 __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
__m128 __b) {
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
}

Expand Down Expand Up @@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
static __inline int __DEFAULT_FN_ATTRS128
_mm_testnzc_ps(__m128 __a, __m128 __b)
{
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
__m128 __b) {
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
}

Expand All @@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the ZF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_pd(__m256d __a, __m256d __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
__m256d __b) {
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
}

Expand All @@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the CF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_pd(__m256d __a, __m256d __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
__m256d __b) {
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
}

Expand Down Expand Up @@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_pd(__m256d __a, __m256d __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_testnzc_pd(__m256d __a, __m256d __b) {
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
}

Expand All @@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the ZF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_ps(__m256 __a, __m256 __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
__m256 __b) {
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
}

Expand All @@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the CF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_ps(__m256 __a, __m256 __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
__m256 __b) {
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
}

Expand Down Expand Up @@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_ps(__m256 __a, __m256 __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
__m256 __b) {
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
}

Expand All @@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the ZF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_si256(__m256i __a, __m256i __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_testz_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
}

Expand All @@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the CF flag.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_si256(__m256i __a, __m256i __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_testc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
}

Expand All @@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_si256(__m256i __a, __m256i __b)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_testnzc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
}

Expand Down
Loading
Loading