Skip to content

Commit 4da5617

Browse files
committed
Squash
1 parent 9c118aa commit 4da5617

File tree

7 files changed

+296
-60
lines changed

7 files changed

+296
-60
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -319,14 +319,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
319319
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
320320
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
321321
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
322-
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
323-
def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
324-
def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
325-
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
322+
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
323+
"_Vector<2,double>, _Constant char)">;
326324
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
327325
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
328326
}
329327

328+
// 128-bit PTEST builtins (ptestz / ptestc / ptestnzc). Each takes two
// 2 x long long vectors and returns an int. They are declared under an
// attribute set that includes Constexpr in addition to NoThrow and Const.
let Features = "sse4.1",
329+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
330+
def ptestz128
331+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
332+
def ptestc128
333+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
334+
def ptestnzc128
335+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
336+
}
337+
330338
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
331339
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
332340
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -516,8 +524,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
516524
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
517525
}
518526

519-
520-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
527+
let Features = "avx",
528+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
521529
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
522530
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
523531
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -526,7 +534,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
526534
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
527535
}
528536

529-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
537+
let Features = "avx",
538+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
530539
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
531540
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
532541
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -536,6 +545,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
536545
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
537546
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
538547
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
548+
}
549+
550+
let Features = "avx",
551+
Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
539552
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
540553
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
541554
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2817,6 +2817,66 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
28172817
return true;
28182818
}
28192819

2820+
/// Shared evaluator for two-operand vector "test" builtins that produce an
/// integer result.
///
/// Pops two vector operands from the interpreter stack, reduces each one to a
/// single APInt, hands both to \p Fn, and pushes 1 when \p Fn returns true and
/// 0 otherwise (typed as the call's result type).
///
/// How each operand is reduced depends on the element type:
///  - integer elements: every lane is packed into one wide APInt, lane I at
///    bit offset I * LaneWidth;
///  - floating elements: only the top (sign) bit of each lane's bit pattern is
///    kept, giving an APInt with one bit per lane.
///
/// \param Fn predicate applied to the two reduced operands.
/// \returns false when the two operands' element types differ or are neither
///          integer nor floating; true once a result has been pushed.
static bool interp__builtin_ia32_test_op(
2821+
InterpState &S, CodePtr OpPC, const CallExpr *Call,
2822+
llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
2823+
// Operands come off the stack in reverse order: right-hand vector first.
const Pointer &RHS = S.Stk.pop<Pointer>();
2824+
const Pointer &LHS = S.Stk.pop<Pointer>();
2825+
2826+
assert(LHS.getNumElems() == RHS.getNumElems());
2827+
assert(LHS.getFieldDesc()->isPrimitiveArray() &&
2828+
RHS.getFieldDesc()->isPrimitiveArray());
2829+
2830+
// Both operands must share the same element type; otherwise give up.
if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS),
2831+
getElemType(RHS)))
2832+
return false;
2833+
2834+
const unsigned SourceLen = LHS.getNumElems();
2835+
const QualType ElemQT = getElemType(LHS);
2836+
const OptPrimType ElemPT = S.getContext().classify(ElemQT);
2837+
2838+
if (ElemQT->isIntegerType()) {
2839+
// The first element is read only to learn the bit width of one lane.
APInt FirstElem;
2840+
INT_TYPE_SWITCH_NO_BOOL(*ElemPT,
2841+
{ FirstElem = LHS.elem<T>(0).toAPSInt(); });
2842+
const unsigned LaneWidth = FirstElem.getBitWidth();
2843+
2844+
// Wide accumulators sized to hold every lane of a whole vector.
APInt AWide(LaneWidth * SourceLen, 0);
2845+
APInt BWide(LaneWidth * SourceLen, 0);
2846+
2847+
for (unsigned I = 0; I != SourceLen; ++I) {
2848+
APInt ALane;
2849+
APInt BLane;
2850+
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
2851+
ALane = LHS.elem<T>(I).toAPSInt();
2852+
BLane = RHS.elem<T>(I).toAPSInt();
2853+
});
2854+
// Lane I occupies bits [I * LaneWidth, (I + 1) * LaneWidth).
AWide.insertBits(ALane, I * LaneWidth);
2855+
BWide.insertBits(BLane, I * LaneWidth);
2856+
}
2857+
pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType());
2858+
return true;
2859+
} else if (ElemQT->isFloatingType()) {
2860+
// One bit per lane: bit I holds the sign bit of lane I.
APInt ASignBits(SourceLen, 0);
2861+
APInt BSignBits(SourceLen, 0);
2862+
2863+
for (unsigned I = 0; I != SourceLen; ++I) {
2864+
using T = PrimConv<PT_Float>::T;
2865+
APInt ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
2866+
APInt BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
2867+
// The highest bit of the lane's bit pattern is taken as its sign.
const unsigned SignBit = ALane.getBitWidth() - 1;
2868+
const bool ALaneSign = ALane[SignBit];
2869+
const bool BLaneSign = BLane[SignBit];
2870+
ASignBits.setBitVal(I, ALaneSign);
2871+
BSignBits.setBitVal(I, BLaneSign);
2872+
}
2873+
pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType());
2874+
return true;
2875+
} else { // Must be integer or float type
2876+
return false;
2877+
}
2878+
}
2879+
28202880
static bool interp__builtin_elementwise_triop(
28212881
InterpState &S, CodePtr OpPC, const CallExpr *Call,
28222882
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3678,7 +3738,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
36783738
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
36793739
return ((APInt)C).isNegative() ? T : F;
36803740
});
3681-
3741+
case X86::BI__builtin_ia32_ptestz128:
3742+
case X86::BI__builtin_ia32_ptestz256:
3743+
case X86::BI__builtin_ia32_vtestzps:
3744+
case X86::BI__builtin_ia32_vtestzps256:
3745+
case X86::BI__builtin_ia32_vtestzpd:
3746+
case X86::BI__builtin_ia32_vtestzpd256:
3747+
return interp__builtin_ia32_test_op(
3748+
S, OpPC, Call,
3749+
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
3750+
case X86::BI__builtin_ia32_ptestc128:
3751+
case X86::BI__builtin_ia32_ptestc256:
3752+
case X86::BI__builtin_ia32_vtestcps:
3753+
case X86::BI__builtin_ia32_vtestcps256:
3754+
case X86::BI__builtin_ia32_vtestcpd:
3755+
case X86::BI__builtin_ia32_vtestcpd256:
3756+
return interp__builtin_ia32_test_op(
3757+
S, OpPC, Call,
3758+
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
3759+
case X86::BI__builtin_ia32_ptestnzc128:
3760+
case X86::BI__builtin_ia32_ptestnzc256:
3761+
case X86::BI__builtin_ia32_vtestnzcps:
3762+
case X86::BI__builtin_ia32_vtestnzcps256:
3763+
case X86::BI__builtin_ia32_vtestnzcpd:
3764+
case X86::BI__builtin_ia32_vtestnzcpd256:
3765+
return interp__builtin_ia32_test_op(
3766+
S, OpPC, Call, [](const APInt &A, const APInt &B) {
3767+
return ((A & B) != 0) && ((~A & B) != 0);
3768+
});
36823769
case X86::BI__builtin_ia32_selectb_128:
36833770
case X86::BI__builtin_ia32_selectb_256:
36843771
case X86::BI__builtin_ia32_selectb_512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13766,6 +13766,51 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
1376613766

1376713767
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1376813768
unsigned BuiltinOp) {
13769+
// Helper for two-operand vector "test" builtins. Evaluates both call
// arguments as rvalues, reduces each vector to a single APInt, and reports
// Fn's boolean answer through Success(). Integer-element vectors are packed
// lane by lane into one wide APInt (lane I at bit offset I * LaneWidth);
// floating-element vectors contribute only the top (sign) bit of each lane,
// one bit per lane. Yields false if either argument fails to evaluate or the
// element type is neither integer nor floating.
auto EvalTestOp =
13770+
[&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
13771+
APValue SourceLHS, SourceRHS;
13772+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
13773+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
13774+
return false;
13775+
13776+
// Lane count is taken from the first operand and reused for the second.
unsigned SourceLen = SourceLHS.getVectorLength();
13777+
13778+
// The element type is read from the first argument's vector type.
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
13779+
QualType ElemQT = VT->getElementType();
13780+
13781+
if (ElemQT->isIntegerType()) {
13782+
// Width of one lane, taken from the first element of the left operand.
const unsigned LaneWidth =
13783+
SourceLHS.getVectorElt(0).getInt().getBitWidth();
13784+
APInt AWide(LaneWidth * SourceLen, 0);
13785+
APInt BWide(LaneWidth * SourceLen, 0);
13786+
13787+
for (unsigned I = 0; I != SourceLen; ++I) {
13788+
APInt ALane = SourceLHS.getVectorElt(I).getInt();
13789+
APInt BLane = SourceRHS.getVectorElt(I).getInt();
13790+
// Lane I occupies bits [I * LaneWidth, (I + 1) * LaneWidth).
AWide.insertBits(ALane, I * LaneWidth);
13791+
BWide.insertBits(BLane, I * LaneWidth);
13792+
}
13793+
return Success(Fn(AWide, BWide), E);
13794+
13795+
} else if (ElemQT->isFloatingType()) {
13796+
// One bit per lane: bit I holds the sign bit of lane I.
APInt ASignBits(SourceLen, 0);
13797+
APInt BSignBits(SourceLen, 0);
13798+
13799+
for (unsigned I = 0; I != SourceLen; ++I) {
13800+
APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
13801+
APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
13802+
// The highest bit of the lane's bit pattern is taken as its sign.
const unsigned SignBit = ALane.getBitWidth() - 1;
13803+
const bool ALaneSign = ALane[SignBit];
13804+
const bool BLaneSign = BLane[SignBit];
13805+
ASignBits.setBitVal(I, ALaneSign);
13806+
BSignBits.setBitVal(I, BLaneSign);
13807+
}
13808+
return Success(Fn(ASignBits, BSignBits), E);
13809+
13810+
} else { // Must be integer or float type
13811+
return false;
13812+
}
13813+
};
1376913814

1377013815
auto HandleMaskBinOp =
1377113816
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
@@ -14879,7 +14924,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1487914924
Result.setBitVal(P++, Val[I]);
1488014925
return Success(Result, E);
1488114926
}
14882-
14927+
case X86::BI__builtin_ia32_ptestz128:
14928+
case X86::BI__builtin_ia32_ptestz256:
14929+
case X86::BI__builtin_ia32_vtestzps:
14930+
case X86::BI__builtin_ia32_vtestzps256:
14931+
case X86::BI__builtin_ia32_vtestzpd:
14932+
case X86::BI__builtin_ia32_vtestzpd256: {
14933+
return EvalTestOp(
14934+
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
14935+
}
14936+
case X86::BI__builtin_ia32_ptestc128:
14937+
case X86::BI__builtin_ia32_ptestc256:
14938+
case X86::BI__builtin_ia32_vtestcps:
14939+
case X86::BI__builtin_ia32_vtestcps256:
14940+
case X86::BI__builtin_ia32_vtestcpd:
14941+
case X86::BI__builtin_ia32_vtestcpd256: {
14942+
return EvalTestOp(
14943+
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
14944+
}
14945+
case X86::BI__builtin_ia32_ptestnzc128:
14946+
case X86::BI__builtin_ia32_ptestnzc256:
14947+
case X86::BI__builtin_ia32_vtestnzcps:
14948+
case X86::BI__builtin_ia32_vtestnzcps256:
14949+
case X86::BI__builtin_ia32_vtestnzcpd:
14950+
case X86::BI__builtin_ia32_vtestnzcpd256: {
14951+
return EvalTestOp([](const APInt &A, const APInt &B) {
14952+
return ((A & B) != 0) && ((~A & B) != 0);
14953+
});
14954+
}
1488314955
case X86::BI__builtin_ia32_kandqi:
1488414956
case X86::BI__builtin_ia32_kandhi:
1488514957
case X86::BI__builtin_ia32_kandsi:

0 commit comments

Comments
 (0)