Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 36 additions & 18 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1272,81 +1272,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}

Expand Down
99 changes: 99 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3101,6 +3101,75 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID,
bool IsUnsigned) {
assert(Call->getNumArgs() == 4);

APSInt Mask = popToAPSInt(S, Call->getArg(3));
APSInt Opcode = popToAPSInt(S, Call->getArg(2));
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();

assert(LHS.getNumElems() == RHS.getNumElems());

APInt RetMask = APInt::getZero(LHS.getNumElems());
unsigned VectorLen = LHS.getNumElems();
PrimType ElemT = LHS.getFieldDesc()->getPrimType();

for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
bool Result = false;
switch (Opcode.getExtValue() & 0x7) {
case 0x00: // _MM_CMPINT_EQ
Result = (LHS.elem<T>(ElemNum).toAPSInt() ==
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull out the repeated elem(ElemNum).toAPSInt() calls to top of the loop - that should reduce the size of this code considerably.

RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x01: // _MM_CMPINT_LT
Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ult(
RHS.elem<T>(ElemNum).toAPSInt())
: LHS.elem<T>(ElemNum).toAPSInt().slt(
RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x02: // _MM_CMPINT_LE
Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ule(
RHS.elem<T>(ElemNum).toAPSInt())
: LHS.elem<T>(ElemNum).toAPSInt().sle(
RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x03: // _MM_CMPINT_FALSE
Result = false;
break;
case 0x04: // _MM_CMPINT_NE
Result = (LHS.elem<T>(ElemNum).toAPSInt() !=
RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x05: // _MM_CMPINT_NLT (>=)
Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().uge(
RHS.elem<T>(ElemNum).toAPSInt())
: LHS.elem<T>(ElemNum).toAPSInt().sge(
RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x06: // _MM_CMPINT_NLE (>)
Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ugt(
RHS.elem<T>(ElemNum).toAPSInt())
: LHS.elem<T>(ElemNum).toAPSInt().sgt(
RHS.elem<T>(ElemNum).toAPSInt());
break;
case 0x07: // _MM_CMPINT_TRUE
Result = true;
break;
}

RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
});
}

pushInteger(S, RetMask, Call->getType());
return true;
}

static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 1);
Expand Down Expand Up @@ -4141,6 +4210,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
case X86::BI__builtin_ia32_cmpq128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpw256_mask:
case X86::BI__builtin_ia32_cmpd256_mask:
case X86::BI__builtin_ia32_cmpq256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
case X86::BI__builtin_ia32_cmpw512_mask:
case X86::BI__builtin_ia32_cmpd512_mask:
case X86::BI__builtin_ia32_cmpq512_mask:
return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/false);

case X86::BI__builtin_ia32_ucmpb128_mask:
case X86::BI__builtin_ia32_ucmpw128_mask:
case X86::BI__builtin_ia32_ucmpd128_mask:
case X86::BI__builtin_ia32_ucmpq128_mask:
case X86::BI__builtin_ia32_ucmpb256_mask:
case X86::BI__builtin_ia32_ucmpw256_mask:
case X86::BI__builtin_ia32_ucmpd256_mask:
case X86::BI__builtin_ia32_ucmpq256_mask:
case X86::BI__builtin_ia32_ucmpb512_mask:
case X86::BI__builtin_ia32_ucmpw512_mask:
case X86::BI__builtin_ia32_ucmpd512_mask:
case X86::BI__builtin_ia32_ucmpq512_mask:
return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/true);

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
85 changes: 85 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15449,6 +15449,91 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}

case clang::X86::BI__builtin_ia32_cmpb128_mask:
case clang::X86::BI__builtin_ia32_cmpw128_mask:
case clang::X86::BI__builtin_ia32_cmpd128_mask:
case clang::X86::BI__builtin_ia32_cmpq128_mask:
case clang::X86::BI__builtin_ia32_cmpb256_mask:
case clang::X86::BI__builtin_ia32_cmpw256_mask:
case clang::X86::BI__builtin_ia32_cmpd256_mask:
case clang::X86::BI__builtin_ia32_cmpq256_mask:
case clang::X86::BI__builtin_ia32_cmpb512_mask:
case clang::X86::BI__builtin_ia32_cmpw512_mask:
case clang::X86::BI__builtin_ia32_cmpd512_mask:
case clang::X86::BI__builtin_ia32_cmpq512_mask:
case clang::X86::BI__builtin_ia32_ucmpb128_mask:
case clang::X86::BI__builtin_ia32_ucmpw128_mask:
case clang::X86::BI__builtin_ia32_ucmpd128_mask:
case clang::X86::BI__builtin_ia32_ucmpq128_mask:
case clang::X86::BI__builtin_ia32_ucmpb256_mask:
case clang::X86::BI__builtin_ia32_ucmpw256_mask:
case clang::X86::BI__builtin_ia32_ucmpd256_mask:
case clang::X86::BI__builtin_ia32_ucmpq256_mask:
case clang::X86::BI__builtin_ia32_ucmpb512_mask:
case clang::X86::BI__builtin_ia32_ucmpw512_mask:
case clang::X86::BI__builtin_ia32_ucmpd512_mask:
case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
assert(E->getNumArgs() == 4);

bool IsUnsigned =
(BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);

APValue LHS, RHS;
APSInt Mask, Opcode;
if (!EvaluateVector(E->getArg(0), LHS, Info) ||
!EvaluateVector(E->getArg(1), RHS, Info) ||
!EvaluateInteger(E->getArg(2), Opcode, Info) ||
!EvaluateInteger(E->getArg(3), Mask, Info))
return false;

assert(LHS.getVectorLength() == RHS.getVectorLength());

unsigned VectorLen = LHS.getVectorLength();
unsigned RetWidth = VectorLen ? VectorLen : 1;
if (Mask.getBitWidth() > RetWidth)
RetWidth = Mask.getBitWidth();

APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
APSInt A = LHS.getVectorElt(ElemNum).getInt();
APSInt B = RHS.getVectorElt(ElemNum).getInt();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

      const APSInt &A = LHS.getVectorElt(ElemNum).getInt();
      const APSInt &B = RHS.getVectorElt(ElemNum).getInt();

bool result = false;

switch (Opcode.getExtValue() & 0x7) {
case 0: // _MM_CMPINT_EQ
result = (A == B);
break;
case 1: // _MM_CMPINT_LT
result = IsUnsigned ? A.ult(B) : A.slt(B);
break;
case 2: // _MM_CMPINT_LE
result = IsUnsigned ? A.ule(B) : A.sle(B);
break;
case 3: // _MM_CMPINT_FALSE
result = false;
break;
case 4: // _MM_CMPINT_NE
result = (A != B);
break;
case 5: // _MM_CMPINT_NLT (>=)
result = IsUnsigned ? A.uge(B) : A.sge(B);
break;
case 6: // _MM_CMPINT_NLE (>)
result = IsUnsigned ? A.ugt(B) : A.sgt(B);
break;
case 7: // _MM_CMPINT_TRUE
result = true;
break;
}

RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
}

RetMask.setIsUnsigned(true);
return Success(APValue(RetMask), E);
}
}
}

Expand Down
20 changes: 8 additions & 12 deletions clang/lib/Headers/avx512vlbwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2385,22 +2385,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
(__mmask32) __U);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_test_epi8_mask (__m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_test_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
_mm_setzero_si128());
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
_mm256_test_epi8_mask (__m256i __A, __m256i __B)
{
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
_mm256_setzero_si256());
}
Expand Down Expand Up @@ -2439,9 +2436,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_testn_epi8_mask (__m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
}

Expand Down
Loading
Loading