Skip to content
54 changes: 36 additions & 18 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1272,81 +1272,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
}

Expand Down
92 changes: 92 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3101,6 +3101,68 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}

static bool evalICmpImm(const uint8_t imm, const llvm::APSInt &A,
const llvm::APSInt &B, bool IsUnsigned) {
switch (imm & 0x7) {
case 0x00:
return (A == B);
break;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) no break after a return

case 0x01:
return IsUnsigned ? A.ult(B) : A.slt(B);
break;
case 0x02:
return IsUnsigned ? A.ule(B) : A.sle(B);
break;
case 0x03:
return false;
break;
case 0x04:
return (A != B);
break;
case 0x05:
return IsUnsigned ? A.ugt(B) : A.sgt(B);
break;
case 0x06:
return IsUnsigned ? A.uge(B) : A.sge(B);
break;
case 0x07:
return true;
break;
default:
llvm_unreachable("Invalid Op");
}
}

static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID,
bool IsUnsigned) {
assert(Call->getNumArgs() == 4);

APSInt Mask = popToAPSInt(S, Call->getArg(3));
APSInt Opcode = popToAPSInt(S, Call->getArg(2));
unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();

assert(LHS.getNumElems() == RHS.getNumElems());

APInt RetMask = APInt::getZero(LHS.getNumElems());
unsigned VectorLen = LHS.getNumElems();
PrimType ElemT = LHS.getFieldDesc()->getPrimType();

for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
RetMask.setBitVal(ElemNum,
Mask[ElemNum] &&
evalICmpImm(CmpOp, LHS.elem<T>(ElemNum).toAPSInt(),
RHS.elem<T>(ElemNum).toAPSInt(),
IsUnsigned));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just do

APSInt A;
APSInt B;
TYPE_SWITCH_NO_BOOL(ElemT, {A = LHS.elem<T>().toAPSInt(); B = LHS.elem<T>().toAPSINt(); });
Rest of the code

?

Copy link
Contributor Author

@sskzakaria sskzakaria Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I should keep A and B const ref and :

INT_TYPE_SWITCH_NO_BOOL(ElemT, {
  const APSInt &A = ...;
  const APSInt &B = ...;
});
bool Result = false;
...

Woudnt work, do I just remove the const ref and do that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they don't have to be const ref here, it's just a convention.

});
}
pushInteger(S, RetMask, Call->getType());
return true;
}

static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 1);
Expand Down Expand Up @@ -4141,6 +4203,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
case X86::BI__builtin_ia32_cmpq128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpw256_mask:
case X86::BI__builtin_ia32_cmpd256_mask:
case X86::BI__builtin_ia32_cmpq256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
case X86::BI__builtin_ia32_cmpw512_mask:
case X86::BI__builtin_ia32_cmpd512_mask:
case X86::BI__builtin_ia32_cmpq512_mask:
return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/false);

case X86::BI__builtin_ia32_ucmpb128_mask:
case X86::BI__builtin_ia32_ucmpw128_mask:
case X86::BI__builtin_ia32_ucmpd128_mask:
case X86::BI__builtin_ia32_ucmpq128_mask:
case X86::BI__builtin_ia32_ucmpb256_mask:
case X86::BI__builtin_ia32_ucmpw256_mask:
case X86::BI__builtin_ia32_ucmpd256_mask:
case X86::BI__builtin_ia32_ucmpq256_mask:
case X86::BI__builtin_ia32_ucmpb512_mask:
case X86::BI__builtin_ia32_ucmpw512_mask:
case X86::BI__builtin_ia32_ucmpd512_mask:
case X86::BI__builtin_ia32_ucmpq512_mask:
return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/true);

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
84 changes: 84 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15449,6 +15449,90 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}

case clang::X86::BI__builtin_ia32_cmpb128_mask:
case clang::X86::BI__builtin_ia32_cmpw128_mask:
case clang::X86::BI__builtin_ia32_cmpd128_mask:
case clang::X86::BI__builtin_ia32_cmpq128_mask:
case clang::X86::BI__builtin_ia32_cmpb256_mask:
case clang::X86::BI__builtin_ia32_cmpw256_mask:
case clang::X86::BI__builtin_ia32_cmpd256_mask:
case clang::X86::BI__builtin_ia32_cmpq256_mask:
case clang::X86::BI__builtin_ia32_cmpb512_mask:
case clang::X86::BI__builtin_ia32_cmpw512_mask:
case clang::X86::BI__builtin_ia32_cmpd512_mask:
case clang::X86::BI__builtin_ia32_cmpq512_mask:
case clang::X86::BI__builtin_ia32_ucmpb128_mask:
case clang::X86::BI__builtin_ia32_ucmpw128_mask:
case clang::X86::BI__builtin_ia32_ucmpd128_mask:
case clang::X86::BI__builtin_ia32_ucmpq128_mask:
case clang::X86::BI__builtin_ia32_ucmpb256_mask:
case clang::X86::BI__builtin_ia32_ucmpw256_mask:
case clang::X86::BI__builtin_ia32_ucmpd256_mask:
case clang::X86::BI__builtin_ia32_ucmpq256_mask:
case clang::X86::BI__builtin_ia32_ucmpb512_mask:
case clang::X86::BI__builtin_ia32_ucmpw512_mask:
case clang::X86::BI__builtin_ia32_ucmpd512_mask:
case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
assert(E->getNumArgs() == 4);

bool IsUnsigned =
(BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);

APValue LHS, RHS;
APSInt Mask, Opcode;
if (!EvaluateVector(E->getArg(0), LHS, Info) ||
!EvaluateVector(E->getArg(1), RHS, Info) ||
!EvaluateInteger(E->getArg(2), Opcode, Info) ||
!EvaluateInteger(E->getArg(3), Mask, Info))
return false;

assert(LHS.getVectorLength() == RHS.getVectorLength());

unsigned VectorLen = LHS.getVectorLength();
unsigned RetWidth = VectorLen ? VectorLen : 1;
if (Mask.getBitWidth() > RetWidth)
RetWidth = Mask.getBitWidth();

APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
const APSInt &A = LHS.getVectorElt(ElemNum).getInt();
const APSInt &B = RHS.getVectorElt(ElemNum).getInt();
bool result = false;

switch (Opcode.getExtValue() & 0x7) {
case 0: // _MM_CMPINT_EQ
result = (A == B);
break;
case 1: // _MM_CMPINT_LT
result = IsUnsigned ? A.ult(B) : A.slt(B);
break;
case 2: // _MM_CMPINT_LE
result = IsUnsigned ? A.ule(B) : A.sle(B);
break;
case 3: // _MM_CMPINT_FALSE
result = false;
break;
case 4: // _MM_CMPINT_NE
result = (A != B);
break;
case 5: // _MM_CMPINT_NLT (>=)
result = IsUnsigned ? A.uge(B) : A.sge(B);
break;
case 6: // _MM_CMPINT_NLE (>)
result = IsUnsigned ? A.ugt(B) : A.sgt(B);
break;
case 7: // _MM_CMPINT_TRUE
result = true;
break;
}

RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
}

return Success(APValue(RetMask), E);
}
}
}

Expand Down
20 changes: 8 additions & 12 deletions clang/lib/Headers/avx512vlbwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2385,22 +2385,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
(__mmask32) __U);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_test_epi8_mask (__m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_test_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
_mm_setzero_si128());
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
_mm256_test_epi8_mask (__m256i __A, __m256i __B)
{
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
_mm256_setzero_si256());
}
Expand Down Expand Up @@ -2439,9 +2436,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_testn_epi8_mask (__m128i __A, __m128i __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
}

Expand Down
Loading