Skip to content

Commit 5e29b5b

Browse files
sskzakariagithub-actions[bot]
authored andcommitted
Automerge: [X86][Clang] Add AVX512 Integer Comparison Intrinsics for constexpr Evaluation (#164026)
Enables constexpr evaluation for the following AVX512 Integer Comparison Intrinsics: ``` _mm_cmp_epi8_mask _mm_cmp_epu8_mask _mm_cmp_epi16_mask _mm_cmp_epu16_mask _mm_cmp_epi32_mask _mm_cmp_epu32_mask _mm_cmp_epi64_mask _mm_cmp_epu64_mask _mm256_cmp_epi8_mask _mm256_cmp_epu8_mask _mm256_cmp_epi16_mask _mm256_cmp_epu16_mask _mm256_cmp_epi32_mask _mm256_cmp_epu32_mask _mm256_cmp_epi64_mask _mm256_cmp_epu64_mask _mm512_cmp_epi8_mask _mm512_cmp_epu8_mask _mm512_cmp_epi16_mask _mm512_cmp_epu16_mask _mm512_cmp_epi32_mask _mm512_cmp_epu32_mask _mm512_cmp_epi64_mask _mm512_cmp_epu64_mask ``` Part 1 of #162054
2 parents 67207f2 + 0b1ef8c commit 5e29b5b

File tree

5 files changed

+248
-30
lines changed

5 files changed

+248
-30
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,81 +1282,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
12821282
def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
12831283
}
12841284

1285-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1285+
let Features = "avx512vl,avx512bw",
1286+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
12861287
def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
12871288
}
12881289

1289-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1290+
let Features = "avx512vl",
1291+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
12901292
def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
12911293
def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
12921294
}
12931295

1294-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1296+
let Features = "avx512vl,avx512bw",
1297+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
12951298
def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
12961299
}
12971300

1298-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1301+
let Features = "avx512vl,avx512bw",
1302+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
12991303
def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
13001304
}
13011305

1302-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1306+
let Features = "avx512vl",
1307+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13031308
def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
13041309
def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
13051310
}
13061311

1307-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1312+
let Features = "avx512vl,avx512bw",
1313+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13081314
def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
13091315
}
13101316

1311-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1317+
let Features = "avx512bw",
1318+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13121319
def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
13131320
}
13141321

1315-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1322+
let Features = "avx512f",
1323+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13161324
def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
13171325
def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
13181326
}
13191327

1320-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1328+
let Features = "avx512bw",
1329+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13211330
def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
13221331
}
13231332

1324-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1333+
let Features = "avx512vl,avx512bw",
1334+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13251335
def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
13261336
}
13271337

1328-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1338+
let Features = "avx512vl",
1339+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13291340
def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
13301341
def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
13311342
}
13321343

1333-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1344+
let Features = "avx512vl,avx512bw",
1345+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13341346
def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
13351347
}
13361348

1337-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1349+
let Features = "avx512vl,avx512bw",
1350+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13381351
def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
13391352
}
13401353

1341-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1354+
let Features = "avx512vl",
1355+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13421356
def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
13431357
def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
13441358
}
13451359

1346-
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1360+
let Features = "avx512vl,avx512bw",
1361+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13471362
def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
13481363
}
13491364

1350-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1365+
let Features = "avx512bw",
1366+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13511367
def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
13521368
}
13531369

1354-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1370+
let Features = "avx512f",
1371+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13551372
def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
13561373
def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
13571374
}
13581375

1359-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1376+
let Features = "avx512bw",
1377+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13601378
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
13611379
}
13621380

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3296,6 +3296,60 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
32963296
return true;
32973297
}
32983298

3299+
static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3300+
bool IsUnsigned) {
3301+
switch (Imm & 0x7) {
3302+
case 0x00: // _MM_CMPINT_EQ
3303+
return (A == B);
3304+
case 0x01: // _MM_CMPINT_LT
3305+
return IsUnsigned ? A.ult(B) : A.slt(B);
3306+
case 0x02: // _MM_CMPINT_LE
3307+
return IsUnsigned ? A.ule(B) : A.sle(B);
3308+
case 0x03: // _MM_CMPINT_FALSE
3309+
return false;
3310+
case 0x04: // _MM_CMPINT_NE
3311+
return (A != B);
3312+
case 0x05: // _MM_CMPINT_NLT
3313+
return IsUnsigned ? A.ugt(B) : A.sgt(B);
3314+
case 0x06: // _MM_CMPINT_NLE
3315+
return IsUnsigned ? A.uge(B) : A.sge(B);
3316+
case 0x07: // _MM_CMPINT_TRUE
3317+
return true;
3318+
default:
3319+
llvm_unreachable("Invalid Op");
3320+
}
3321+
}
3322+
3323+
static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3324+
const CallExpr *Call, unsigned ID,
3325+
bool IsUnsigned) {
3326+
assert(Call->getNumArgs() == 4);
3327+
3328+
APSInt Mask = popToAPSInt(S, Call->getArg(3));
3329+
APSInt Opcode = popToAPSInt(S, Call->getArg(2));
3330+
unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3331+
const Pointer &RHS = S.Stk.pop<Pointer>();
3332+
const Pointer &LHS = S.Stk.pop<Pointer>();
3333+
3334+
assert(LHS.getNumElems() == RHS.getNumElems());
3335+
3336+
APInt RetMask = APInt::getZero(LHS.getNumElems());
3337+
unsigned VectorLen = LHS.getNumElems();
3338+
PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3339+
3340+
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3341+
APSInt A, B;
3342+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3343+
A = LHS.elem<T>(ElemNum).toAPSInt();
3344+
B = RHS.elem<T>(ElemNum).toAPSInt();
3345+
});
3346+
RetMask.setBitVal(ElemNum,
3347+
Mask[ElemNum] && evalICmpImm(CmpOp, A, B, IsUnsigned));
3348+
}
3349+
pushInteger(S, RetMask, Call->getType());
3350+
return true;
3351+
}
3352+
32993353
static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
33003354
const CallExpr *Call) {
33013355
assert(Call->getNumArgs() == 1);
@@ -4488,6 +4542,35 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
44884542
case X86::BI__builtin_ia32_vec_set_v4di:
44894543
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
44904544

4545+
case X86::BI__builtin_ia32_cmpb128_mask:
4546+
case X86::BI__builtin_ia32_cmpw128_mask:
4547+
case X86::BI__builtin_ia32_cmpd128_mask:
4548+
case X86::BI__builtin_ia32_cmpq128_mask:
4549+
case X86::BI__builtin_ia32_cmpb256_mask:
4550+
case X86::BI__builtin_ia32_cmpw256_mask:
4551+
case X86::BI__builtin_ia32_cmpd256_mask:
4552+
case X86::BI__builtin_ia32_cmpq256_mask:
4553+
case X86::BI__builtin_ia32_cmpb512_mask:
4554+
case X86::BI__builtin_ia32_cmpw512_mask:
4555+
case X86::BI__builtin_ia32_cmpd512_mask:
4556+
case X86::BI__builtin_ia32_cmpq512_mask:
4557+
return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
4558+
/*IsUnsigned=*/false);
4559+
4560+
case X86::BI__builtin_ia32_ucmpb128_mask:
4561+
case X86::BI__builtin_ia32_ucmpw128_mask:
4562+
case X86::BI__builtin_ia32_ucmpd128_mask:
4563+
case X86::BI__builtin_ia32_ucmpq128_mask:
4564+
case X86::BI__builtin_ia32_ucmpb256_mask:
4565+
case X86::BI__builtin_ia32_ucmpw256_mask:
4566+
case X86::BI__builtin_ia32_ucmpd256_mask:
4567+
case X86::BI__builtin_ia32_ucmpq256_mask:
4568+
case X86::BI__builtin_ia32_ucmpb512_mask:
4569+
case X86::BI__builtin_ia32_ucmpw512_mask:
4570+
case X86::BI__builtin_ia32_ucmpd512_mask:
4571+
case X86::BI__builtin_ia32_ucmpq512_mask:
4572+
return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
4573+
/*IsUnsigned=*/true);
44914574
case X86::BI__builtin_ia32_pslldqi128_byteshift:
44924575
case X86::BI__builtin_ia32_pslldqi256_byteshift:
44934576
case X86::BI__builtin_ia32_pslldqi512_byteshift:

clang/lib/AST/ExprConstant.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15766,6 +15766,89 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1576615766
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
1576715767
return Success(Vec.getVectorElt(Idx).getInt(), E);
1576815768
}
15769+
15770+
case clang::X86::BI__builtin_ia32_cmpb128_mask:
15771+
case clang::X86::BI__builtin_ia32_cmpw128_mask:
15772+
case clang::X86::BI__builtin_ia32_cmpd128_mask:
15773+
case clang::X86::BI__builtin_ia32_cmpq128_mask:
15774+
case clang::X86::BI__builtin_ia32_cmpb256_mask:
15775+
case clang::X86::BI__builtin_ia32_cmpw256_mask:
15776+
case clang::X86::BI__builtin_ia32_cmpd256_mask:
15777+
case clang::X86::BI__builtin_ia32_cmpq256_mask:
15778+
case clang::X86::BI__builtin_ia32_cmpb512_mask:
15779+
case clang::X86::BI__builtin_ia32_cmpw512_mask:
15780+
case clang::X86::BI__builtin_ia32_cmpd512_mask:
15781+
case clang::X86::BI__builtin_ia32_cmpq512_mask:
15782+
case clang::X86::BI__builtin_ia32_ucmpb128_mask:
15783+
case clang::X86::BI__builtin_ia32_ucmpw128_mask:
15784+
case clang::X86::BI__builtin_ia32_ucmpd128_mask:
15785+
case clang::X86::BI__builtin_ia32_ucmpq128_mask:
15786+
case clang::X86::BI__builtin_ia32_ucmpb256_mask:
15787+
case clang::X86::BI__builtin_ia32_ucmpw256_mask:
15788+
case clang::X86::BI__builtin_ia32_ucmpd256_mask:
15789+
case clang::X86::BI__builtin_ia32_ucmpq256_mask:
15790+
case clang::X86::BI__builtin_ia32_ucmpb512_mask:
15791+
case clang::X86::BI__builtin_ia32_ucmpw512_mask:
15792+
case clang::X86::BI__builtin_ia32_ucmpd512_mask:
15793+
case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
15794+
assert(E->getNumArgs() == 4);
15795+
15796+
bool IsUnsigned =
15797+
(BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
15798+
BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);
15799+
15800+
APValue LHS, RHS;
15801+
APSInt Mask, Opcode;
15802+
if (!EvaluateVector(E->getArg(0), LHS, Info) ||
15803+
!EvaluateVector(E->getArg(1), RHS, Info) ||
15804+
!EvaluateInteger(E->getArg(2), Opcode, Info) ||
15805+
!EvaluateInteger(E->getArg(3), Mask, Info))
15806+
return false;
15807+
15808+
assert(LHS.getVectorLength() == RHS.getVectorLength());
15809+
15810+
unsigned VectorLen = LHS.getVectorLength();
15811+
unsigned RetWidth = Mask.getBitWidth();
15812+
15813+
APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
15814+
15815+
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
15816+
const APSInt &A = LHS.getVectorElt(ElemNum).getInt();
15817+
const APSInt &B = RHS.getVectorElt(ElemNum).getInt();
15818+
bool Result = false;
15819+
15820+
switch (Opcode.getExtValue() & 0x7) {
15821+
case 0: // _MM_CMPINT_EQ
15822+
Result = (A == B);
15823+
break;
15824+
case 1: // _MM_CMPINT_LT
15825+
Result = IsUnsigned ? A.ult(B) : A.slt(B);
15826+
break;
15827+
case 2: // _MM_CMPINT_LE
15828+
Result = IsUnsigned ? A.ule(B) : A.sle(B);
15829+
break;
15830+
case 3: // _MM_CMPINT_FALSE
15831+
Result = false;
15832+
break;
15833+
case 4: // _MM_CMPINT_NE
15834+
Result = (A != B);
15835+
break;
15836+
case 5: // _MM_CMPINT_NLT (>=)
15837+
Result = IsUnsigned ? A.uge(B) : A.sge(B);
15838+
break;
15839+
case 6: // _MM_CMPINT_NLE (>)
15840+
Result = IsUnsigned ? A.ugt(B) : A.sgt(B);
15841+
break;
15842+
case 7: // _MM_CMPINT_TRUE
15843+
Result = true;
15844+
break;
15845+
}
15846+
15847+
RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
15848+
}
15849+
15850+
return Success(APValue(RetMask), E);
15851+
}
1576915852
}
1577015853
}
1577115854

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2385,22 +2385,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
23852385
(__mmask32) __U);
23862386
}
23872387

2388-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2389-
_mm_test_epi8_mask (__m128i __A, __m128i __B)
2390-
{
2388+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
2389+
_mm_test_epi8_mask(__m128i __A, __m128i __B) {
23912390
return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
23922391
}
23932392

2394-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2395-
_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2396-
{
2393+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
2394+
_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
23972395
return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
23982396
_mm_setzero_si128());
23992397
}
24002398

2401-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2402-
_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2403-
{
2399+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
2400+
_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
24042401
return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
24052402
_mm256_setzero_si256());
24062403
}
@@ -2439,9 +2436,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
24392436
_mm256_setzero_si256());
24402437
}
24412438

2442-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2443-
_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2444-
{
2439+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
2440+
_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
24452441
return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
24462442
}
24472443

0 commit comments

Comments
 (0)