Skip to content

Commit 00115fd

Browse files
committed
VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX512 mask predicate intrinsics
1 parent 8fe71e0 commit 00115fd

File tree

5 files changed

+91
-17
lines changed

5 files changed

+91
-17
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,24 +2502,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
25022502
def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
25032503
}
25042504

2505-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2505+
let Features = "avx512bw",
2506+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
25062507
def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
25072508
def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
25082509
def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
25092510
}
25102511

2511-
let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2512+
let Features = "avx512dq",
2513+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
25122514
def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
25132515
def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
25142516
def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
25152517
def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
25162518
}
25172519

2518-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2520+
let Features = "avx512bw,avx512vl",
2521+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
25192522
def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
25202523
}
25212524

2522-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2525+
let Features = "avx512bw,avx512vl",
2526+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
25232527
def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
25242528
}
25252529

@@ -2539,11 +2543,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
25392543
def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
25402544
}
25412545

2542-
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2546+
let Features = "avx512dq,avx512vl",
2547+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
25432548
def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
25442549
}
25452550

2546-
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2551+
let Features = "avx512dq,avx512vl",
2552+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
25472553
def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
25482554
}
25492555

@@ -2563,11 +2569,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector
25632569
def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
25642570
}
25652571

2566-
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2572+
let Features = "avx512dq,avx512vl",
2573+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
25672574
def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
25682575
}
25692576

2570-
let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2577+
let Features = "avx512dq,avx512vl",
2578+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
25712579
def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
25722580
}
25732581

@@ -3361,15 +3369,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
33613369
def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
33623370
}
33633371

3364-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3372+
let Features = "avx512bw",
3373+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
33653374
def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
33663375
}
33673376

3368-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3377+
let Features = "avx512bw,avx512vl",
3378+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
33693379
def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
33703380
}
33713381

3372-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3382+
let Features = "avx512bw,avx512vl",
3383+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
33733384
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
33743385
}
33753386

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3128,6 +3128,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
31283128
return true;
31293129
}
31303130

3131+
static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
3132+
const CallExpr *Call, unsigned ID) {
3133+
assert(Call->getNumArgs() == 1);
3134+
3135+
const Pointer &Vec = S.Stk.pop<Pointer>();
3136+
APInt RetMask(Vec.getNumElems(), 0);
3137+
unsigned VectorLen = Vec.getNumElems();
3138+
PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3139+
3140+
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3141+
APSInt A;
3142+
INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3143+
unsigned MSB = A[A.getBitWidth() - 1];
3144+
RetMask.setBitVal(ElemNum, MSB);
3145+
}
3146+
pushInteger(S, RetMask, Call->getType());
3147+
return true;
3148+
}
3149+
31313150
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
31323151
uint32_t BuiltinID) {
31333152
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4141,6 +4160,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
41414160
case X86::BI__builtin_ia32_vec_set_v4di:
41424161
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
41434162

4163+
case X86::BI__builtin_ia32_cvtb2mask128:
4164+
case X86::BI__builtin_ia32_cvtb2mask256:
4165+
case X86::BI__builtin_ia32_cvtb2mask512:
4166+
case X86::BI__builtin_ia32_cvtw2mask128:
4167+
case X86::BI__builtin_ia32_cvtw2mask256:
4168+
case X86::BI__builtin_ia32_cvtw2mask512:
4169+
case X86::BI__builtin_ia32_cvtd2mask128:
4170+
case X86::BI__builtin_ia32_cvtd2mask256:
4171+
case X86::BI__builtin_ia32_cvtd2mask512:
4172+
case X86::BI__builtin_ia32_cvtq2mask128:
4173+
case X86::BI__builtin_ia32_cvtq2mask256:
4174+
case X86::BI__builtin_ia32_cvtq2mask512:
4175+
return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);
4176+
41444177
default:
41454178
S.FFDiag(S.Current->getLocation(OpPC),
41464179
diag::note_invalid_subexpr_in_const_expr)

clang/lib/AST/ExprConstant.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15449,6 +15449,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1544915449
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
1545015450
return Success(Vec.getVectorElt(Idx).getInt(), E);
1545115451
}
15452+
15453+
case clang::X86::BI__builtin_ia32_cvtb2mask128:
15454+
case clang::X86::BI__builtin_ia32_cvtb2mask256:
15455+
case clang::X86::BI__builtin_ia32_cvtb2mask512:
15456+
case clang::X86::BI__builtin_ia32_cvtw2mask128:
15457+
case clang::X86::BI__builtin_ia32_cvtw2mask256:
15458+
case clang::X86::BI__builtin_ia32_cvtw2mask512:
15459+
case clang::X86::BI__builtin_ia32_cvtd2mask128:
15460+
case clang::X86::BI__builtin_ia32_cvtd2mask256:
15461+
case clang::X86::BI__builtin_ia32_cvtd2mask512:
15462+
case clang::X86::BI__builtin_ia32_cvtq2mask128:
15463+
case clang::X86::BI__builtin_ia32_cvtq2mask256:
15464+
case clang::X86::BI__builtin_ia32_cvtq2mask512: {
15465+
assert(E->getNumArgs() == 1);
15466+
APValue Vec;
15467+
if (!EvaluateVector(E->getArg(0), Vec, Info))
15468+
return false;
15469+
15470+
unsigned VectorLen = Vec.getVectorLength();
15471+
APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true);
15472+
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
15473+
const APSInt &A = Vec.getVectorElt(ElemNum).getInt();
15474+
unsigned MSB = A[A.getBitWidth() - 1];
15475+
RetMask.setBitVal(ElemNum, MSB);
15476+
}
15477+
return Success(APValue(RetMask), E);
15478+
}
1545215479
}
1545315480
}
1545415481

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2492,15 +2492,13 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
24922492
_mm256_setzero_si256());
24932493
}
24942494

2495-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2496-
_mm_movepi8_mask (__m128i __A)
2497-
{
2495+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
2496+
_mm_movepi8_mask(__m128i __A) {
24982497
return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
24992498
}
25002499

2501-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2502-
_mm256_movepi8_mask (__m256i __A)
2503-
{
2500+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
2501+
_mm256_movepi8_mask(__m256i __A) {
25042502
return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
25052503
}
25062504

clang/test/CodeGen/X86/avx512vlbw-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3009,6 +3009,11 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
30093009
return _mm_movepi8_mask(__A);
30103010
}
30113011

3012+
TEST_CONSTEXPR(_mm_movepi8_mask(
3013+
((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7,
3014+
8, 9, 10, 11, 12, 13, 14, 15})
3015+
) == (__mmask16)0x0000);
3016+
30123017
__mmask32 test_mm256_movepi8_mask(__m256i __A) {
30133018
// CHECK-LABEL: test_mm256_movepi8_mask
30143019
// CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer

0 commit comments

Comments
 (0)