Skip to content

Commit 5e0a754

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr
Resolves #160524
1 parent 2424118 commit 5e0a754

File tree

7 files changed

+235
-174
lines changed

7 files changed

+235
-174
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,23 +1357,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
13571357
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13581358
}
13591359

1360-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1360+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13611361
def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
1362-
}
1363-
1364-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1365-
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
1366-
}
1367-
1368-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
13691362
def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
13701363
}
13711364

1372-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1365+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
1366+
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
13731367
def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
13741368
}
13751369

1376-
let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1370+
let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13771371
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13781372
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
13791373
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3073,6 +3073,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
30733073
return true;
30743074
}
30753075

3076+
static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3077+
const CallExpr *Call) {
3078+
assert(Call->getNumArgs() == 1);
3079+
3080+
QualType Arg0Type = Call->getArg(0)->getType();
3081+
const auto *VecT = Arg0Type->castAs<VectorType>();
3082+
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
3083+
unsigned NumElems = VecT->getNumElements();
3084+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3085+
const Pointer &Src = S.Stk.pop<Pointer>();
3086+
const Pointer &Dst = S.Stk.peek<Pointer>();
3087+
3088+
for (unsigned I = 0; I != NumElems; ++I) {
3089+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3090+
APSInt ElemI = Src.elem<T>(I).toAPSInt();
3091+
APInt ConflictMask(ElemI.getBitWidth(), 0);
3092+
for (unsigned J = 0; J != I; ++J) {
3093+
APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3094+
ConflictMask.setBitVal(J, ElemI == ElemJ);
3095+
}
3096+
Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
3097+
});
3098+
}
3099+
Dst.initializeAllElements();
3100+
return true;
3101+
}
3102+
30763103
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
30773104
uint32_t BuiltinID) {
30783105
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3863,7 +3890,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
38633890
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
38643891
return llvm::APIntOps::fshr(Hi, Lo, Amt);
38653892
});
3866-
3893+
case X86::BI__builtin_ia32_vpconflictsi_128:
3894+
case X86::BI__builtin_ia32_vpconflictsi_256:
3895+
case X86::BI__builtin_ia32_vpconflictsi_512:
3896+
case X86::BI__builtin_ia32_vpconflictdi_128:
3897+
case X86::BI__builtin_ia32_vpconflictdi_256:
3898+
case X86::BI__builtin_ia32_vpconflictdi_512:
3899+
return interp__builtin_ia32_vpconflict(S, OpPC, Call);
38673900
case clang::X86::BI__builtin_ia32_blendpd:
38683901
case clang::X86::BI__builtin_ia32_blendpd256:
38693902
case clang::X86::BI__builtin_ia32_blendps:

clang/lib/AST/ExprConstant.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12142,6 +12142,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1214212142

1214312143
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1214412144
}
12145+
case X86::BI__builtin_ia32_vpconflictsi_128:
12146+
case X86::BI__builtin_ia32_vpconflictsi_256:
12147+
case X86::BI__builtin_ia32_vpconflictsi_512:
12148+
case X86::BI__builtin_ia32_vpconflictdi_128:
12149+
case X86::BI__builtin_ia32_vpconflictdi_256:
12150+
case X86::BI__builtin_ia32_vpconflictdi_512: {
12151+
APValue Source;
12152+
12153+
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
12154+
return false;
12155+
12156+
unsigned SourceLen = Source.getVectorLength();
12157+
SmallVector<APValue, 32> ResultElements;
12158+
ResultElements.reserve(SourceLen);
12159+
12160+
const auto *VecT = E->getType()->castAs<VectorType>();
12161+
bool DestUnsigned =
12162+
VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
12163+
12164+
for (unsigned I = 0; I != SourceLen; ++I) {
12165+
const APValue &EltI = Source.getVectorElt(I);
12166+
12167+
APInt ConflictMask(EltI.getInt().getBitWidth(), 0);
12168+
for (unsigned J = 0; J != I; ++J) {
12169+
const APValue &EltJ = Source.getVectorElt(J);
12170+
ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt());
12171+
}
12172+
ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned)));
12173+
}
12174+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12175+
}
1214512176
case X86::BI__builtin_ia32_blendpd:
1214612177
case X86::BI__builtin_ia32_blendpd256:
1214712178
case X86::BI__builtin_ia32_blendps:

clang/lib/Headers/avx512cdintrin.h

Lines changed: 35 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,114 +15,98 @@
1515
#define __AVX512CDINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
constexpr __attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512cd"), __min_vector_width__(512)))
22+
#else
1823
#define __DEFAULT_FN_ATTRS \
1924
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
2025
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24-
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
2626
#endif
2727

2828
static __inline__ __m512i __DEFAULT_FN_ATTRS
29-
_mm512_conflict_epi64 (__m512i __A)
30-
{
31-
return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
29+
_mm512_conflict_epi64(__m512i __A) {
30+
return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
3231
}
3332

3433
static __inline__ __m512i __DEFAULT_FN_ATTRS
35-
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
36-
{
37-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
38-
(__v8di)_mm512_conflict_epi64(__A),
39-
(__v8di)__W);
34+
_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
35+
return (__m512i)__builtin_ia32_selectq_512(
36+
(__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
4037
}
4138

4239
static __inline__ __m512i __DEFAULT_FN_ATTRS
43-
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
44-
{
40+
_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
4541
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4642
(__v8di)_mm512_conflict_epi64(__A),
47-
(__v8di)_mm512_setzero_si512 ());
43+
(__v8di)_mm512_setzero_si512());
4844
}
4945

5046
static __inline__ __m512i __DEFAULT_FN_ATTRS
51-
_mm512_conflict_epi32 (__m512i __A)
52-
{
53-
return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
47+
_mm512_conflict_epi32(__m512i __A) {
48+
return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
5449
}
5550

5651
static __inline__ __m512i __DEFAULT_FN_ATTRS
57-
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
58-
{
59-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
60-
(__v16si)_mm512_conflict_epi32(__A),
61-
(__v16si)__W);
52+
_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
53+
return (__m512i)__builtin_ia32_selectd_512(
54+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
6255
}
6356

6457
static __inline__ __m512i __DEFAULT_FN_ATTRS
65-
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
66-
{
67-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
68-
(__v16si)_mm512_conflict_epi32(__A),
69-
(__v16si)_mm512_setzero_si512());
58+
_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
59+
return (__m512i)__builtin_ia32_selectd_512(
60+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
61+
(__v16si)_mm512_setzero_si512());
7062
}
7163

72-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
73-
_mm512_lzcnt_epi32(__m512i __A) {
64+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
7465
return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
7566
(__v16si)_mm512_set1_epi32(32));
7667
}
7768

78-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
69+
static __inline__ __m512i __DEFAULT_FN_ATTRS
7970
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
80-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
81-
(__v16si)_mm512_lzcnt_epi32(__A),
82-
(__v16si)__W);
71+
return (__m512i)__builtin_ia32_selectd_512(
72+
(__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
8373
}
8474

85-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
75+
static __inline__ __m512i __DEFAULT_FN_ATTRS
8676
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
8777
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
8878
(__v16si)_mm512_lzcnt_epi32(__A),
8979
(__v16si)_mm512_setzero_si512());
9080
}
9181

92-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93-
_mm512_lzcnt_epi64(__m512i __A) {
82+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
9483
return (__m512i)__builtin_elementwise_clzg(
9584
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
9685
}
9786

98-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
87+
static __inline__ __m512i __DEFAULT_FN_ATTRS
9988
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
100-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
101-
(__v8di)_mm512_lzcnt_epi64(__A),
102-
(__v8di)__W);
89+
return (__m512i)__builtin_ia32_selectq_512(
90+
(__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
10391
}
10492

105-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93+
static __inline__ __m512i __DEFAULT_FN_ATTRS
10694
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
10795
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
10896
(__v8di)_mm512_lzcnt_epi64(__A),
10997
(__v8di)_mm512_setzero_si512());
11098
}
11199

112100
static __inline__ __m512i __DEFAULT_FN_ATTRS
113-
_mm512_broadcastmb_epi64 (__mmask8 __A)
114-
{
115-
return (__m512i) _mm512_set1_epi64((long long) __A);
101+
_mm512_broadcastmb_epi64(__mmask8 __A) {
102+
return (__m512i)_mm512_set1_epi64((long long)__A);
116103
}
117104

118105
static __inline__ __m512i __DEFAULT_FN_ATTRS
119-
_mm512_broadcastmw_epi32 (__mmask16 __A)
120-
{
121-
return (__m512i) _mm512_set1_epi32((int) __A);
122-
106+
_mm512_broadcastmw_epi32(__mmask16 __A) {
107+
return (__m512i)_mm512_set1_epi32((int)__A);
123108
}
124109

125110
#undef __DEFAULT_FN_ATTRS
126-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
127111

128112
#endif

0 commit comments

Comments
 (0)