Skip to content

Commit d2a8486

Browse files
authored
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr (llvm#163293)
Resolves llvm#160524
1 parent 65c24e5 commit d2a8486

File tree

7 files changed

+231
-162
lines changed

7 files changed

+231
-162
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,23 +1359,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
13591359
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13601360
}
13611361

1362-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1362+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13631363
def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
1364-
}
1365-
1366-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1367-
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
1368-
}
1369-
1370-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
13711364
def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
13721365
}
13731366

1374-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1367+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
1368+
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
13751369
def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
13761370
}
13771371

1378-
let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1372+
let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13791373
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13801374
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
13811375
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3101,6 +3101,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
31013101
return true;
31023102
}
31033103

3104+
static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3105+
const CallExpr *Call) {
3106+
assert(Call->getNumArgs() == 1);
3107+
3108+
QualType Arg0Type = Call->getArg(0)->getType();
3109+
const auto *VecT = Arg0Type->castAs<VectorType>();
3110+
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
3111+
unsigned NumElems = VecT->getNumElements();
3112+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3113+
const Pointer &Src = S.Stk.pop<Pointer>();
3114+
const Pointer &Dst = S.Stk.peek<Pointer>();
3115+
3116+
for (unsigned I = 0; I != NumElems; ++I) {
3117+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3118+
APSInt ElemI = Src.elem<T>(I).toAPSInt();
3119+
APInt ConflictMask(ElemI.getBitWidth(), 0);
3120+
for (unsigned J = 0; J != I; ++J) {
3121+
APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3122+
ConflictMask.setBitVal(J, ElemI == ElemJ);
3123+
}
3124+
Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
3125+
});
3126+
}
3127+
Dst.initializeAllElements();
3128+
return true;
3129+
}
3130+
31043131
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
31053132
uint32_t BuiltinID) {
31063133
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3891,7 +3918,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
38913918
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
38923919
return llvm::APIntOps::fshr(Hi, Lo, Amt);
38933920
});
3894-
3921+
case X86::BI__builtin_ia32_vpconflictsi_128:
3922+
case X86::BI__builtin_ia32_vpconflictsi_256:
3923+
case X86::BI__builtin_ia32_vpconflictsi_512:
3924+
case X86::BI__builtin_ia32_vpconflictdi_128:
3925+
case X86::BI__builtin_ia32_vpconflictdi_256:
3926+
case X86::BI__builtin_ia32_vpconflictdi_512:
3927+
return interp__builtin_ia32_vpconflict(S, OpPC, Call);
38953928
case clang::X86::BI__builtin_ia32_blendpd:
38963929
case clang::X86::BI__builtin_ia32_blendpd256:
38973930
case clang::X86::BI__builtin_ia32_blendps:

clang/lib/AST/ExprConstant.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12179,6 +12179,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1217912179

1218012180
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1218112181
}
12182+
case X86::BI__builtin_ia32_vpconflictsi_128:
12183+
case X86::BI__builtin_ia32_vpconflictsi_256:
12184+
case X86::BI__builtin_ia32_vpconflictsi_512:
12185+
case X86::BI__builtin_ia32_vpconflictdi_128:
12186+
case X86::BI__builtin_ia32_vpconflictdi_256:
12187+
case X86::BI__builtin_ia32_vpconflictdi_512: {
12188+
APValue Source;
12189+
12190+
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
12191+
return false;
12192+
12193+
unsigned SourceLen = Source.getVectorLength();
12194+
SmallVector<APValue, 32> ResultElements;
12195+
ResultElements.reserve(SourceLen);
12196+
12197+
const auto *VecT = E->getType()->castAs<VectorType>();
12198+
bool DestUnsigned =
12199+
VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
12200+
12201+
for (unsigned I = 0; I != SourceLen; ++I) {
12202+
const APValue &EltI = Source.getVectorElt(I);
12203+
12204+
APInt ConflictMask(EltI.getInt().getBitWidth(), 0);
12205+
for (unsigned J = 0; J != I; ++J) {
12206+
const APValue &EltJ = Source.getVectorElt(J);
12207+
ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt());
12208+
}
12209+
ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned)));
12210+
}
12211+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12212+
}
1218212213
case X86::BI__builtin_ia32_blendpd:
1218312214
case X86::BI__builtin_ia32_blendpd256:
1218412215
case X86::BI__builtin_ia32_blendps:

clang/lib/Headers/avx512cdintrin.h

Lines changed: 34 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -15,111 +15,98 @@
1515
#define __AVX512CDINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
constexpr __attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512cd"), __min_vector_width__(512)))
22+
#else
1823
#define __DEFAULT_FN_ATTRS \
1924
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
2025
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24-
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
2626
#endif
2727

2828
static __inline__ __m512i __DEFAULT_FN_ATTRS
29-
_mm512_conflict_epi64 (__m512i __A)
30-
{
31-
return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
29+
_mm512_conflict_epi64(__m512i __A) {
30+
return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
3231
}
3332

3433
static __inline__ __m512i __DEFAULT_FN_ATTRS
35-
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
36-
{
37-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
38-
(__v8di)_mm512_conflict_epi64(__A),
39-
(__v8di)__W);
34+
_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
35+
return (__m512i)__builtin_ia32_selectq_512(
36+
(__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
4037
}
4138

4239
static __inline__ __m512i __DEFAULT_FN_ATTRS
43-
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
44-
{
40+
_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
4541
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4642
(__v8di)_mm512_conflict_epi64(__A),
47-
(__v8di)_mm512_setzero_si512 ());
43+
(__v8di)_mm512_setzero_si512());
4844
}
4945

5046
static __inline__ __m512i __DEFAULT_FN_ATTRS
51-
_mm512_conflict_epi32 (__m512i __A)
52-
{
53-
return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
47+
_mm512_conflict_epi32(__m512i __A) {
48+
return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
5449
}
5550

5651
static __inline__ __m512i __DEFAULT_FN_ATTRS
57-
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
58-
{
59-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
60-
(__v16si)_mm512_conflict_epi32(__A),
61-
(__v16si)__W);
52+
_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
53+
return (__m512i)__builtin_ia32_selectd_512(
54+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
6255
}
6356

6457
static __inline__ __m512i __DEFAULT_FN_ATTRS
65-
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
66-
{
67-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
68-
(__v16si)_mm512_conflict_epi32(__A),
69-
(__v16si)_mm512_setzero_si512());
58+
_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
59+
return (__m512i)__builtin_ia32_selectd_512(
60+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
61+
(__v16si)_mm512_setzero_si512());
7062
}
7163

72-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
73-
_mm512_lzcnt_epi32(__m512i __A) {
64+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
7465
return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
7566
(__v16si)_mm512_set1_epi32(32));
7667
}
7768

78-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
69+
static __inline__ __m512i __DEFAULT_FN_ATTRS
7970
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
80-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
81-
(__v16si)_mm512_lzcnt_epi32(__A),
82-
(__v16si)__W);
71+
return (__m512i)__builtin_ia32_selectd_512(
72+
(__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
8373
}
8474

85-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
75+
static __inline__ __m512i __DEFAULT_FN_ATTRS
8676
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
8777
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
8878
(__v16si)_mm512_lzcnt_epi32(__A),
8979
(__v16si)_mm512_setzero_si512());
9080
}
9181

92-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93-
_mm512_lzcnt_epi64(__m512i __A) {
82+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
9483
return (__m512i)__builtin_elementwise_clzg(
9584
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
9685
}
9786

98-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
87+
static __inline__ __m512i __DEFAULT_FN_ATTRS
9988
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
100-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
101-
(__v8di)_mm512_lzcnt_epi64(__A),
102-
(__v8di)__W);
89+
return (__m512i)__builtin_ia32_selectq_512(
90+
(__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
10391
}
10492

105-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93+
static __inline__ __m512i __DEFAULT_FN_ATTRS
10694
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
10795
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
10896
(__v8di)_mm512_lzcnt_epi64(__A),
10997
(__v8di)_mm512_setzero_si512());
11098
}
11199

112-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
100+
static __inline__ __m512i __DEFAULT_FN_ATTRS
113101
_mm512_broadcastmb_epi64(__mmask8 __A) {
114-
return (__m512i) _mm512_set1_epi64((long long) __A);
102+
return (__m512i)_mm512_set1_epi64((long long)__A);
115103
}
116104

117-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
105+
static __inline__ __m512i __DEFAULT_FN_ATTRS
118106
_mm512_broadcastmw_epi32(__mmask16 __A) {
119107
return (__m512i)_mm512_set1_epi32((int)__A);
120108
}
121109

122110
#undef __DEFAULT_FN_ATTRS
123-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
124111

125112
#endif

0 commit comments

Comments
 (0)