Skip to content

Commit 3e8b31f

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr
Resolves #160524
1 parent 3e22438 commit 3e8b31f

File tree

7 files changed

+252
-185
lines changed

7 files changed

+252
-185
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,23 +1341,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
13411341
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13421342
}
13431343

1344-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1344+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13451345
def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
1346-
}
1347-
1348-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1349-
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
1350-
}
1351-
1352-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
13531346
def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
13541347
}
13551348

1356-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1349+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
1350+
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
13571351
def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
13581352
}
13591353

1360-
let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1354+
let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13611355
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13621356
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
13631357
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3725,7 +3725,41 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
37253725
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
37263726
return llvm::APIntOps::fshr(Hi, Lo, Amt);
37273727
});
3728+
case X86::BI__builtin_ia32_vpconflictsi_128:
3729+
case X86::BI__builtin_ia32_vpconflictsi_256:
3730+
case X86::BI__builtin_ia32_vpconflictsi_512:
3731+
case X86::BI__builtin_ia32_vpconflictdi_128:
3732+
case X86::BI__builtin_ia32_vpconflictdi_256:
3733+
case X86::BI__builtin_ia32_vpconflictdi_512: {
3734+
assert(Call->getNumArgs() == 1);
3735+
3736+
QualType Arg0Type = Call->getArg(0)->getType();
3737+
const auto *VecT = Arg0Type->castAs<VectorType>();
3738+
const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
3739+
unsigned NumElems = VecT->getNumElements();
3740+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3741+
const Pointer &Src = S.Stk.pop<Pointer>();
3742+
const Pointer &Dst = S.Stk.peek<Pointer>();
37283743

3744+
for (unsigned I = 0; I != NumElems; ++I) {
3745+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3746+
APSInt ElemI = Src.elem<T>(I).toAPSInt();
3747+
unsigned ConflictMask = 0;
3748+
for (unsigned J = 0; J != NumElems; ++J) {
3749+
if (I == J)
3750+
continue;
3751+
APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3752+
if (ElemI == ElemJ) {
3753+
ConflictMask |= (1 << J);
3754+
}
3755+
}
3756+
Dst.elem<T>(I) = static_cast<T>(
3757+
APSInt(APInt(ElemI.getBitWidth(), ConflictMask), DestUnsigned));
3758+
});
3759+
}
3760+
Dst.initializeAllElements();
3761+
return true;
3762+
}
37293763
case clang::X86::BI__builtin_ia32_blendpd:
37303764
case clang::X86::BI__builtin_ia32_blendpd256:
37313765
case clang::X86::BI__builtin_ia32_blendps:

clang/lib/AST/ExprConstant.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12094,6 +12094,42 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1209412094

1209512095
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1209612096
}
12097+
case X86::BI__builtin_ia32_vpconflictsi_128:
12098+
case X86::BI__builtin_ia32_vpconflictsi_256:
12099+
case X86::BI__builtin_ia32_vpconflictsi_512:
12100+
case X86::BI__builtin_ia32_vpconflictdi_128:
12101+
case X86::BI__builtin_ia32_vpconflictdi_256:
12102+
case X86::BI__builtin_ia32_vpconflictdi_512: {
12103+
APValue Source;
12104+
12105+
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
12106+
return false;
12107+
12108+
unsigned SourceLen = Source.getVectorLength();
12109+
SmallVector<APValue, 32> ResultElements;
12110+
ResultElements.reserve(SourceLen);
12111+
12112+
const auto *VecT = E->getType()->castAs<VectorType>();
12113+
bool DestUnsigned =
12114+
VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
12115+
12116+
for (unsigned I = 0; I != SourceLen; ++I) {
12117+
const APValue &EltI = Source.getVectorElt(I);
12118+
12119+
unsigned ConflictMask = 0;
12120+
for (unsigned J = 0; J != SourceLen; ++J) {
12121+
const APValue &EltJ = Source.getVectorElt(J);
12122+
if (I == J)
12123+
continue;
12124+
if (EltI.getInt() == EltJ.getInt())
12125+
ConflictMask |= (1 << J);
12126+
}
12127+
12128+
APInt ConflictMaskInt(EltI.getInt().getBitWidth(), ConflictMask);
12129+
ResultElements.push_back(APValue(APSInt(ConflictMaskInt, DestUnsigned)));
12130+
}
12131+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12132+
}
1209712133
case X86::BI__builtin_ia32_blendpd:
1209812134
case X86::BI__builtin_ia32_blendpd256:
1209912135
case X86::BI__builtin_ia32_blendps:

clang/lib/Headers/avx512cdintrin.h

Lines changed: 35 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,114 +15,98 @@
1515
#define __AVX512CDINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
constexpr __attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512cd"), __min_vector_width__(512)))
22+
#else
1823
#define __DEFAULT_FN_ATTRS \
1924
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
2025
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24-
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
2626
#endif
2727

2828
static __inline__ __m512i __DEFAULT_FN_ATTRS
29-
_mm512_conflict_epi64 (__m512i __A)
30-
{
31-
return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
29+
_mm512_conflict_epi64(__m512i __A) {
30+
return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
3231
}
3332

3433
static __inline__ __m512i __DEFAULT_FN_ATTRS
35-
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
36-
{
37-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
38-
(__v8di)_mm512_conflict_epi64(__A),
39-
(__v8di)__W);
34+
_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
35+
return (__m512i)__builtin_ia32_selectq_512(
36+
(__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
4037
}
4138

4239
static __inline__ __m512i __DEFAULT_FN_ATTRS
43-
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
44-
{
40+
_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
4541
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4642
(__v8di)_mm512_conflict_epi64(__A),
47-
(__v8di)_mm512_setzero_si512 ());
43+
(__v8di)_mm512_setzero_si512());
4844
}
4945

5046
static __inline__ __m512i __DEFAULT_FN_ATTRS
51-
_mm512_conflict_epi32 (__m512i __A)
52-
{
53-
return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
47+
_mm512_conflict_epi32(__m512i __A) {
48+
return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
5449
}
5550

5651
static __inline__ __m512i __DEFAULT_FN_ATTRS
57-
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
58-
{
59-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
60-
(__v16si)_mm512_conflict_epi32(__A),
61-
(__v16si)__W);
52+
_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
53+
return (__m512i)__builtin_ia32_selectd_512(
54+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
6255
}
6356

6457
static __inline__ __m512i __DEFAULT_FN_ATTRS
65-
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
66-
{
67-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
68-
(__v16si)_mm512_conflict_epi32(__A),
69-
(__v16si)_mm512_setzero_si512());
58+
_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
59+
return (__m512i)__builtin_ia32_selectd_512(
60+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
61+
(__v16si)_mm512_setzero_si512());
7062
}
7163

72-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
73-
_mm512_lzcnt_epi32(__m512i __A) {
64+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
7465
return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
7566
(__v16si)_mm512_set1_epi32(32));
7667
}
7768

78-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
69+
static __inline__ __m512i __DEFAULT_FN_ATTRS
7970
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
80-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
81-
(__v16si)_mm512_lzcnt_epi32(__A),
82-
(__v16si)__W);
71+
return (__m512i)__builtin_ia32_selectd_512(
72+
(__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
8373
}
8474

85-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
75+
static __inline__ __m512i __DEFAULT_FN_ATTRS
8676
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
8777
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
8878
(__v16si)_mm512_lzcnt_epi32(__A),
8979
(__v16si)_mm512_setzero_si512());
9080
}
9181

92-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93-
_mm512_lzcnt_epi64(__m512i __A) {
82+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
9483
return (__m512i)__builtin_elementwise_clzg(
9584
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
9685
}
9786

98-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
87+
static __inline__ __m512i __DEFAULT_FN_ATTRS
9988
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
100-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
101-
(__v8di)_mm512_lzcnt_epi64(__A),
102-
(__v8di)__W);
89+
return (__m512i)__builtin_ia32_selectq_512(
90+
(__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
10391
}
10492

105-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93+
static __inline__ __m512i __DEFAULT_FN_ATTRS
10694
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
10795
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
10896
(__v8di)_mm512_lzcnt_epi64(__A),
10997
(__v8di)_mm512_setzero_si512());
11098
}
11199

112100
static __inline__ __m512i __DEFAULT_FN_ATTRS
113-
_mm512_broadcastmb_epi64 (__mmask8 __A)
114-
{
115-
return (__m512i) _mm512_set1_epi64((long long) __A);
101+
_mm512_broadcastmb_epi64(__mmask8 __A) {
102+
return (__m512i)_mm512_set1_epi64((long long)__A);
116103
}
117104

118105
static __inline__ __m512i __DEFAULT_FN_ATTRS
119-
_mm512_broadcastmw_epi32 (__mmask16 __A)
120-
{
121-
return (__m512i) _mm512_set1_epi32((int) __A);
122-
106+
_mm512_broadcastmw_epi32(__mmask16 __A) {
107+
return (__m512i)_mm512_set1_epi32((int)__A);
123108
}
124109

125110
#undef __DEFAULT_FN_ATTRS
126-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
127111

128112
#endif

0 commit comments

Comments
 (0)