Skip to content

Commit dce7abe

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr
Resolves #160524
1 parent e95cedd commit dce7abe

File tree

7 files changed

+230
-163
lines changed

7 files changed

+230
-163
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,23 +1341,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
13411341
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13421342
}
13431343

1344-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1344+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13451345
def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
1346-
}
1347-
1348-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1349-
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
1350-
}
1351-
1352-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
13531346
def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
13541347
}
13551348

1356-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1349+
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
1350+
def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
13571351
def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
13581352
}
13591353

1360-
let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1354+
let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13611355
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13621356
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
13631357
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3725,7 +3725,41 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
37253725
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
37263726
return llvm::APIntOps::fshr(Hi, Lo, Amt);
37273727
});
3728+
case X86::BI__builtin_ia32_vpconflictsi_128:
3729+
case X86::BI__builtin_ia32_vpconflictsi_256:
3730+
case X86::BI__builtin_ia32_vpconflictsi_512:
3731+
case X86::BI__builtin_ia32_vpconflictdi_128:
3732+
case X86::BI__builtin_ia32_vpconflictdi_256:
3733+
case X86::BI__builtin_ia32_vpconflictdi_512: {
3734+
assert(Call->getNumArgs() == 1);
3735+
3736+
QualType Arg0Type = Call->getArg(0)->getType();
3737+
const auto *VecT = Arg0Type->castAs<VectorType>();
3738+
const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
3739+
unsigned NumElems = VecT->getNumElements();
3740+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3741+
const Pointer &Src = S.Stk.pop<Pointer>();
3742+
const Pointer &Dst = S.Stk.peek<Pointer>();
37283743

3744+
for (unsigned I = 0; I != NumElems; ++I) {
3745+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3746+
APSInt ElemI = Src.elem<T>(I).toAPSInt();
3747+
unsigned ConflictMask = 0;
3748+
for (unsigned J = 0; J != NumElems; ++J) {
3749+
if (I == J)
3750+
continue;
3751+
APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3752+
if (ElemI == ElemJ) {
3753+
ConflictMask |= (1 << J);
3754+
}
3755+
}
3756+
Dst.elem<T>(I) = static_cast<T>(
3757+
APSInt(APInt(ElemI.getBitWidth(), ConflictMask), DestUnsigned));
3758+
});
3759+
}
3760+
Dst.initializeAllElements();
3761+
return true;
3762+
}
37293763
case clang::X86::BI__builtin_ia32_blendpd:
37303764
case clang::X86::BI__builtin_ia32_blendpd256:
37313765
case clang::X86::BI__builtin_ia32_blendps:

clang/lib/AST/ExprConstant.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12094,6 +12094,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1209412094

1209512095
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1209612096
}
12097+
case X86::BI__builtin_ia32_vpconflictsi_128:
12098+
case X86::BI__builtin_ia32_vpconflictsi_256:
12099+
case X86::BI__builtin_ia32_vpconflictsi_512:
12100+
case X86::BI__builtin_ia32_vpconflictdi_128:
12101+
case X86::BI__builtin_ia32_vpconflictdi_256:
12102+
case X86::BI__builtin_ia32_vpconflictdi_512: {
12103+
APValue Source;
12104+
12105+
if (!EvaluateAsRValue(Info, E->getArg(0), Source))
12106+
return false;
12107+
12108+
unsigned SourceLen = Source.getVectorLength();
12109+
SmallVector<APValue, 32> ResultElements;
12110+
ResultElements.reserve(SourceLen);
12111+
12112+
for (unsigned I = 0; I != SourceLen; ++I) {
12113+
const APValue &EltI = Source.getVectorElt(I);
12114+
12115+
unsigned ConflictMask = 0;
12116+
for (unsigned J = 0; J != SourceLen; ++J) {
12117+
const APValue &EltJ = Source.getVectorElt(J);
12118+
if (I == J)
12119+
continue;
12120+
if (EltI.getInt() == EltJ.getInt())
12121+
ConflictMask |= (1 << J);
12122+
}
12123+
12124+
APInt ConflictMaskInt(EltI.getInt().getBitWidth(), ConflictMask);
12125+
ResultElements.push_back(APValue(APSInt(ConflictMaskInt, true)));
12126+
}
12127+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12128+
}
1209712129
case X86::BI__builtin_ia32_blendpd:
1209812130
case X86::BI__builtin_ia32_blendpd256:
1209912131
case X86::BI__builtin_ia32_blendps:

clang/lib/Headers/avx512cdintrin.h

Lines changed: 29 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,58 +15,50 @@
1515
#define __AVX512CDINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
constexpr __attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512cd"), __min_vector_width__(512)))
22+
#else
1823
#define __DEFAULT_FN_ATTRS \
1924
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
2025
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24-
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
2626
#endif
2727

2828
static __inline__ __m512i __DEFAULT_FN_ATTRS
29-
_mm512_conflict_epi64 (__m512i __A)
30-
{
31-
return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
29+
_mm512_conflict_epi64(__m512i __A) {
30+
return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
3231
}
3332

3433
static __inline__ __m512i __DEFAULT_FN_ATTRS
35-
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
36-
{
37-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
38-
(__v8di)_mm512_conflict_epi64(__A),
39-
(__v8di)__W);
34+
_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
35+
return (__m512i)__builtin_ia32_selectq_512(
36+
(__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
4037
}
4138

4239
static __inline__ __m512i __DEFAULT_FN_ATTRS
43-
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
44-
{
40+
_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
4541
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4642
(__v8di)_mm512_conflict_epi64(__A),
47-
(__v8di)_mm512_setzero_si512 ());
43+
(__v8di)_mm512_setzero_si512());
4844
}
4945

5046
static __inline__ __m512i __DEFAULT_FN_ATTRS
51-
_mm512_conflict_epi32 (__m512i __A)
52-
{
53-
return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
47+
_mm512_conflict_epi32(__m512i __A) {
48+
return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
5449
}
5550

5651
static __inline__ __m512i __DEFAULT_FN_ATTRS
57-
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
58-
{
59-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
60-
(__v16si)_mm512_conflict_epi32(__A),
61-
(__v16si)__W);
52+
_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
53+
return (__m512i)__builtin_ia32_selectd_512(
54+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
6255
}
6356

6457
static __inline__ __m512i __DEFAULT_FN_ATTRS
65-
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
66-
{
67-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
68-
(__v16si)_mm512_conflict_epi32(__A),
69-
(__v16si)_mm512_setzero_si512());
58+
_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
59+
return (__m512i)__builtin_ia32_selectd_512(
60+
(__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
61+
(__v16si)_mm512_setzero_si512());
7062
}
7163

7264
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
@@ -77,9 +69,8 @@ _mm512_lzcnt_epi32(__m512i __A) {
7769

7870
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
7971
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
80-
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
81-
(__v16si)_mm512_lzcnt_epi32(__A),
82-
(__v16si)__W);
72+
return (__m512i)__builtin_ia32_selectd_512(
73+
(__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
8374
}
8475

8576
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
@@ -97,9 +88,8 @@ _mm512_lzcnt_epi64(__m512i __A) {
9788

9889
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
9990
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
100-
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
101-
(__v8di)_mm512_lzcnt_epi64(__A),
102-
(__v8di)__W);
91+
return (__m512i)__builtin_ia32_selectq_512(
92+
(__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
10393
}
10494

10595
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
@@ -110,19 +100,15 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
110100
}
111101

112102
static __inline__ __m512i __DEFAULT_FN_ATTRS
113-
_mm512_broadcastmb_epi64 (__mmask8 __A)
114-
{
115-
return (__m512i) _mm512_set1_epi64((long long) __A);
103+
_mm512_broadcastmb_epi64(__mmask8 __A) {
104+
return (__m512i)_mm512_set1_epi64((long long)__A);
116105
}
117106

118107
static __inline__ __m512i __DEFAULT_FN_ATTRS
119-
_mm512_broadcastmw_epi32 (__mmask16 __A)
120-
{
121-
return (__m512i) _mm512_set1_epi32((int) __A);
122-
108+
_mm512_broadcastmw_epi32(__mmask16 __A) {
109+
return (__m512i)_mm512_set1_epi32((int)__A);
123110
}
124111

125112
#undef __DEFAULT_FN_ATTRS
126-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
127113

128114
#endif

0 commit comments

Comments
 (0)