Skip to content

Commit ce5276f

Browse files
authored
[Clang][X86] Add avx512 __builtin_ia32_select* constexpr handling (#152705)
This should allow us to constexpr many avx512 predicated intrinsics where they wrap basic intrinsics that are already constexpr Fixes #152321
1 parent ff68f71 commit ce5276f

14 files changed

+231
-206
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4130,99 +4130,99 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto
41304130
def vfcmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
41314131
}
41324132

4133-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4133+
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41344134
def selectb_128 : X86Builtin<"_Vector<16, char>(unsigned short, _Vector<16, char>, _Vector<16, char>)">;
41354135
}
41364136

4137-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4137+
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41384138
def selectb_256 : X86Builtin<"_Vector<32, char>(unsigned int, _Vector<32, char>, _Vector<32, char>)">;
41394139
}
41404140

4141-
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4141+
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
41424142
def selectb_512 : X86Builtin<"_Vector<64, char>(unsigned long long int, _Vector<64, char>, _Vector<64, char>)">;
41434143
}
41444144

4145-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4145+
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41464146
def selectw_128 : X86Builtin<"_Vector<8, short>(unsigned char, _Vector<8, short>, _Vector<8, short>)">;
41474147
}
41484148

4149-
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4149+
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41504150
def selectw_256 : X86Builtin<"_Vector<16, short>(unsigned short, _Vector<16, short>, _Vector<16, short>)">;
41514151
}
41524152

4153-
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4153+
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
41544154
def selectw_512 : X86Builtin<"_Vector<32, short>(unsigned int, _Vector<32, short>, _Vector<32, short>)">;
41554155
}
41564156

4157-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4157+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41584158
def selectd_128 : X86Builtin<"_Vector<4, int>(unsigned char, _Vector<4, int>, _Vector<4, int>)">;
41594159
}
41604160

4161-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4161+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41624162
def selectd_256 : X86Builtin<"_Vector<8, int>(unsigned char, _Vector<8, int>, _Vector<8, int>)">;
41634163
}
41644164

4165-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4165+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
41664166
def selectd_512 : X86Builtin<"_Vector<16, int>(unsigned short, _Vector<16, int>, _Vector<16, int>)">;
41674167
}
41684168

4169-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4169+
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41704170
def selectph_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
41714171
}
41724172

4173-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4173+
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41744174
def selectph_256 : X86Builtin<"_Vector<16, _Float16>(unsigned short, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
41754175
}
41764176

4177-
let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4177+
let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
41784178
def selectph_512 : X86Builtin<"_Vector<32, _Float16>(unsigned int, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
41794179
}
41804180

4181-
let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4181+
let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41824182
def selectpbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
41834183
}
41844184

4185-
let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4185+
let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41864186
def selectpbf_256 : X86Builtin<"_Vector<16, __bf16>(unsigned short, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
41874187
}
41884188

4189-
let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4189+
let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
41904190
def selectpbf_512 : X86Builtin<"_Vector<32, __bf16>(unsigned int, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
41914191
}
41924192

4193-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4193+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41944194
def selectq_128 : X86Builtin<"_Vector<2, long long int>(unsigned char, _Vector<2, long long int>, _Vector<2, long long int>)">;
41954195
}
41964196

4197-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4197+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
41984198
def selectq_256 : X86Builtin<"_Vector<4, long long int>(unsigned char, _Vector<4, long long int>, _Vector<4, long long int>)">;
41994199
}
42004200

4201-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4201+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
42024202
def selectq_512 : X86Builtin<"_Vector<8, long long int>(unsigned char, _Vector<8, long long int>, _Vector<8, long long int>)">;
42034203
}
42044204

4205-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4205+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
42064206
def selectps_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
42074207
}
42084208

4209-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4209+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
42104210
def selectps_256 : X86Builtin<"_Vector<8, float>(unsigned char, _Vector<8, float>, _Vector<8, float>)">;
42114211
}
42124212

4213-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4213+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
42144214
def selectps_512 : X86Builtin<"_Vector<16, float>(unsigned short, _Vector<16, float>, _Vector<16, float>)">;
42154215
}
42164216

4217-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4217+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
42184218
def selectpd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
42194219
}
42204220

4221-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4221+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
42224222
def selectpd_256 : X86Builtin<"_Vector<4, double>(unsigned char, _Vector<4, double>, _Vector<4, double>)">;
42234223
}
42244224

4225-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
4225+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
42264226
def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
42274227
}
42284228

clang/lib/AST/ExprConstant.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11783,6 +11783,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1178311783

1178411784
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1178511785
}
11786+
case X86::BI__builtin_ia32_selectb_128:
11787+
case X86::BI__builtin_ia32_selectb_256:
11788+
case X86::BI__builtin_ia32_selectb_512:
11789+
case X86::BI__builtin_ia32_selectw_128:
11790+
case X86::BI__builtin_ia32_selectw_256:
11791+
case X86::BI__builtin_ia32_selectw_512:
11792+
case X86::BI__builtin_ia32_selectd_128:
11793+
case X86::BI__builtin_ia32_selectd_256:
11794+
case X86::BI__builtin_ia32_selectd_512:
11795+
case X86::BI__builtin_ia32_selectq_128:
11796+
case X86::BI__builtin_ia32_selectq_256:
11797+
case X86::BI__builtin_ia32_selectq_512:
11798+
case X86::BI__builtin_ia32_selectph_128:
11799+
case X86::BI__builtin_ia32_selectph_256:
11800+
case X86::BI__builtin_ia32_selectph_512:
11801+
case X86::BI__builtin_ia32_selectpbf_128:
11802+
case X86::BI__builtin_ia32_selectpbf_256:
11803+
case X86::BI__builtin_ia32_selectpbf_512:
11804+
case X86::BI__builtin_ia32_selectps_128:
11805+
case X86::BI__builtin_ia32_selectps_256:
11806+
case X86::BI__builtin_ia32_selectps_512:
11807+
case X86::BI__builtin_ia32_selectpd_128:
11808+
case X86::BI__builtin_ia32_selectpd_256:
11809+
case X86::BI__builtin_ia32_selectpd_512: {
11810+
// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
11811+
APValue SourceMask, SourceLHS, SourceRHS;
11812+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceMask) ||
11813+
!EvaluateAsRValue(Info, E->getArg(1), SourceLHS) ||
11814+
!EvaluateAsRValue(Info, E->getArg(2), SourceRHS))
11815+
return false;
11816+
11817+
APSInt Mask = SourceMask.getInt();
11818+
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
11819+
unsigned SourceLen = SourceLHS.getVectorLength();
11820+
SmallVector<APValue, 4> ResultElements;
11821+
ResultElements.reserve(SourceLen);
11822+
11823+
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
11824+
const APValue &LHS = SourceLHS.getVectorElt(EltNum);
11825+
const APValue &RHS = SourceRHS.getVectorElt(EltNum);
11826+
ResultElements.push_back(Mask[EltNum] ? LHS : RHS);
11827+
}
11828+
11829+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11830+
}
1178611831
}
1178711832
}
1178811833

clang/lib/Headers/avx512bitalgintrin.h

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,47 +27,35 @@
2727
#endif
2828

2929
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
30-
_mm512_popcnt_epi16(__m512i __A)
31-
{
30+
_mm512_popcnt_epi16(__m512i __A) {
3231
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
3332
}
3433

35-
static __inline__ __m512i __DEFAULT_FN_ATTRS
36-
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
37-
{
38-
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
39-
(__v32hi) _mm512_popcnt_epi16(__B),
40-
(__v32hi) __A);
34+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
35+
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
36+
return (__m512i)__builtin_ia32_selectw_512(
37+
(__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
4138
}
4239

43-
static __inline__ __m512i __DEFAULT_FN_ATTRS
44-
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
45-
{
46-
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
47-
__U,
48-
__B);
40+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
41+
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
42+
return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
4943
}
5044

5145
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
52-
_mm512_popcnt_epi8(__m512i __A)
53-
{
46+
_mm512_popcnt_epi8(__m512i __A) {
5447
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
5548
}
5649

57-
static __inline__ __m512i __DEFAULT_FN_ATTRS
58-
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
59-
{
60-
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
61-
(__v64qi) _mm512_popcnt_epi8(__B),
62-
(__v64qi) __A);
50+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
51+
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
52+
return (__m512i)__builtin_ia32_selectb_512(
53+
(__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
6354
}
6455

65-
static __inline__ __m512i __DEFAULT_FN_ATTRS
66-
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
67-
{
68-
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
69-
__U,
70-
__B);
56+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
57+
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
58+
return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
7159
}
7260

7361
static __inline__ __mmask64 __DEFAULT_FN_ATTRS

clang/lib/Headers/avx512fintrin.h

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8933,36 +8933,28 @@ _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
89338933
return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
89348934
}
89358935

8936-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
8937-
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8938-
{
8939-
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8940-
(__v8df) __A,
8941-
(__v8df) __W);
8936+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8937+
_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
8938+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8939+
(__v8df)__W);
89428940
}
89438941

8944-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
8945-
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
8946-
{
8947-
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8948-
(__v8df) __A,
8949-
(__v8df) _mm512_setzero_pd ());
8942+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
8943+
_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) {
8944+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
8945+
(__v8df)_mm512_setzero_pd());
89508946
}
89518947

8952-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
8953-
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8954-
{
8955-
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8956-
(__v16sf) __A,
8957-
(__v16sf) __W);
8948+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8949+
_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
8950+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8951+
(__v16sf)__W);
89588952
}
89598953

8960-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
8961-
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
8962-
{
8963-
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8964-
(__v16sf) __A,
8965-
(__v16sf) _mm512_setzero_ps ());
8954+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
8955+
_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) {
8956+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
8957+
(__v16sf)_mm512_setzero_ps());
89668958
}
89678959

89688960
static __inline__ void __DEFAULT_FN_ATTRS512

0 commit comments

Comments
 (0)