Skip to content

Commit 06d5e73

Browse files
authored
Merge branch 'main' into cir_x86_avx512_mask_builtin_lowering
2 parents c3a6337 + 0b16512 commit 06d5e73

31 files changed

+691
-369
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3310,15 +3310,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
33103310
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
33113311
}
33123312

3313-
let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3313+
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
33143314
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
33153315
}
33163316

3317-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3317+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
33183318
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
33193319
}
33203320

3321-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3321+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
33223322
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
33233323
}
33243324

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3531,6 +3531,60 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
35313531
return true;
35323532
}
35333533

3534+
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
3535+
const CallExpr *Call) {
3536+
assert(Call->getNumArgs() == 2);
3537+
3538+
QualType ATy = Call->getArg(0)->getType();
3539+
QualType BTy = Call->getArg(1)->getType();
3540+
if (!ATy->isVectorType() || !BTy->isVectorType()) {
3541+
return false;
3542+
}
3543+
3544+
const Pointer &BPtr = S.Stk.pop<Pointer>();
3545+
const Pointer &APtr = S.Stk.pop<Pointer>();
3546+
const auto *AVecT = ATy->castAs<VectorType>();
3547+
assert(AVecT->getNumElements() ==
3548+
BTy->castAs<VectorType>()->getNumElements());
3549+
3550+
PrimType ElemT = *S.getContext().classify(AVecT->getElementType());
3551+
3552+
unsigned NumBytesInQWord = 8;
3553+
unsigned NumBitsInByte = 8;
3554+
unsigned NumBytes = AVecT->getNumElements();
3555+
unsigned NumQWords = NumBytes / NumBytesInQWord;
3556+
const Pointer &Dst = S.Stk.peek<Pointer>();
3557+
3558+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
3559+
APInt BQWord(64, 0);
3560+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3561+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
3562+
INT_TYPE_SWITCH(ElemT, {
3563+
uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
3564+
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
3565+
});
3566+
}
3567+
3568+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3569+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
3570+
uint64_t Ctrl = 0;
3571+
INT_TYPE_SWITCH(
3572+
ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });
3573+
3574+
APInt Byte(8, 0);
3575+
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
3576+
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
3577+
}
3578+
INT_TYPE_SWITCH(ElemT,
3579+
{ Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
3580+
}
3581+
}
3582+
3583+
Dst.initializeAllElements();
3584+
3585+
return true;
3586+
}
3587+
35343588
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
35353589
uint32_t BuiltinID) {
35363590
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4756,6 +4810,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
47564810
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
47574811
});
47584812

4813+
case X86::BI__builtin_ia32_vpmultishiftqb128:
4814+
case X86::BI__builtin_ia32_vpmultishiftqb256:
4815+
case X86::BI__builtin_ia32_vpmultishiftqb512:
4816+
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
47594817
case X86::BI__builtin_ia32_kandqi:
47604818
case X86::BI__builtin_ia32_kandhi:
47614819
case X86::BI__builtin_ia32_kandsi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13096,6 +13096,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1309613096
return Success(R, E);
1309713097
}
1309813098

13099+
case X86::BI__builtin_ia32_vpmultishiftqb128:
13100+
case X86::BI__builtin_ia32_vpmultishiftqb256:
13101+
case X86::BI__builtin_ia32_vpmultishiftqb512: {
13102+
assert(E->getNumArgs() == 2);
13103+
13104+
APValue A, B;
13105+
if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1)))
13106+
return false;
13107+
13108+
assert(A.getVectorLength() == B.getVectorLength());
13109+
unsigned NumBytesInQWord = 8;
13110+
unsigned NumBitsInByte = 8;
13111+
unsigned NumBytes = A.getVectorLength();
13112+
unsigned NumQWords = NumBytes / NumBytesInQWord;
13113+
SmallVector<APValue, 64> Result;
13114+
Result.reserve(NumBytes);
13115+
13116+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
13117+
APInt BQWord(64, 0);
13118+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
13119+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
13120+
uint64_t Byte = B.getVectorElt(Idx).getInt().getZExtValue();
13121+
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
13122+
}
13123+
13124+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
13125+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
13126+
uint64_t Ctrl = A.getVectorElt(Idx).getInt().getZExtValue() & 0x3F;
13127+
13128+
APInt Byte(8, 0);
13129+
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
13130+
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
13131+
}
13132+
Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true)));
13133+
}
13134+
}
13135+
return Success(APValue(Result.data(), Result.size()), E);
13136+
}
13137+
1309913138
case X86::BI__builtin_ia32_phminposuw128: {
1310013139
APValue Source;
1310113140
if (!Evaluate(Source, Info, E->getArg(0)))

clang/lib/Headers/avx512vbmiintrin.h

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,61 +15,57 @@
1515
#define __VBMIINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
1819
#define __DEFAULT_FN_ATTRS \
1920
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
20-
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
21+
__min_vector_width__(512))) constexpr
2422
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
23+
#define __DEFAULT_FN_ATTRS \
24+
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
25+
__min_vector_width__(512)))
2626
#endif
2727

28-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
28+
static __inline__ __m512i __DEFAULT_FN_ATTRS
2929
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
3030
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
3131
(__v64qi) __B);
3232
}
3333

34-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
35-
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
36-
__m512i __B) {
34+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(
35+
__m512i __A, __mmask64 __U, __m512i __I, __m512i __B) {
3736
return (__m512i)__builtin_ia32_selectb_512(__U,
3837
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
3938
(__v64qi)__A);
4039
}
4140

42-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
43-
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
44-
__m512i __B) {
41+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(
42+
__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
4543
return (__m512i)__builtin_ia32_selectb_512(__U,
4644
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
4745
(__v64qi)__I);
4846
}
4947

50-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
51-
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
52-
__m512i __B) {
48+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(
49+
__mmask64 __U, __m512i __A, __m512i __I, __m512i __B) {
5350
return (__m512i)__builtin_ia32_selectb_512(__U,
5451
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
5552
(__v64qi)_mm512_setzero_si512());
5653
}
5754

58-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
55+
static __inline__ __m512i __DEFAULT_FN_ATTRS
5956
_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
6057
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
6158
}
6259

63-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
60+
static __inline__ __m512i __DEFAULT_FN_ATTRS
6461
_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
6562
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
6663
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
6764
(__v64qi)_mm512_setzero_si512());
6865
}
6966

70-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
71-
_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
72-
__m512i __B) {
67+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8(
68+
__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
7369
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
7470
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
7571
(__v64qi)__W);
@@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
9793
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
9894
(__v64qi)_mm512_setzero_si512());
9995
}
100-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
96+
10197
#undef __DEFAULT_FN_ATTRS
10298
#endif

clang/lib/Headers/avx512vbmivlintrin.h

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@
1515
#define __VBMIVLINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS128 \
20+
__attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512vbmi,avx512vl"), \
22+
__min_vector_width__(128))) constexpr
23+
#define __DEFAULT_FN_ATTRS256 \
24+
__attribute__((__always_inline__, __nodebug__, \
25+
__target__("avx512vbmi,avx512vl"), \
26+
__min_vector_width__(256))) constexpr
27+
#else
1828
#define __DEFAULT_FN_ATTRS128 \
1929
__attribute__((__always_inline__, __nodebug__, \
2030
__target__("avx512vbmi,avx512vl"), \
@@ -23,111 +33,96 @@
2333
__attribute__((__always_inline__, __nodebug__, \
2434
__target__("avx512vbmi,avx512vl"), \
2535
__min_vector_width__(256)))
26-
27-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
28-
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
29-
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
30-
#else
31-
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
32-
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
3336
#endif
3437

35-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
38+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
3639
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
3740
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
3841
(__v16qi)__I,
3942
(__v16qi)__B);
4043
}
4144

42-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
43-
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
44-
__m128i __B) {
45+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(
46+
__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) {
4547
return (__m128i)__builtin_ia32_selectb_128(__U,
4648
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
4749
(__v16qi)__A);
4850
}
4951

50-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
51-
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
52-
__m128i __B) {
52+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(
53+
__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) {
5354
return (__m128i)__builtin_ia32_selectb_128(__U,
5455
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
5556
(__v16qi)__I);
5657
}
5758

58-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
59-
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
60-
__m128i __B) {
59+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(
60+
__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) {
6161
return (__m128i)__builtin_ia32_selectb_128(__U,
6262
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
6363
(__v16qi)_mm_setzero_si128());
6464
}
6565

66-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
66+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
6767
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
6868
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
6969
(__v32qi)__B);
7070
}
7171

72-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
73-
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
74-
__m256i __B) {
72+
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(
73+
__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) {
7574
return (__m256i)__builtin_ia32_selectb_256(__U,
7675
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
7776
(__v32qi)__A);
7877
}
7978

80-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
81-
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
82-
__m256i __B) {
79+
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(
80+
__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) {
8381
return (__m256i)__builtin_ia32_selectb_256(__U,
8482
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
8583
(__v32qi)__I);
8684
}
8785

88-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
89-
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
90-
__m256i __B) {
86+
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(
87+
__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) {
9188
return (__m256i)__builtin_ia32_selectb_256(__U,
9289
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
9390
(__v32qi)_mm256_setzero_si256());
9491
}
9592

96-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
93+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
9794
_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
9895
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
9996
}
10097

101-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
98+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
10299
_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
103100
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
104101
(__v16qi)_mm_permutexvar_epi8(__A, __B),
105102
(__v16qi)_mm_setzero_si128());
106103
}
107104

108-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
109-
_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
110-
__m128i __B) {
105+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8(
106+
__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
111107
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
112108
(__v16qi)_mm_permutexvar_epi8(__A, __B),
113109
(__v16qi)__W);
114110
}
115111

116-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
112+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
117113
_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
118114
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
119115
}
120116

121-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
117+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
122118
_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
123119
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
124120
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
125121
(__v32qi)_mm256_setzero_si256());
126122
}
127123

128-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
129-
_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
130-
__m256i __B) {
124+
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8(
125+
__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
131126
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
132127
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
133128
(__v32qi)__W);
@@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
179174
(__v32qi)_mm256_setzero_si256());
180175
}
181176

182-
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
183-
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
184177
#undef __DEFAULT_FN_ATTRS128
185178
#undef __DEFAULT_FN_ATTRS256
186-
187179
#endif

0 commit comments

Comments
 (0)