Skip to content

Commit 0126193

Browse files
authored
Merge branch 'main' into x86-bf16-cvt
2 parents 0542f5a + df80612 commit 0126193

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1608
-311
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,6 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
156156
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
157157
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
158158
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
159-
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
160-
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
161159
}
162160

163161
let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
@@ -170,8 +168,6 @@ let Features = "sse2", Attributes = [NoThrow] in {
170168

171169
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
172170
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
173-
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
174-
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
175171
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
176172
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
177173
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
@@ -513,8 +509,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
513509
}
514510

515511
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
516-
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
517-
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
518512
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
519513
def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
520514
def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
@@ -3310,15 +3304,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
33103304
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
33113305
}
33123306

3313-
let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3307+
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
33143308
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
33153309
}
33163310

3317-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3311+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
33183312
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
33193313
}
33203314

3321-
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3315+
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
33223316
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
33233317
}
33243318

@@ -3535,14 +3529,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
35353529
def reducesh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">;
35363530
}
35373531

3538-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3539-
def sqrtph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>)">;
3540-
}
3541-
3542-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
3543-
def sqrtph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>)">;
3544-
}
3545-
35463532
let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
35473533
def sqrtph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int)">;
35483534
}
@@ -5061,15 +5047,3 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
50615047
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
50625048
def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
50635049
}
5064-
5065-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
5066-
def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">;
5067-
}
5068-
5069-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
5070-
def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">;
5071-
}
5072-
5073-
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
5074-
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
5075-
}

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,8 +1435,12 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
14351435
return false;
14361436

14371437
if (Ptr.isIntegralPointer()) {
1438-
S.Stk.push<Pointer>(Ptr.asIntPointer().atOffset(S.getASTContext(), Off));
1439-
return true;
1438+
if (std::optional<IntPointer> IntPtr =
1439+
Ptr.asIntPointer().atOffset(S.getASTContext(), Off)) {
1440+
S.Stk.push<Pointer>(std::move(*IntPtr));
1441+
return true;
1442+
}
1443+
return false;
14401444
}
14411445

14421446
if (!Ptr.isBlockPointer()) {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3531,6 +3531,60 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
35313531
return true;
35323532
}
35333533

3534+
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
3535+
const CallExpr *Call) {
3536+
assert(Call->getNumArgs() == 2);
3537+
3538+
QualType ATy = Call->getArg(0)->getType();
3539+
QualType BTy = Call->getArg(1)->getType();
3540+
if (!ATy->isVectorType() || !BTy->isVectorType()) {
3541+
return false;
3542+
}
3543+
3544+
const Pointer &BPtr = S.Stk.pop<Pointer>();
3545+
const Pointer &APtr = S.Stk.pop<Pointer>();
3546+
const auto *AVecT = ATy->castAs<VectorType>();
3547+
assert(AVecT->getNumElements() ==
3548+
BTy->castAs<VectorType>()->getNumElements());
3549+
3550+
PrimType ElemT = *S.getContext().classify(AVecT->getElementType());
3551+
3552+
unsigned NumBytesInQWord = 8;
3553+
unsigned NumBitsInByte = 8;
3554+
unsigned NumBytes = AVecT->getNumElements();
3555+
unsigned NumQWords = NumBytes / NumBytesInQWord;
3556+
const Pointer &Dst = S.Stk.peek<Pointer>();
3557+
3558+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
3559+
APInt BQWord(64, 0);
3560+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3561+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
3562+
INT_TYPE_SWITCH(ElemT, {
3563+
uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
3564+
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
3565+
});
3566+
}
3567+
3568+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3569+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
3570+
uint64_t Ctrl = 0;
3571+
INT_TYPE_SWITCH(
3572+
ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });
3573+
3574+
APInt Byte(8, 0);
3575+
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
3576+
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
3577+
}
3578+
INT_TYPE_SWITCH(ElemT,
3579+
{ Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
3580+
}
3581+
}
3582+
3583+
Dst.initializeAllElements();
3584+
3585+
return true;
3586+
}
3587+
35343588
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
35353589
uint32_t BuiltinID) {
35363590
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4756,6 +4810,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
47564810
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
47574811
});
47584812

4813+
case X86::BI__builtin_ia32_vpmultishiftqb128:
4814+
case X86::BI__builtin_ia32_vpmultishiftqb256:
4815+
case X86::BI__builtin_ia32_vpmultishiftqb512:
4816+
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
47594817
case X86::BI__builtin_ia32_kandqi:
47604818
case X86::BI__builtin_ia32_kandhi:
47614819
case X86::BI__builtin_ia32_kandsi:

clang/lib/AST/ByteCode/Pointer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -895,8 +895,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
895895
return Result;
896896
}
897897

898-
IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
899-
unsigned Offset) const {
898+
std::optional<IntPointer> IntPointer::atOffset(const ASTContext &ASTCtx,
899+
unsigned Offset) const {
900900
if (!this->Desc)
901901
return *this;
902902
const Record *R = this->Desc->ElemRecord;
@@ -914,6 +914,9 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
914914
return *this;
915915

916916
const FieldDecl *FD = F->Decl;
917+
if (FD->getParent()->isInvalidDecl())
918+
return std::nullopt;
919+
917920
const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent());
918921
unsigned FieldIndex = FD->getFieldIndex();
919922
uint64_t FieldOffset =

clang/lib/AST/ByteCode/Pointer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ struct IntPointer {
4747
const Descriptor *Desc;
4848
uint64_t Value;
4949

50-
IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const;
50+
std::optional<IntPointer> atOffset(const ASTContext &ASTCtx,
51+
unsigned Offset) const;
5152
IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const;
5253
};
5354

clang/lib/AST/ExprConstant.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13096,6 +13096,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1309613096
return Success(R, E);
1309713097
}
1309813098

13099+
case X86::BI__builtin_ia32_vpmultishiftqb128:
13100+
case X86::BI__builtin_ia32_vpmultishiftqb256:
13101+
case X86::BI__builtin_ia32_vpmultishiftqb512: {
13102+
assert(E->getNumArgs() == 2);
13103+
13104+
APValue A, B;
13105+
if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1)))
13106+
return false;
13107+
13108+
assert(A.getVectorLength() == B.getVectorLength());
13109+
unsigned NumBytesInQWord = 8;
13110+
unsigned NumBitsInByte = 8;
13111+
unsigned NumBytes = A.getVectorLength();
13112+
unsigned NumQWords = NumBytes / NumBytesInQWord;
13113+
SmallVector<APValue, 64> Result;
13114+
Result.reserve(NumBytes);
13115+
13116+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
13117+
APInt BQWord(64, 0);
13118+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
13119+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
13120+
uint64_t Byte = B.getVectorElt(Idx).getInt().getZExtValue();
13121+
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
13122+
}
13123+
13124+
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
13125+
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
13126+
uint64_t Ctrl = A.getVectorElt(Idx).getInt().getZExtValue() & 0x3F;
13127+
13128+
APInt Byte(8, 0);
13129+
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
13130+
Byte.setBitVal(BitIdx, BQWord[(Ctrl + BitIdx) & 0x3F]);
13131+
}
13132+
Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true)));
13133+
}
13134+
}
13135+
return Success(APValue(Result.data(), Result.size()), E);
13136+
}
13137+
1309913138
case X86::BI__builtin_ia32_phminposuw128: {
1310013139
APValue Source;
1310113140
if (!Evaluate(Source, Info, E->getArg(0)))

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 19 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,21 +2171,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
21712171
return Builder.CreateBitCast(Res, Ops[0]->getType());
21722172
}
21732173

2174-
case X86::BI__builtin_ia32_sqrtss:
2175-
case X86::BI__builtin_ia32_sqrtsd: {
2176-
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2177-
Function *F;
2178-
if (Builder.getIsFPConstrained()) {
2179-
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2180-
F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2181-
A->getType());
2182-
A = Builder.CreateConstrainedFPCall(F, {A});
2183-
} else {
2184-
F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2185-
A = Builder.CreateCall(F, {A});
2186-
}
2187-
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2188-
}
21892174
case X86::BI__builtin_ia32_sqrtsh_round_mask:
21902175
case X86::BI__builtin_ia32_sqrtsd_round_mask:
21912176
case X86::BI__builtin_ia32_sqrtss_round_mask: {
@@ -2225,40 +2210,29 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
22252210
A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
22262211
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
22272212
}
2228-
case X86::BI__builtin_ia32_sqrtpd256:
2229-
case X86::BI__builtin_ia32_sqrtpd:
2230-
case X86::BI__builtin_ia32_sqrtps256:
2231-
case X86::BI__builtin_ia32_sqrtps:
2232-
case X86::BI__builtin_ia32_sqrtph256:
2233-
case X86::BI__builtin_ia32_sqrtph:
22342213
case X86::BI__builtin_ia32_sqrtph512:
2235-
case X86::BI__builtin_ia32_vsqrtbf16256:
2236-
case X86::BI__builtin_ia32_vsqrtbf16:
2237-
case X86::BI__builtin_ia32_vsqrtbf16512:
22382214
case X86::BI__builtin_ia32_sqrtps512:
22392215
case X86::BI__builtin_ia32_sqrtpd512: {
2240-
if (Ops.size() == 2) {
2241-
unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2242-
// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2243-
// otherwise keep the intrinsic.
2244-
if (CC != 4) {
2245-
Intrinsic::ID IID;
2246-
2247-
switch (BuiltinID) {
2248-
default:
2249-
llvm_unreachable("Unsupported intrinsic!");
2250-
case X86::BI__builtin_ia32_sqrtph512:
2251-
IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2252-
break;
2253-
case X86::BI__builtin_ia32_sqrtps512:
2254-
IID = Intrinsic::x86_avx512_sqrt_ps_512;
2255-
break;
2256-
case X86::BI__builtin_ia32_sqrtpd512:
2257-
IID = Intrinsic::x86_avx512_sqrt_pd_512;
2258-
break;
2259-
}
2260-
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2216+
unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2217+
// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2218+
// otherwise keep the intrinsic.
2219+
if (CC != 4) {
2220+
Intrinsic::ID IID;
2221+
2222+
switch (BuiltinID) {
2223+
default:
2224+
llvm_unreachable("Unsupported intrinsic!");
2225+
case X86::BI__builtin_ia32_sqrtph512:
2226+
IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2227+
break;
2228+
case X86::BI__builtin_ia32_sqrtps512:
2229+
IID = Intrinsic::x86_avx512_sqrt_ps_512;
2230+
break;
2231+
case X86::BI__builtin_ia32_sqrtpd512:
2232+
IID = Intrinsic::x86_avx512_sqrt_pd_512;
2233+
break;
22612234
}
2235+
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
22622236
}
22632237
if (Builder.getIsFPConstrained()) {
22642238
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);

clang/lib/Headers/avx10_2_512bf16intrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
429429
(__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
430430

431431
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
432-
return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A);
432+
return __builtin_elementwise_sqrt(__A);
433433
}
434434

435435
static __inline__ __m512bh __DEFAULT_FN_ATTRS512

clang/lib/Headers/avx10_2bf16intrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
826826
(__v8bf)_mm_setzero_pbh(), (__mmask8)(__U)))
827827

828828
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) {
829-
return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A);
829+
return __builtin_elementwise_sqrt(__A);
830830
}
831831

832832
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -843,7 +843,7 @@ _mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
843843
}
844844

845845
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) {
846-
return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A);
846+
return __builtin_elementwise_sqrt(__A);
847847
}
848848

849849
static __inline__ __m128bh __DEFAULT_FN_ATTRS128

0 commit comments

Comments
 (0)