Skip to content

Commit f8b9446

Browse files
committed
Merge branch 'main' of https://github.com/llvm/llvm-project into fix/163090
2 parents 138c174 + 4ad625b commit f8b9446

File tree

103 files changed

+2565
-960
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+2565
-960
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ X86 Support
517517
driver.
518518
- Remove `[no-]evex512` feature request from intrinsics and builtins.
519519
- Change features `avx10.x-[256,512]` to `avx10.x`.
520+
- `-march=wildcatlake` is now supported.
520521

521522
Arm and AArch64 Support
522523
^^^^^^^^^^^^^^^^^^^^^^^

clang/docs/UsersManual.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2325,7 +2325,7 @@ are listed below.
23252325
devirtualization and virtual constant propagation, for classes with
23262326
:doc:`hidden LTO visibility <LTOVisibility>`. Requires ``-flto``.
23272327

2328-
.. option:: -f[no]split-lto-unit
2328+
.. option:: -f[no-]split-lto-unit
23292329

23302330
Controls splitting the :doc:`LTO unit <LTOVisibility>` into regular LTO and
23312331
:doc:`ThinLTO` portions, when compiling with -flto=thin. Defaults to false
@@ -2518,7 +2518,7 @@ are listed below.
25182518

25192519
.. _funique_internal_linkage_names:
25202520

2521-
.. option:: -f[no]-unique-internal-linkage-names
2521+
.. option:: -f[no-]unique-internal-linkage-names
25222522

25232523
Controls whether Clang emits a unique (best-effort) symbol name for internal
25242524
linkage symbols. When this option is set, the compiler hashes the main source
@@ -2539,7 +2539,7 @@ are listed below.
25392539
$ cd $P/bar && clang -c -funique-internal-linkage-names name_conflict.c
25402540
$ cd $P && clang foo/name_conflict.o bar/name_conflict.o
25412541
2542-
.. option:: -f[no]-basic-block-address-map:
2542+
.. option:: -f[no-]basic-block-address-map
25432543
Emits a ``SHT_LLVM_BB_ADDR_MAP`` section which includes address offsets for each
25442544
basic block in the program, relative to the parent function address.
25452545

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,14 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
125125

126126
let Features = "ssse3" in {
127127
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
128-
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
129128
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
130129
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
131130
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
132131
}
133132

134133
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
135134
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
135+
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
136136
}
137137
}
138138

@@ -610,7 +610,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
610610
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
611611
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
612612
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
613-
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
614613
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
615614
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
616615
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -649,6 +648,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
649648
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
650649
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
651650

651+
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
652+
652653
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
653654
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
654655
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
@@ -1347,14 +1348,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
13471348

13481349
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
13491350
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
1350-
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13511351
}
13521352

13531353
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13541354
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
13551355
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13561356
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
13571357
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
1358+
1359+
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13581360
}
13591361

13601362
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2790,6 +2790,34 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
27902790
return true;
27912791
}
27922792

2793+
/// Constant-evaluate the byte-shuffle builtins (pshufb128/256/512) in the
/// bytecode interpreter.
///
/// Pops the control vector and then the source vector from the interpreter
/// stack and writes the shuffled bytes into the destination vector that is
/// left on top of the stack.
///
/// For every result byte:
///   * if bit 7 of the corresponding control byte is set, the result is 0;
///   * otherwise the low four bits of the control byte select a byte from the
///     same 16-byte lane of the source vector.
///
/// \returns true (the evaluation itself cannot fail once the operands have
/// been popped).
static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
                                        const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
  const Pointer &Control = S.Stk.pop<Pointer>();
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned NumElems = Dst.getNumElems();
  assert(NumElems == Control.getNumElems());
  // Validate the source operand too: it is indexed below with a lane-relative
  // offset, so it must have exactly as many elements as the destination.
  // (Comparing NumElems against Dst again would be a tautology.)
  assert(NumElems == Src.getNumElems());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    // Reinterpret the signed control byte as raw bits.
    uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));

    if (Ctlb & 0x80) {
      // High bit set: this result byte is zeroed.
      Dst.elem<int8_t>(Idx) = 0;
    } else {
      // Shuffles never cross a 16-byte lane: pick from the lane Idx lives in.
      unsigned LaneBase = (Idx / 16) * 16;
      unsigned SrcOffset = Ctlb & 0x0F;
      unsigned SrcIdx = LaneBase + SrcOffset;

      Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
    }
  }
  Dst.initializeAllElements();
  return true;
}
2820+
27932821
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
27942822
const CallExpr *Call, bool IsShufHW) {
27952823
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
@@ -3943,6 +3971,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
39433971
case X86::BI__builtin_ia32_selectpd_512:
39443972
return interp__builtin_select(S, OpPC, Call);
39453973

3974+
case X86::BI__builtin_ia32_pshufb128:
3975+
case X86::BI__builtin_ia32_pshufb256:
3976+
case X86::BI__builtin_ia32_pshufb512:
3977+
return interp__builtin_ia32_pshufb(S, OpPC, Call);
3978+
39463979
case X86::BI__builtin_ia32_pshuflw:
39473980
case X86::BI__builtin_ia32_pshuflw256:
39483981
case X86::BI__builtin_ia32_pshuflw512:

clang/lib/AST/DeclBase.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,11 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Context,
7777
*PrefixPtr = ID.getRawValue();
7878

7979
// We leave the upper 16 bits to store the module IDs. 48 bits should be
80-
// sufficient to store a declaration ID.
81-
assert(*PrefixPtr < llvm::maskTrailingOnes<uint64_t>(48));
80+
// sufficient to store a declaration ID. See the comments in setOwningModuleID
81+
// for details.
82+
assert((*PrefixPtr < llvm::maskTrailingOnes<uint64_t>(48)) &&
83+
"Current Implementation limits the number of module files to not "
84+
"exceed 2^16. Contact Clang Developers to remove the limitation.");
8285

8386
return Result;
8487
}
@@ -122,6 +125,25 @@ unsigned Decl::getOwningModuleID() const {
122125

123126
void Decl::setOwningModuleID(unsigned ID) {
124127
assert(isFromASTFile() && "Only works on a deserialized declaration");
128+
// Currently, we use 64 bits to store the GlobalDeclID and the module ID
129+
// to save space. See `Decl::operator new` for details. To achieve this,
130+
// we split the higher 32 bits into two 16-bit fields: the module file index of
131+
// GlobalDeclID and the module ID. This introduces a limitation that the
132+
// number of modules can't exceed 2^16. (The number of module files should be
133+
// less than the number of modules).
134+
//
135+
// It is counter-intuitive to store both the module file index and the
136+
// module ID as it seems redundant. However, this is not true.
137+
// The module ID may be different from the module file where it is serialized
138+
// from for implicit template instantiations. See
139+
// https://github.com/llvm/llvm-project/issues/101939
140+
//
141+
// If we reach the limitation, we have to remove the limitation by asking
142+
// every deserialized declaration to pay for yet another 32 bits, or we have
143+
// to review the above issue to decide what we should do for it.
144+
assert((ID < llvm::maskTrailingOnes<unsigned>(16)) &&
145+
"Current Implementation limits the number of modules to not exceed "
146+
"2^16. Contact Clang Developers to remove the limitation.");
125147
uint64_t *IDAddress = (uint64_t *)this - 1;
126148
*IDAddress &= llvm::maskTrailingOnes<uint64_t>(48);
127149
*IDAddress |= (uint64_t)ID << 48;

clang/lib/AST/ExprConstant.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11619,6 +11619,44 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161911619
return true;
1162011620
}
1162111621

11622+
/// Constant-evaluate a pshufb128/256/512 builtin call.
///
/// Argument 0 is the source byte vector and argument 1 the control vector.
/// Each result element is zero when bit 7 of its control byte is set;
/// otherwise it is the source element selected by the low four bits of the
/// control byte within the same 16-element lane.
///
/// \param Info  Evaluation state used to evaluate both operands.
/// \param Call  The builtin call expression; its type must be a vector type.
/// \param Out   Receives the resulting vector value on success.
/// \returns false if either operand cannot be evaluated or the call's type
/// is not a vector type; true otherwise.
static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
                              APValue &Out) {
  APValue SrcVec, ControlVec;
  // Operands are evaluated in order; the second is skipped if the first fails.
  if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec) ||
      !EvaluateAsRValue(Info, Call->getArg(1), ControlVec))
    return false;

  const auto *VecTy = Call->getType()->getAs<VectorType>();
  if (!VecTy)
    return false;

  const QualType ByteTy = VecTy->getElementType();
  const unsigned NumBytes = VecTy->getNumElements();

  SmallVector<APValue, 64> Shuffled;
  Shuffled.reserve(NumBytes);

  for (unsigned I = 0; I < NumBytes; ++I) {
    const uint8_t Selector = static_cast<uint8_t>(
        ControlVec.getVectorElt(I).getInt().getZExtValue());

    // Bit 7 of the control byte forces the result element to zero.
    if ((Selector & 0x80) != 0) {
      Shuffled.push_back(APValue(Info.Ctx.MakeIntValue(0, ByteTy)));
      continue;
    }

    // Otherwise select within the 16-element lane that contains I.
    const unsigned LaneStart = I - (I % 16);
    Shuffled.push_back(SrcVec.getVectorElt(LaneStart + (Selector & 0x0F)));
  }

  Out = APValue(Shuffled.data(), Shuffled.size());
  return true;
}
11659+
1162211660
static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
1162311661
bool IsShufHW, APValue &Out) {
1162411662
APValue Vec;
@@ -12241,6 +12279,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1224112279
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1224212280
}
1224312281

12282+
case X86::BI__builtin_ia32_pshufb128:
12283+
case X86::BI__builtin_ia32_pshufb256:
12284+
case X86::BI__builtin_ia32_pshufb512: {
12285+
APValue R;
12286+
if (!evalPshufbBuiltin(Info, E, R))
12287+
return false;
12288+
return Success(R, E);
12289+
}
12290+
1224412291
case X86::BI__builtin_ia32_pshuflw:
1224512292
case X86::BI__builtin_ia32_pshuflw256:
1224612293
case X86::BI__builtin_ia32_pshuflw512: {

clang/lib/Basic/Targets/X86.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
625625
case CK_ArrowlakeS:
626626
case CK_Lunarlake:
627627
case CK_Pantherlake:
628+
case CK_Wildcatlake:
628629
case CK_Sierraforest:
629630
case CK_Grandridge:
630631
case CK_Graniterapids:
@@ -1613,6 +1614,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
16131614
case CK_ArrowlakeS:
16141615
case CK_Lunarlake:
16151616
case CK_Pantherlake:
1617+
case CK_Wildcatlake:
16161618
case CK_Sierraforest:
16171619
case CK_Grandridge:
16181620
case CK_Graniterapids:

clang/lib/Headers/avx2intrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,9 +1852,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b)
18521852
/// control byte specify the index (within the same 128-bit half) of \a __a
18531853
/// to copy to the result byte.
18541854
/// \returns A 256-bit integer vector containing the result.
1855-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1856-
_mm256_shuffle_epi8(__m256i __a, __m256i __b)
1857-
{
1855+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1856+
_mm256_shuffle_epi8(__m256i __a, __m256i __b) {
18581857
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
18591858
}
18601859

clang/lib/Headers/avx512bwintrin.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -866,23 +866,20 @@ _mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
866866
(__v32hi)__W);
867867
}
868868

869-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
870-
_mm512_shuffle_epi8(__m512i __A, __m512i __B)
871-
{
869+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
870+
_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
872871
return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
873872
}
874873

875-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
876-
_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
877-
{
874+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
875+
_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
878876
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
879877
(__v64qi)_mm512_shuffle_epi8(__A, __B),
880878
(__v64qi)__W);
881879
}
882880

883-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
884-
_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
885-
{
881+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
882+
_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
886883
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
887884
(__v64qi)_mm512_shuffle_epi8(__A, __B),
888885
(__v64qi)_mm512_setzero_si512());

clang/lib/Headers/avx512cdintrin.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,17 +109,14 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
109109
(__v8di)_mm512_setzero_si512());
110110
}
111111

112-
static __inline__ __m512i __DEFAULT_FN_ATTRS
113-
_mm512_broadcastmb_epi64 (__mmask8 __A)
114-
{
112+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
113+
_mm512_broadcastmb_epi64(__mmask8 __A) {
115114
return (__m512i) _mm512_set1_epi64((long long) __A);
116115
}
117116

118-
static __inline__ __m512i __DEFAULT_FN_ATTRS
119-
_mm512_broadcastmw_epi32 (__mmask16 __A)
120-
{
121-
return (__m512i) _mm512_set1_epi32((int) __A);
122-
117+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
118+
_mm512_broadcastmw_epi32(__mmask16 __A) {
119+
return (__m512i)_mm512_set1_epi32((int)__A);
123120
}
124121

125122
#undef __DEFAULT_FN_ATTRS

0 commit comments

Comments
 (0)