Skip to content

Commit 581591b

Browse files
committed
Merge "merge main into amd-staging" into amd-staging
2 parents 691d025 + 98566d3 commit 581591b

File tree

162 files changed

+6878
-1543
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+6878
-1543
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,7 @@ Bug Fixes to C++ Support
624624
- Fixed an assertion failure in range calculations for conditional throw expressions. (#GH111854)
625625
- Clang now correctly ignores previous partial specializations of member templates explicitly specialized for
626626
an implicitly instantiated class template specialization. (#GH51051)
627+
- Fixed an assertion failure caused by invalid enum forward declarations. (#GH112208)
627628

628629
Bug Fixes to AST Handling
629630
^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -710,6 +711,7 @@ X86 Support
710711
- Supported intrinsics for ``MOVRS AND AVX10.2``.
711712
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
712713
- Support ISA of ``AMX-FP8``.
714+
- Support ISA of ``AMX-TRANSPOSE``.
713715

714716
Arm and AArch64 Support
715717
^^^^^^^^^^^^^^^^^^^^^^^

clang/docs/analyzer/checkers.rst

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3584,7 +3584,7 @@ These are examples of cases that we consider safe:
35843584
RefCountable* uncounted = this; // ok
35853585
}
35863586
3587-
Here are some examples of situations that we warn about as they *might* be potentially unsafe. The logic is that either we're able to guarantee that an argument is safe or it's considered if not a bug then bug-prone.
3587+
Here are some examples of situations that we warn about as they *might* be potentially unsafe. The logic is that either we're able to guarantee that a local variable is safe or it's considered unsafe.
35883588
35893589
.. code-block:: cpp
35903590
@@ -3603,11 +3603,48 @@ Here are some examples of situations that we warn about as they *might* be poten
36033603
RefCountable* uncounted = counted.get(); // warn
36043604
}
36053605
3606-
We don't warn about these cases - we don't consider them necessarily safe but since they are very common and usually safe we'd introduce a lot of false positives otherwise:
3607-
- variable defined in condition part of an ```if``` statement
3608-
- variable defined in init statement condition of a ```for``` statement
3606+
alpha.webkit.UncheckedLocalVarsChecker
3607+
""""""""""""""""""""""""""""""""""""""
3608+
The goal of this rule is to make sure that any unchecked local variable is backed by a CheckedPtr or CheckedRef with a lifetime that is strictly larger than the scope of the unchecked local variable. To be on the safe side we require the scope of an unchecked variable to be embedded in the scope of the CheckedPtr/CheckedRef object that backs it.
3609+
3610+
These are examples of cases that we consider safe:
3611+
3612+
.. code-block:: cpp
36093613
3610-
For the time being we also don't warn about uninitialized uncounted local variables.
3614+
void foo1() {
3615+
CheckedPtr<RefCountable> counted;
3616+
// The scope of uncounted is EMBEDDED in the scope of counted.
3617+
{
3618+
RefCountable* uncounted = counted.get(); // ok
3619+
}
3620+
}
3621+
3622+
void foo2(CheckedPtr<RefCountable> counted_param) {
3623+
RefCountable* uncounted = counted_param.get(); // ok
3624+
}
3625+
3626+
void FooClass::foo_method() {
3627+
RefCountable* uncounted = this; // ok
3628+
}
3629+
3630+
Here are some examples of situations that we warn about as they *might* be potentially unsafe. The logic is that either we're able to guarantee that a local variable is safe or it's considered unsafe.
3631+
3632+
.. code-block:: cpp
3633+
3634+
void foo1() {
3635+
RefCountable* uncounted = new RefCountable; // warn
3636+
}
3637+
3638+
RefCountable* global_uncounted;
3639+
void foo2() {
3640+
RefCountable* uncounted = global_uncounted; // warn
3641+
}
3642+
3643+
void foo3() {
3644+
RefPtr<RefCountable> counted;
3645+
// The scope of uncounted is not EMBEDDED in the scope of counted.
3646+
RefCountable* uncounted = counted.get(); // warn
3647+
}
36113648
36123649
Debug Checkers
36133650
---------------

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ TARGET_BUILTIN(__builtin_ia32_tdpbf16ps_internal, "V256iUsUsUsV256iV256iV256i",
128128
TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-fp16")
129129
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
130130
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
131+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
132+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
133+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
134+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
135+
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
131136
// AMX
132137
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
133138
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
@@ -148,9 +153,15 @@ TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
148153
TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps, "vIUcIUcIUc", "n", "amx-complex")
149154
TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps, "vIUcIUcIUc", "n", "amx-complex")
150155

156+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0, "vIUcvC*z", "n", "amx-transpose")
157+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1, "vIUcvC*z", "n","amx-transpose")
158+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1, "vIUcvC*z", "n", "amx-transpose")
159+
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1, "vIUcvC*z", "n","amx-transpose")
160+
TARGET_BUILTIN(__builtin_ia32_ttransposed, "vIUcIUc", "n", "amx-transpose")
161+
151162
TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
152163
TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd")
153-
TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiv*SLLiSLLiIi", "n", "cmpccxadd")
164+
TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiSLLi*SLLiSLLiIi", "n", "cmpccxadd")
154165

155166
// AMX_FP16 FP16
156167
TARGET_BUILTIN(__builtin_ia32_tdpfp16ps, "vIUcIUcIUc", "n", "amx-fp16")

clang/include/clang/Driver/Options.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,6 +1791,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
17911791
PosFlag<SetTrue, [], [ClangOption, CC1Option],
17921792
"Emit extra debug info to make sample profile more accurate">,
17931793
NegFlag<SetFalse>>;
1794+
def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">,
1795+
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
1796+
HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
1797+
def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">,
1798+
Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<directory>">,
1799+
HelpText<"Generate instrumented code to collect coverage info for cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
17941800
def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
17951801
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
17961802
HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
@@ -6414,6 +6420,8 @@ def mamx_fp8 : Flag<["-"], "mamx-fp8">, Group<m_x86_Features_Group>;
64146420
def mno_amx_fp8 : Flag<["-"], "mno-amx-fp8">, Group<m_x86_Features_Group>;
64156421
def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
64166422
def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
6423+
def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
6424+
def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
64176425
def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
64186426
def mno_cmpccxadd : Flag<["-"], "mno-cmpccxadd">, Group<m_x86_Features_Group>;
64196427
def msse : Flag<["-"], "msse">, Group<m_x86_Features_Group>;

clang/include/clang/StaticAnalyzer/Checkers/Checkers.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,4 +1764,8 @@ def UncountedLocalVarsChecker : Checker<"UncountedLocalVarsChecker">,
17641764
HelpText<"Check uncounted local variables.">,
17651765
Documentation<HasDocumentation>;
17661766

1767+
def UncheckedLocalVarsChecker : Checker<"UncheckedLocalVarsChecker">,
1768+
HelpText<"Check unchecked local variables.">,
1769+
Documentation<HasDocumentation>;
1770+
17671771
} // end alpha.webkit

clang/lib/AST/ByteCode/IntegralAP.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,14 @@ template <bool Signed> class IntegralAP final {
171171
return IntegralAP<false>(Copy);
172172
}
173173

174-
void bitcastToMemory(std::byte *Dest) const { assert(false); }
174+
void bitcastToMemory(std::byte *Dest) const {
175+
llvm::StoreIntToMemory(V, (uint8_t *)Dest, bitWidth() / 8);
176+
}
175177

176178
static IntegralAP bitcastFromMemory(const std::byte *Src, unsigned BitWidth) {
177-
return IntegralAP();
179+
APInt V(BitWidth, static_cast<uint64_t>(0), Signed);
180+
llvm::LoadIntFromMemory(V, (const uint8_t *)Src, BitWidth / 8);
181+
return IntegralAP(V);
178182
}
179183

180184
ComparisonCategoryResult compare(const IntegralAP &RHS) const {

clang/lib/Basic/DiagnosticIDs.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,12 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc,
575575
DiagID != diag::fatal_too_many_errors && Diag.FatalsAsError)
576576
Result = diag::Severity::Error;
577577

578+
// The rest of the mappings are only applicable to diagnostics associated
579+
// with a SourceLocation; bail out early for others.
580+
if (!Diag.hasSourceManager())
581+
return Result;
582+
583+
const auto &SM = Diag.getSourceManager();
578584
// Custom diagnostics always are emitted in system headers.
579585
bool ShowInSystemHeader =
580586
!GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemHeader;
@@ -583,15 +589,14 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc,
583589
// because we also want to ignore extensions and warnings in -Werror and
584590
// -pedantic-errors modes, which *map* warnings/extensions to errors.
585591
if (State->SuppressSystemWarnings && !ShowInSystemHeader && Loc.isValid() &&
586-
Diag.getSourceManager().isInSystemHeader(
587-
Diag.getSourceManager().getExpansionLoc(Loc)))
592+
SM.isInSystemHeader(SM.getExpansionLoc(Loc)))
588593
return diag::Severity::Ignored;
589594

590595
// We also ignore warnings due to system macros
591596
bool ShowInSystemMacro =
592597
!GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemMacro;
593598
if (State->SuppressSystemWarnings && !ShowInSystemMacro && Loc.isValid() &&
594-
Diag.getSourceManager().isInSystemMacro(Loc))
599+
SM.isInSystemMacro(Loc))
595600
return diag::Severity::Ignored;
596601

597602
return Result;

clang/lib/Basic/Targets/X86.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
430430
HasAMXCOMPLEX = true;
431431
} else if (Feature == "+amx-fp8") {
432432
HasAMXFP8 = true;
433+
} else if (Feature == "+amx-transpose") {
434+
HasAMXTRANSPOSE = true;
433435
} else if (Feature == "+cmpccxadd") {
434436
HasCMPCCXADD = true;
435437
} else if (Feature == "+raoint") {
@@ -951,6 +953,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
951953
Builder.defineMacro("__AMX_COMPLEX__");
952954
if (HasAMXFP8)
953955
Builder.defineMacro("__AMX_FP8__");
956+
if (HasAMXTRANSPOSE)
957+
Builder.defineMacro("__AMX_TRANSPOSE__");
954958
if (HasCMPCCXADD)
955959
Builder.defineMacro("__CMPCCXADD__");
956960
if (HasRAOINT)
@@ -1079,9 +1083,10 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
10791083
.Case("amx-bf16", true)
10801084
.Case("amx-complex", true)
10811085
.Case("amx-fp16", true)
1086+
.Case("amx-fp8", true)
10821087
.Case("amx-int8", true)
10831088
.Case("amx-tile", true)
1084-
.Case("amx-fp8", true)
1089+
.Case("amx-transpose", true)
10851090
.Case("avx", true)
10861091
.Case("avx10.1-256", true)
10871092
.Case("avx10.1-512", true)
@@ -1198,9 +1203,10 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
11981203
.Case("amx-bf16", HasAMXBF16)
11991204
.Case("amx-complex", HasAMXCOMPLEX)
12001205
.Case("amx-fp16", HasAMXFP16)
1206+
.Case("amx-fp8", HasAMXFP8)
12011207
.Case("amx-int8", HasAMXINT8)
12021208
.Case("amx-tile", HasAMXTILE)
1203-
.Case("amx-fp8", HasAMXFP8)
1209+
.Case("amx-transpose", HasAMXTRANSPOSE)
12041210
.Case("avx", SSELevel >= AVX)
12051211
.Case("avx10.1-256", HasAVX10_1)
12061212
.Case("avx10.1-512", HasAVX10_1_512)

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
158158
bool HasAMXBF16 = false;
159159
bool HasAMXCOMPLEX = false;
160160
bool HasAMXFP8 = false;
161+
bool HasAMXTRANSPOSE = false;
161162
bool HasSERIALIZE = false;
162163
bool HasTSXLDTRK = false;
163164
bool HasUSERMSR = false;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16991,6 +16991,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1699116991
// instruction, but it will create a memset that won't be optimized away.
1699216992
return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
1699316993
}
16994+
// Corresponding to intrinsics which will return 2 tiles (tile0_tile1).
16995+
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
16996+
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
16997+
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
16998+
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: {
16999+
Intrinsic::ID IID;
17000+
switch (BuiltinID) {
17001+
default:
17002+
llvm_unreachable("Unsupported intrinsic!");
17003+
case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17004+
IID = Intrinsic::x86_t2rpntlvwz0_internal;
17005+
break;
17006+
case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17007+
IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17008+
break;
17009+
case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17010+
IID = Intrinsic::x86_t2rpntlvwz1_internal;
17011+
break;
17012+
case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17013+
IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17014+
break;
17015+
}
17016+
17017+
// Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17018+
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17019+
{Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17020+
17021+
auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17022+
assert(PtrTy && "arg3 must be of pointer type");
17023+
QualType PtreeTy = PtrTy->getPointeeType();
17024+
llvm::Type *TyPtee = ConvertType(PtreeTy);
17025+
17026+
// Bitcast amx type (x86_amx) to vector type (256 x i32)
17027+
// Then store tile0 into DstPtr0
17028+
Value *T0 = Builder.CreateExtractValue(Call, 0);
17029+
Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17030+
{TyPtee}, {T0});
17031+
Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17032+
17033+
// Then store tile1 into DstPtr1
17034+
Value *T1 = Builder.CreateExtractValue(Call, 1);
17035+
Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17036+
{TyPtee}, {T1});
17037+
Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17038+
17039+
// Note: We deliberately avoid using x86_tilestored64_internal to store the
17040+
// results because it cannot guarantee the scope of the memory written. That
17041+
// may cause shape reloads after the first AMX intrinsic, which the current
17042+
// AMX register allocation is unable to handle.
17043+
17044+
return Store;
17045+
}
1699417046
case X86::BI__ud2:
1699517047
// llvm.trap makes a ud2a instruction on x86.
1699617048
return EmitTrapCall(Intrinsic::trap);

0 commit comments

Comments
 (0)