Skip to content

Commit 8938c0a

Browse files
authored
Merge branch 'main' into x86-avx2-avx512-constexpr-intrinsics
2 parents 2a18010 + 4931c3a commit 8938c0a

File tree

113 files changed

+5283
-1555
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

113 files changed

+5283
-1555
lines changed

clang-tools-extra/test/clang-tidy/check_clang_tidy.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,7 @@ def parse_arguments() -> Tuple[argparse.Namespace, List[str]]:
391391
args, extra_args = parser.parse_known_args()
392392
if args.std is None:
393393
_, extension = os.path.splitext(args.assume_filename or args.input_file_name)
394-
args.std = [
395-
"c++11-or-later" if extension in [".cpp", ".hpp", ".mm"] else "c99-or-later"
396-
]
394+
args.std = ["c99-or-later" if extension in [".c", ".m"] else "c++11-or-later"]
397395

398396
return (args, extra_args)
399397

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ Deprecated Compiler Flags
219219

220220
Modified Compiler Flags
221221
-----------------------
222+
- The ``-gkey-instructions`` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509)
222223

223224
Removed Compiler Flags
224225
-------------------------

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,5 +835,15 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbI
835835
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
836836
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
837837

838+
// GFX12.5 128B cooperative atomics
839+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B, "ii*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
840+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
841+
842+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B, "V2iV2i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
843+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
844+
845+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
846+
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
847+
838848
#undef BUILTIN
839849
#undef TARGET_BUILTIN

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13609,4 +13609,6 @@ def warn_acc_var_referenced_lacks_op
1360913609
// AMDGCN builtins diagnostics
1361013610
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
1361113611
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
13612+
13613+
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
1361213614
} // end of sema component.

clang/include/clang/Basic/arm_sve.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1828,7 +1828,7 @@ let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in {
18281828
////////////////////////////////////////////////////////////////////////////////
18291829
// SVE2 - Optional
18301830

1831-
let SVETargetGuard = "sve2,sve-aes", SMETargetGuard = "ssve-aes" in {
1831+
let SVETargetGuard = "sve-aes", SMETargetGuard = "ssve-aes" in {
18321832
def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone, VerifyRuntimeMode]>;
18331833
def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone, VerifyRuntimeMode]>;
18341834
def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone, VerifyRuntimeMode]>;
@@ -1845,12 +1845,12 @@ let SVETargetGuard = "sve-sha3", SMETargetGuard = "sve-sha3,sme2p1" in {
18451845
def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
18461846
}
18471847

1848-
let SVETargetGuard = "sve2-sm4", SMETargetGuard = InvalidMode in {
1848+
let SVETargetGuard = "sve-sm4", SMETargetGuard = InvalidMode in {
18491849
def SVSM4E : SInst<"svsm4e[_{d}]", "ddd", "Ui", MergeNone, "aarch64_sve_sm4e", [IsOverloadNone]>;
18501850
def SVSM4EKEY : SInst<"svsm4ekey[_{d}]", "ddd", "Ui", MergeNone, "aarch64_sve_sm4ekey", [IsOverloadNone]>;
18511851
}
18521852

1853-
let SVETargetGuard = "sve2,sve-bitperm", SMETargetGuard = "ssve-bitperm" in {
1853+
let SVETargetGuard = "sve-bitperm", SMETargetGuard = "ssve-bitperm" in {
18541854
def SVBDEP : SInst<"svbdep[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x", [VerifyRuntimeMode]>;
18551855
def SVBDEP_N : SInst<"svbdep[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x", [VerifyRuntimeMode]>;
18561856
def SVBEXT : SInst<"svbext[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x", [VerifyRuntimeMode]>;
@@ -1979,7 +1979,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
19791979
def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>;
19801980
}
19811981

1982-
let SVETargetGuard = "sve2,sve-b16b16", SMETargetGuard = "sme2,sve-b16b16" in {
1982+
let SVETargetGuard = "sve-b16b16", SMETargetGuard = "sme2,sve-b16b16" in {
19831983
defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u", [VerifyRuntimeMode]>;
19841984
defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u", [VerifyRuntimeMode]>;
19851985
defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u", [VerifyRuntimeMode]>;

clang/include/clang/Sema/SemaAMDGPU.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class SemaAMDGPU : public SemaBase {
2626

2727
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
2828

29+
bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
30+
2931
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
3032
unsigned NumDataArgs);
3133

clang/lib/AST/ByteCode/Disasm.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,20 @@ inline static std::string printArg(Program &P, CodePtr &OpPC) {
4444
std::string Result;
4545
llvm::raw_string_ostream SS(Result);
4646
auto Arg = OpPC.read<T>();
47-
SS << Arg;
47+
// Make sure we print the integral value of chars.
48+
if constexpr (std::is_integral_v<T>) {
49+
if constexpr (sizeof(T) == 1) {
50+
if constexpr (std::is_signed_v<T>)
51+
SS << static_cast<int32_t>(Arg);
52+
else
53+
SS << static_cast<uint32_t>(Arg);
54+
} else {
55+
SS << Arg;
56+
}
57+
} else {
58+
SS << Arg;
59+
}
60+
4861
return Result;
4962
}
5063
}

clang/lib/AST/ByteCode/EvalEmitter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,9 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) {
213213
if (!Ptr.isZero() && !Ptr.isDereferencable())
214214
return false;
215215

216+
if (Ptr.pointsToStringLiteral() && Ptr.isArrayRoot())
217+
return false;
218+
216219
if (!Ptr.isZero() && !CheckFinalLoad(S, OpPC, Ptr))
217220
return false;
218221

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
870870
}
871871

872872
bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
873-
if (!Ptr.isBlockPointer())
873+
if (!Ptr.isBlockPointer() || Ptr.isZero())
874874
return false;
875875

876876
if (!Ptr.block()->isAccessible()) {

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
701701
return emitBuiltinWithOneOverloadedType<5>(*this, E,
702702
Intrinsic::amdgcn_load_to_lds);
703703
}
704+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
705+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
706+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
707+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
708+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
709+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
710+
Intrinsic::ID IID;
711+
switch (BuiltinID) {
712+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
713+
IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
714+
break;
715+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
716+
IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
717+
break;
718+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
719+
IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
720+
break;
721+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
722+
IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
723+
break;
724+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
725+
IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
726+
break;
727+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
728+
IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
729+
break;
730+
}
731+
732+
LLVMContext &Ctx = CGM.getLLVMContext();
733+
SmallVector<Value *, 5> Args;
734+
// last argument is a MD string
735+
const unsigned ScopeArg = E->getNumArgs() - 1;
736+
for (unsigned i = 0; i != ScopeArg; ++i)
737+
Args.push_back(EmitScalarExpr(E->getArg(i)));
738+
StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
739+
->getString();
740+
llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
741+
Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
742+
// Intrinsic is typed based on the pointer AS. Pointer is always the first
743+
// argument.
744+
llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
745+
return Builder.CreateCall(F, {Args});
746+
}
704747
case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
705748
Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
706749
{llvm::Type::getInt64Ty(getLLVMContext())});

0 commit comments

Comments
 (0)