Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^

- Bump the default code object version to 6.

- Initial support for gfx950

- Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contains common
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5140,12 +5140,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;

def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_6">;

defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2705,7 +2705,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,

unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 5; // default
unsigned CodeObjVer = 6; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
return CodeObjVer;
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/amdgpu-address-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Create module flag for code object version.

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V5
// RUN: -o - %s | FileCheck %s -check-prefix=V6

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ B fail;
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
Expand Down Expand Up @@ -118,11 +118,11 @@ const B& f(A *a) {
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
4 changes: 2 additions & 2 deletions clang/test/CodeGenHIP/default-attributes.hip
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//.
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
__device__ void extern_func();

Expand Down Expand Up @@ -39,7 +39,7 @@ __global__ void kernel() {
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
//.
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
6 changes: 3 additions & 3 deletions clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ kernel void test_target_features_kernel(global int *i) {

//.
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@callee
Expand Down Expand Up @@ -759,7 +759,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #[[ATTR8]] = { nounwind }
// GFX900: attributes #[[ATTR9]] = { convergent nounwind }
//.
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
Expand All @@ -777,7 +777,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: [[META15]] = !{i32 1}
// NOCPU: [[META16]] = !{!"int*"}
//.
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/hip-device-libs.hip
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
// Test default code object version.
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6

// Test default code object version with old device library without abi_version_400.bc
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/OpenMP/amdgcn_target_global_constructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ S A;
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
Expand Down Expand Up @@ -104,7 +104,7 @@ S A;
// CHECK: attributes #[[ATTR4]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51}
// CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
Expand Down
2 changes: 1 addition & 1 deletion libc/cmake/modules/prepare_libc_gpu_build.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
# The AMDGPU environment uses different code objects to encode the ABI for
# kernel calls and intrinsic functions. We want to specify this manually to
# conform to whatever the test suite was built to handle.
set(LIBC_GPU_CODE_OBJECT_VERSION 5)
set(LIBC_GPU_CODE_OBJECT_VERSION 6)
endif()

if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ Changes to the AArch64 Backend
Changes to the AMDGPU Backend
-----------------------------

* Bump the default `.amdhsa_code_object_version` to 6.

* Removed `llvm.amdgcn.flat.atomic.fadd` and
`llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the
{ref}`atomicrmw <i_atomicrmw>` instruction with `fadd` and
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
"or asm directive still take priority if present)"));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s

; CHECK: .amdhsa_code_object_version 6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems we have no other tests checking this field, which seems hard to believe. We probably should have one that tests it with explicit flags etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have multiple tests that have explicit module flag and checks for the match.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

git grep "\.amdhsa_code_object_version" llvm/test/CodeGen/AMDGPU/ does not find them

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, they check the IR match Lol. I will add one later then.


define amdgpu_kernel void @kernel() {
ret void
}
5 changes: 3 additions & 2 deletions offload/plugins-nextgen/common/src/Utils/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5)
return createError("Invalid AMD ABI version, must be version 4 or 5");
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version above 4");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
return createError("Unsupported AMDGPU architecture");
Expand Down
Loading