Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^

- Bump the default code object version to 6. ROCm 6.3 is required to run any program compiled with COV6.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it really? What runtime changes actually were required. Figured it'd still work so long as you're not using the generic targets.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It needs the runtime to be able to recognize the COV. If the runtime doesn't support it, it will error out.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should have a defined policy for version support. It's also a long standing frustration of mine that we do not version the amdhsa OS to correspond to the rocm runtime version

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have no clue why we don't provide that in the HSA header. Probably some ill-conceived notion that HSA is independent to ROCm.


NVPTX Support
^^^^^^^^^^^^^^

Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5161,12 +5161,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;

def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_6">;

defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2760,7 +2760,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,

unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 5; // default
unsigned CodeObjVer = 6; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
return CodeObjVer;
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/amdgpu-address-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Create module flag for code object version.

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V5
// RUN: -o - %s | FileCheck %s -check-prefix=V6

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ B fail;
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
Expand Down Expand Up @@ -118,11 +118,11 @@ const B& f(A *a) {
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
4 changes: 2 additions & 2 deletions clang/test/CodeGenHIP/default-attributes.hip
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//.
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
__device__ void extern_func();

Expand Down Expand Up @@ -39,7 +39,7 @@ __global__ void kernel() {
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
//.
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
6 changes: 3 additions & 3 deletions clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ kernel void test_target_features_kernel(global int *i) {
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@callee
Expand Down Expand Up @@ -764,7 +764,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #[[ATTR7]] = { nounwind }
// GFX900: attributes #[[ATTR8]] = { convergent nounwind }
//.
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
Expand All @@ -787,7 +787,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: [[META20]] = !{!"int*"}
// NOCPU: [[META21]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle}
//.
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/hip-device-libs.hip
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
// Test default code object version.
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6

// Test default code object version with old device library without abi_version_400.bc
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/OpenMP/amdgcn_target_global_constructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ S A;
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
Expand Down Expand Up @@ -104,7 +104,7 @@ S A;
// CHECK: attributes #[[ATTR4]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51}
// CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
Expand Down
2 changes: 1 addition & 1 deletion libc/cmake/modules/prepare_libc_gpu_build.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
# The AMDGPU environment uses different code objects to encode the ABI for
# kernel calls and intrinsic functions. We want to specify this manually to
# conform to whatever the test suite was built to handle.
set(LIBC_GPU_CODE_OBJECT_VERSION 5)
set(LIBC_GPU_CODE_OBJECT_VERSION 6)
endif()

if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ Changes to the AMDGPU Backend
[FWD_PROGRESS bit](https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-kernel-descriptor)
for all GFX ISAs greater or equal to 10, for the AMDHSA OS.

* Bump the default `.amdhsa_code_object_version` to 6. ROCm 6.3 is required to run any program compiled with COV6.

Changes to the ARM Backend
--------------------------

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
"or asm directive still take priority if present)"));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s

; CHECK: .amdhsa_code_object_version 6

define amdgpu_kernel void @kernel() {
ret void
}
Loading