diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 259c02c0d49b2..1265ec40c06d6 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -4839,7 +4839,24 @@ Code object V5 metadata is the same as ====================== ============== ========= ================================ -.. +.. _amdgpu-amdhsa-code-object-metadata-v6: + +Code Object V6 Metadata ++++++++++++++++++++++++ + +Code object V6 metadata is the same as +:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table +:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`. + + .. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions + :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6 + + ============================= ============= ========== ======================================= + String Key Value Type Required? Description + ============================= ============= ========== ======================================= + ".cluster_dims" sequence of The dimension of the cluster. + 3 integers + ============================= ============= ========== ======================================= Kernel Dispatch ~~~~~~~~~~~~~~~ diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index 8737dc0fc7459..f2ada27cac01d 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -281,7 +281,14 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) { return false; if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false)) return false; - + if (!verifyEntry( + KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) { + return verifyArray( + Node, + [this](msgpack::DocNode &Node) { return verifyInteger(Node); }, + 3); + })) + return false; return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 19b8757e6ad6e..3c88d1b8214f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -254,9 +254,9 @@ void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, } void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM, - const Function &Func, + const MachineFunction &MF, msgpack::MapDocNode Kern) { - + const Function &Func = MF.getFunction(); if (auto *Node = Func.getMetadata("reqd_work_group_size")) Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); if (auto *Node = Func.getMetadata("work_group_size_hint")) @@ -599,7 +599,7 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, Kern[".symbol"] = Kern.getDocument()->getNode( (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); emitKernelLanguage(Func, Kern); - emitKernelAttrs(TM, Func, Kern); + emitKernelAttrs(TM, MF, Kern); emitKernelArgs(MF, Kern); } @@ -726,10 +726,11 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( } void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM, - const Function &Func, + const MachineFunction &MF, msgpack::MapDocNode Kern) { - MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern); + MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern); + const Function &Func = MF.getFunction(); if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); } @@ -745,5 +746,21 @@ void MetadataStreamerMsgPackV6::emitVersion() { getRootMetadata("amdhsa.version") = Version; } +void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM, + const MachineFunction &MF, + msgpack::MapDocNode Kern) { + MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern); + + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + ClusterDimsAttr Attr = MFI.getClusterDims(); + if (Attr.isFixedDims()) { + msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode(); + ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0])); + ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1])); + ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2])); + Kern[".cluster_dims"] = ClusterDimsNode; + } +} + } // end namespace AMDGPU::HSAMD } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 22dfcb4a4ec1d..1b4b113fad61c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -61,7 +61,7 @@ class MetadataStreamer { virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) = 0; virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM, - const Function &Func, + const MachineFunction &MF, msgpack::MapDocNode Kern) = 0; }; @@ -102,7 +102,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4 void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern); - void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func, + void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF, msgpack::MapDocNode Kern) override; void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern); @@ -149,7 +149,7 @@ class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 { void emitVersion() override; void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) override; - void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func, + void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF, msgpack::MapDocNode Kern) override; public: @@ -164,6 +164,9 @@ class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 { public: MetadataStreamerMsgPackV6() = default; ~MetadataStreamerMsgPackV6() = default; + + void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF, + msgpack::MapDocNode Kern) override; }; } // end namespace HSAMD diff --git a/llvm/test/CodeGen/AMDGPU/cluster-dims.ll b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll new file mode 100644 index 0000000000000..62e8d9dc61293 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll @@ -0,0 +1,47 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s + +; CHECK: .cluster_dims: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 2 +define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 { +entry: + ret void +} + +; CHECK: .cluster_dims: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 1 +define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 { +entry: + ret void +} + +; CHECK: .cluster_dims: +; CHECK-NEXT: - 4 +; CHECK-NEXT: - 1 +; CHECK-NEXT: - 1 +define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 { +entry: + ret void +} + +; CHECK: .cluster_dims: +; CHECK-NEXT: - 4 +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 1 +define dso_local amdgpu_kernel void @_Z13test_constantv() #3 { +entry: + ret void +} + +attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" } +attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" } +attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" } +attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"amdhsa_code_object_version", i32 600} diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s new file mode 100644 index 0000000000000..b91888d0bba6f --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s @@ -0,0 +1,35 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s + +// CHECK: .amdgpu_metadata +// CHECK: amdhsa.kernels: +// CHECK: - .cluster_dims: +// CHECK-NEXT: - 4 +// CHECK-NEXT: - 2 +// CHECK-NEXT: - 1 +.amdgpu_metadata + amdhsa.version: + - 1 + - 0 + amdhsa.printf: + - '1:1:4:%d\n' + - '2:1:8:%g\n' + amdhsa.kernels: + - .name: test_kernel + .symbol: test_kernel@kd + .language: OpenCL C + .language_version: + - 2 + - 0 + .kernarg_segment_size: 8 + .group_segment_fixed_size: 16 + .private_segment_fixed_size: 32 + .kernarg_segment_align: 64 + .wavefront_size: 128 + .sgpr_count: 14 + .vgpr_count: 40 + .max_flat_workgroup_size: 256 + .cluster_dims: + - 4 + - 2 + - 1 +.end_amdgpu_metadata