Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4839,7 +4839,24 @@ Code object V5 metadata is the same as

====================== ============== ========= ================================

..
.. _amdgpu-amdhsa-code-object-metadata-v6:

Code Object V6 Metadata
+++++++++++++++++++++++

Code object V6 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.

.. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
:name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6

============================= ============= ========== =======================================
String Key Value Type Required? Description
============================= ============= ========== =======================================
".cluster_dims" sequence of The dimension of the cluster.
3 integers
============================= ============= ========== =======================================

Kernel Dispatch
~~~~~~~~~~~~~~~
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,14 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
return false;
if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
return false;

if (!verifyEntry(
KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
return verifyArray(
Node,
[this](msgpack::DocNode &Node) { return verifyInteger(Node); },
3);
}))
return false;

return true;
}
Expand Down
27 changes: 22 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
}

void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {

const Function &Func = MF.getFunction();
if (auto *Node = Func.getMetadata("reqd_work_group_size"))
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
if (auto *Node = Func.getMetadata("work_group_size_hint"))
Expand Down Expand Up @@ -599,7 +599,7 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
Kern[".symbol"] = Kern.getDocument()->getNode(
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(TM, Func, Kern);
emitKernelAttrs(TM, MF, Kern);
emitKernelArgs(MF, Kern);
}

Expand Down Expand Up @@ -726,10 +726,11 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
}

void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);

const Function &Func = MF.getFunction();
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}
Expand All @@ -745,5 +746,21 @@ void MetadataStreamerMsgPackV6::emitVersion() {
getRootMetadata("amdhsa.version") = Version;
}

void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
const MachineFunction &MF,
msgpack::MapDocNode Kern) {
MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);

const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
ClusterDimsAttr Attr = MFI.getClusterDims();
if (Attr.isFixedDims()) {
msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
Kern[".cluster_dims"] = ClusterDimsNode;
}
}

} // end namespace AMDGPU::HSAMD
} // end namespace llvm
9 changes: 6 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class MetadataStreamer {
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) = 0;
virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
const Function &Func,
const MachineFunction &MF,
msgpack::MapDocNode Kern) = 0;
};

Expand Down Expand Up @@ -102,7 +102,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4

void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);

void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;

void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
Expand Down Expand Up @@ -149,7 +149,7 @@ class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override;
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;

public:
Expand All @@ -164,6 +164,9 @@ class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 {
public:
MetadataStreamerMsgPackV6() = default;
~MetadataStreamerMsgPackV6() = default;

void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
};

} // end namespace HSAMD
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AMDGPU/cluster-dims.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s

; CHECK: .cluster_dims:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
entry:
ret void
}

; CHECK: .cluster_dims:
; CHECK-NEXT: - 2
; CHECK-NEXT: - 2
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
entry:
ret void
}

; CHECK: .cluster_dims:
; CHECK-NEXT: - 4
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
entry:
ret void
}

; CHECK: .cluster_dims:
; CHECK-NEXT: - 4
; CHECK-NEXT: - 2
; CHECK-NEXT: - 1
define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
entry:
ret void
}

attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }

!llvm.module.flags = !{!0}

!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
35 changes: 35 additions & 0 deletions llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s

// CHECK: .amdgpu_metadata
// CHECK: amdhsa.kernels:
// CHECK: - .cluster_dims:
// CHECK-NEXT: - 4
// CHECK-NEXT: - 2
// CHECK-NEXT: - 1
.amdgpu_metadata
amdhsa.version:
- 1
- 0
amdhsa.printf:
- '1:1:4:%d\n'
- '2:1:8:%g\n'
amdhsa.kernels:
- .name: test_kernel
.symbol: test_kernel@kd
.language: OpenCL C
.language_version:
- 2
- 0
.kernarg_segment_size: 8
.group_segment_fixed_size: 16
.private_segment_fixed_size: 32
.kernarg_segment_align: 64
.wavefront_size: 128
.sgpr_count: 14
.vgpr_count: 40
.max_flat_workgroup_size: 256
.cluster_dims:
- 4
- 2
- 1
.end_amdgpu_metadata