Skip to content

Commit 2dca7e1

Browse files
shiltiankosarev
andcommitted
[AMDGPU] Add the support for .cluster_dims code object metadata
Co-authored-by: Ivan Kosarev <[email protected]>
1 parent d9fa0de commit 2dca7e1

File tree

6 files changed

+136
-10
lines changed

6 files changed

+136
-10
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4839,7 +4839,24 @@ Code object V5 metadata is the same as
48394839

48404840
====================== ============== ========= ================================
48414841

4842-
..
4842+
.. _amdgpu-amdhsa-code-object-metadata-v6:
4843+
4844+
Code Object V6 Metadata
4845+
+++++++++++++++++++++++
4846+
4847+
Code object V6 metadata is the same as
4848+
:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
4849+
:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.
4850+
4851+
.. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
4852+
:name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6
4853+
4854+
============================= ============= ========== =======================================
4855+
String Key Value Type Required? Description
4856+
============================= ============= ========== =======================================
4857+
".cluster_dims" sequence of The dimension of the cluster.
4858+
3 integers
4859+
============================= ============= ========== =======================================
48434860

48444861
Kernel Dispatch
48454862
~~~~~~~~~~~~~~~

llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,14 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
281281
return false;
282282
if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
283283
return false;
284-
284+
if (!verifyEntry(
285+
KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
286+
return verifyArray(
287+
Node,
288+
[this](msgpack::DocNode &Node) { return verifyInteger(Node); },
289+
3);
290+
}))
291+
return false;
285292

286293
return true;
287294
}

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,9 @@ void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
254254
}
255255

256256
void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
257-
const Function &Func,
257+
const MachineFunction &MF,
258258
msgpack::MapDocNode Kern) {
259-
259+
const Function &Func = MF.getFunction();
260260
if (auto *Node = Func.getMetadata("reqd_work_group_size"))
261261
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
262262
if (auto *Node = Func.getMetadata("work_group_size_hint"))
@@ -599,7 +599,7 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
599599
Kern[".symbol"] = Kern.getDocument()->getNode(
600600
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
601601
emitKernelLanguage(Func, Kern);
602-
emitKernelAttrs(TM, Func, Kern);
602+
emitKernelAttrs(TM, MF, Kern);
603603
emitKernelArgs(MF, Kern);
604604
}
605605

@@ -726,10 +726,11 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
726726
}
727727

728728
void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
729-
const Function &Func,
729+
const MachineFunction &MF,
730730
msgpack::MapDocNode Kern) {
731-
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
731+
MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
732732

733+
const Function &Func = MF.getFunction();
733734
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
734735
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
735736
}
@@ -745,5 +746,21 @@ void MetadataStreamerMsgPackV6::emitVersion() {
745746
getRootMetadata("amdhsa.version") = Version;
746747
}
747748

749+
void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
750+
const MachineFunction &MF,
751+
msgpack::MapDocNode Kern) {
752+
MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
753+
754+
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
755+
ClusterDimsAttr Attr = MFI.getClusterDims();
756+
if (Attr.isFixedDims()) {
757+
msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
758+
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
759+
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
760+
ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
761+
Kern[".cluster_dims"] = ClusterDimsNode;
762+
}
763+
}
764+
748765
} // end namespace AMDGPU::HSAMD
749766
} // end namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class MetadataStreamer {
6161
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
6262
msgpack::ArrayDocNode Args) = 0;
6363
virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
64-
const Function &Func,
64+
const MachineFunction &MF,
6565
msgpack::MapDocNode Kern) = 0;
6666
};
6767

@@ -102,7 +102,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4
102102

103103
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
104104

105-
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
105+
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
106106
msgpack::MapDocNode Kern) override;
107107

108108
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
@@ -149,7 +149,7 @@ class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
149149
void emitVersion() override;
150150
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
151151
msgpack::ArrayDocNode Args) override;
152-
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
152+
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
153153
msgpack::MapDocNode Kern) override;
154154

155155
public:
@@ -164,6 +164,9 @@ class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 {
164164
public:
165165
MetadataStreamerMsgPackV6() = default;
166166
~MetadataStreamerMsgPackV6() = default;
167+
168+
void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
169+
msgpack::MapDocNode Kern) override;
167170
};
168171

169172
} // end namespace HSAMD
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s
3+
4+
; CHECK: .cluster_dims:
5+
; CHECK-NEXT: - 2
6+
; CHECK-NEXT: - 2
7+
; CHECK-NEXT: - 2
8+
define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
9+
entry:
10+
ret void
11+
}
12+
13+
; CHECK: .cluster_dims:
14+
; CHECK-NEXT: - 2
15+
; CHECK-NEXT: - 2
16+
; CHECK-NEXT: - 1
17+
define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
18+
entry:
19+
ret void
20+
}
21+
22+
; CHECK: .cluster_dims:
23+
; CHECK-NEXT: - 4
24+
; CHECK-NEXT: - 1
25+
; CHECK-NEXT: - 1
26+
define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
27+
entry:
28+
ret void
29+
}
30+
31+
; CHECK: .cluster_dims:
32+
; CHECK-NEXT: - 4
33+
; CHECK-NEXT: - 2
34+
; CHECK-NEXT: - 1
35+
define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
36+
entry:
37+
ret void
38+
}
39+
40+
attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
41+
attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
42+
attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
43+
attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }
44+
45+
!llvm.module.flags = !{!0}
46+
47+
!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s
2+
3+
// CHECK: .amdgpu_metadata
4+
// CHECK: amdhsa.kernels:
5+
// CHECK: - .cluster_dims:
6+
// CHECK-NEXT: - 4
7+
// CHECK-NEXT: - 2
8+
// CHECK-NEXT: - 1
9+
.amdgpu_metadata
10+
amdhsa.version:
11+
- 1
12+
- 0
13+
amdhsa.printf:
14+
- '1:1:4:%d\n'
15+
- '2:1:8:%g\n'
16+
amdhsa.kernels:
17+
- .name: test_kernel
18+
.symbol: test_kernel@kd
19+
.language: OpenCL C
20+
.language_version:
21+
- 2
22+
- 0
23+
.kernarg_segment_size: 8
24+
.group_segment_fixed_size: 16
25+
.private_segment_fixed_size: 32
26+
.kernarg_segment_align: 64
27+
.wavefront_size: 128
28+
.sgpr_count: 14
29+
.vgpr_count: 40
30+
.max_flat_workgroup_size: 256
31+
.cluster_dims:
32+
- 4
33+
- 2
34+
- 1
35+
.end_amdgpu_metadata

0 commit comments

Comments
 (0)