Skip to content

Commit 8edaf25

Browse files
committed
AMDGPU: Emit metadata for the hidden_multigrid_sync_arg conditionally
Summary: Introduce a new function attribute, amdgpu-no-multigrid-sync-arg, which is set by default. We use implicitarg_ptr + offset to check whether the multigrid synchronization pointer is used. If it is, we remove this attribute and also remove amdgpu-no-implicitarg-ptr. We generate metadata for the hidden_multigrid_sync_arg only when the amdgpu-no-multigrid-sync-arg attribute has been removed from the function. Reviewers: arsenm, sameerds, b-sumner, and foad. Differential Revision: https://reviews.llvm.org/D123548
1 parent 65b8a43 commit 8edaf25

23 files changed

+440
-123
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,9 @@ The AMDGPU backend supports the following LLVM IR attributes.
947947
version implementation. If this attribute is absent, then the
948948
amdgpu-no-implicitarg-ptr is also removed.
949949

950+
"amdgpu-no-multigrid-sync-arg" Similar to amdgpu-no-implicitarg-ptr, except specific to the implicit
951+
kernel argument that holds the multigrid synchronization pointer. If this
952+
attribute is absent, then amdgpu-no-implicitarg-ptr is also removed.
950953
======================================= ==========================================================
951954

952955
.. _amdgpu-elf-code-object:

llvm/lib/Target/AMDGPU/AMDGPUAttributes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
1818
AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
1919
AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
2020
AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
21+
AMDGPU_ATTRIBUTE(MULTIGRID_SYNC_ARG, "amdgpu-no-multigrid-sync-arg")
2122
AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
2223
AMDGPU_ATTRIBUTE(HEAP_PTR, "amdgpu-no-heap-ptr")
2324
AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,12 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
436436
removeAssumedBits(QUEUE_PTR);
437437
}
438438

439+
if (funcRetrievesMultigridSyncArg(A)) {
440+
assert(!isAssumed(IMPLICIT_ARG_PTR) &&
441+
"multigrid_sync_arg needs implicitarg_ptr");
442+
removeAssumedBits(MULTIGRID_SYNC_ARG);
443+
}
444+
439445
if (funcRetrievesHostcallPtr(A)) {
440446
assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
441447
removeAssumedBits(HOSTCALL_PTR);
@@ -533,6 +539,12 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
533539
return false;
534540
}
535541

542+
bool funcRetrievesMultigridSyncArg(Attributor &A) {
543+
auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
544+
AAPointerInfo::OffsetAndSize OAS(Pos, 8);
545+
return funcRetrievesImplicitKernelArg(A, OAS);
546+
}
547+
536548
bool funcRetrievesHostcallPtr(Attributor &A) {
537549
auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
538550
AAPointerInfo::OffsetAndSize OAS(Pos, 8);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,12 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
425425
}
426426

427427
// Emit the pointer argument for multi-grid object.
428-
if (HiddenArgNumBytes >= 56)
429-
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
428+
if (HiddenArgNumBytes >= 56) {
429+
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
430+
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
431+
else
432+
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
433+
}
430434
}
431435

432436
bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
@@ -845,9 +849,14 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
845849
}
846850

847851
// Emit the pointer argument for multi-grid object.
848-
if (HiddenArgNumBytes >= 56)
849-
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
850-
Args);
852+
if (HiddenArgNumBytes >= 56) {
853+
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
854+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
855+
Args);
856+
} else {
857+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
858+
}
859+
}
851860
}
852861

853862
msgpack::MapDocNode
@@ -1019,17 +1028,23 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
10191028
if (M->getNamedMetadata("llvm.printf.fmts")) {
10201029
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
10211030
Args);
1022-
} else
1031+
} else {
10231032
Offset += 8; // Skipped.
1033+
}
10241034

10251035
if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
10261036
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
10271037
Args);
1028-
} else
1038+
} else {
10291039
Offset += 8; // Skipped.
1040+
}
10301041

1031-
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1042+
if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1043+
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
10321044
Args);
1045+
} else {
1046+
Offset += 8; // Skipped.
1047+
}
10331048

10341049
if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
10351050
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
@@ -1041,8 +1056,9 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
10411056
Args);
10421057
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
10431058
Args);
1044-
} else
1059+
} else {
10451060
Offset += 16; // Skipped.
1061+
}
10461062

10471063
Offset += 72; // Reserved.
10481064

@@ -1051,8 +1067,9 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
10511067
if (!ST.hasApertureRegs()) {
10521068
emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
10531069
emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1054-
} else
1070+
} else {
10551071
Offset += 8; // Skipped.
1072+
}
10561073

10571074
if (MFI.hasQueuePtr())
10581075
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ namespace ImplicitArg {
782782
// Implicit kernel argument offset for code object version 5.
783783
enum Offset_COV5 : unsigned {
784784
HOSTCALL_PTR_OFFSET = 80,
785+
MULTIGRID_SYNC_ARG_OFFSET = 88,
785786
HEAP_PTR_OFFSET = 96,
786787
PRIVATE_BASE_OFFSET = 192,
787788
SHARED_BASE_OFFSET = 196,

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,21 @@ unsigned getAmdhsaCodeObjectVersion() {
140140
return AmdhsaCodeObjectVersion;
141141
}
142142

143+
unsigned getMultigridSyncArgImplicitArgPosition() {
144+
switch (AmdhsaCodeObjectVersion) {
145+
case 2:
146+
case 3:
147+
case 4:
148+
return 48;
149+
case 5:
150+
return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
151+
default:
152+
llvm_unreachable("Unexpected code object version");
153+
return 0;
154+
}
155+
}
156+
157+
143158
// FIXME: All such magic numbers about the ABI should be in a
144159
// central TD file.
145160
unsigned getHostcallImplicitArgPosition() {

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
5454
/// false otherwise.
5555
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
5656

57+
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
58+
unsigned getMultigridSyncArgImplicitArgPosition();
59+
5760
/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
5861
unsigned getHostcallImplicitArgPosition();
5962

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,6 @@ attributes #1 = { nounwind }
230230
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
231231
;.
232232
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
233-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
234-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
233+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
234+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
235235
;.

0 commit comments

Comments
 (0)