Skip to content

Commit 68318b2

Browse files
committed
[AMDGPU] Split struct kernel arguments
The AMDGPU backend has a pass that transforms kernels so that firmware can preload kernel arguments into SGPRs instead of loading them from the kernel argument segment. This pass can improve kernel latency, but it cannot preload struct-type kernel arguments. This patch adds a pass to the AMDGPU backend that splits and flattens struct-type kernel arguments so that later passes can preload them into SGPRs. Basically, the pass collects load or GEP/load instructions that use struct-type kernel arguments as operands and turns the loaded values into new arguments of the kernel. If all uses of a struct-type kernel argument can be replaced, it performs the replacements, creates a new kernel with the new signature, and translates all instructions of the old kernel to use the new arguments in the new kernel. It attaches an `amdgpu-original-arg` attribute to each new kernel argument to encode the mapping from the new kernel argument to the original kernel argument index and offset. The metadata streamer generates kernel argument metadata based on that attribute, and the runtime processes the kernel arguments based on the metadata. The pass is disabled by default and can be enabled with the LLVM option `-amdgpu-enable-split-kernel-args`.
1 parent 5ebe22a commit 68318b2

File tree

9 files changed

+550
-5
lines changed

9 files changed

+550
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,15 @@ struct AMDGPUPromoteKernelArgumentsPass
125125
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
126126
};
127127

128+
ModulePass *createAMDGPUSplitKernelArgumentsPass();
129+
void initializeAMDGPUSplitKernelArgumentsPass(PassRegistry &);
130+
extern char &AMDGPUSplitKernelArgumentsID;
131+
132+
/// Pass wrapper for splitting kernel arguments, built on PassInfoMixin.
/// Its run() entry point takes a whole Module together with a
/// ModuleAnalysisManager and returns the PreservedAnalyses set.
struct AMDGPUSplitKernelArgumentsPass
133+
: PassInfoMixin<AMDGPUSplitKernelArgumentsPass> {
134+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
135+
};
136+
128137
ModulePass *createAMDGPULowerKernelAttributesPass();
129138
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
130139
extern char &AMDGPULowerKernelAttributesID;

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,38 @@ void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
357357
Align ArgAlign;
358358
std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
359359

360+
// Assume by default that the argument was not split from a struct-type
361+
// argument, unless it carries the amdgpu-original-arg parameter attribute.
362+
unsigned OriginalArgIndex = ~0U;
363+
uint64_t OriginalArgOffset = 0;
364+
Attribute Attr =
365+
Func->getAttributes().getParamAttr(ArgNo, "amdgpu-original-arg");
366+
if (Attr.isValid()) {
367+
StringRef MappingStr = Attr.getValueAsString();
368+
SmallVector<StringRef, 2> Elements;
369+
MappingStr.split(Elements, ':');
370+
if (Elements.size() == 2) {
371+
if (Elements[0].getAsInteger(10, OriginalArgIndex))
372+
report_fatal_error(
373+
"Invalid original argument index in amdgpu-original-arg attribute");
374+
if (Elements[1].getAsInteger(10, OriginalArgOffset))
375+
report_fatal_error("Invalid original argument offset in "
376+
"amdgpu-original-arg attribute");
377+
}
378+
}
379+
360380
emitKernelArg(DL, ArgTy, ArgAlign,
361381
getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
362-
PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
363-
AccQual, TypeQual);
382+
PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, AccQual,
383+
TypeQual, OriginalArgIndex, OriginalArgOffset);
364384
}
365385

366386
void MetadataStreamerMsgPackV4::emitKernelArg(
367387
const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
368388
unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
369389
StringRef Name, StringRef TypeName, StringRef BaseTypeName,
370-
StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
390+
StringRef ActAccQual, StringRef AccQual, StringRef TypeQual,
391+
unsigned OriginalArgIndex, uint64_t OriginalArgOffset) {
371392
auto Arg = Args.getDocument()->getMapNode();
372393

373394
if (!Name.empty())
@@ -409,6 +430,12 @@ void MetadataStreamerMsgPackV4::emitKernelArg(
409430
Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
410431
}
411432

433+
// Add original argument index and offset to the metadata
434+
if (OriginalArgIndex != ~0U) {
435+
Arg[".original_arg_index"] = Arg.getDocument()->getNode(OriginalArgIndex);
436+
Arg[".original_arg_offset"] = Arg.getDocument()->getNode(OriginalArgOffset);
437+
}
438+
412439
Args.push_back(Arg);
413440
}
414441

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4
116116
MaybeAlign PointeeAlign = std::nullopt,
117117
StringRef Name = "", StringRef TypeName = "",
118118
StringRef BaseTypeName = "", StringRef ActAccQual = "",
119-
StringRef AccQual = "", StringRef TypeQual = "");
119+
StringRef AccQual = "", StringRef TypeQual = "",
120+
unsigned OriginalArgIndex = ~0U,
121+
uint64_t OriginalArgOffset = 0);
120122

121123
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
122124
msgpack::ArrayDocNode Args) override;

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
2929
MODULE_PASS("amdgpu-remove-incompatible-functions", AMDGPURemoveIncompatibleFunctionsPass(*this))
3030
MODULE_PASS("amdgpu-sw-lower-lds", AMDGPUSwLowerLDSPass(*this))
3131
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
32+
MODULE_PASS("amdgpu-split-kernel-arguments", AMDGPUSplitKernelArgumentsPass())
3233
#undef MODULE_PASS
3334

3435
#ifndef MODULE_PASS_WITH_PARAMS

0 commit comments

Comments
 (0)