Skip to content

Commit 70b5d18

Browse files
tyb0807 authored and aadeshps-mcw committed
[AMDGPU] Enable serializing of allocated preload kernarg SGPRs info (llvm#168374)
- Support serialization of the number of allocated preload kernarg SGPRs.
- Support serialization of the first allocated preload kernarg SGPR.

Together, these changes make it possible to correctly reconstruct MIR that uses preload kernarg SGPRs.
1 parent 8bbba82 commit 70b5d18

13 files changed

+382
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,6 +2027,42 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
20272027
MFI->ArgInfo.WorkItemIDZ, 0, 0)))
20282028
return true;
20292029

2030+
// Parse FirstKernArgPreloadReg separately, since it's a Register,
2031+
// not ArgDescriptor.
2032+
if (YamlMFI.ArgInfo && YamlMFI.ArgInfo->FirstKernArgPreloadReg) {
2033+
const yaml::SIArgument &A = *YamlMFI.ArgInfo->FirstKernArgPreloadReg;
2034+
2035+
if (!A.IsRegister) {
2036+
// For stack arguments, we don't have RegisterName.SourceRange,
2037+
// but we should have some location info from the YAML parser
2038+
const MemoryBuffer &Buffer =
2039+
*PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
2040+
// Create a minimal valid source range
2041+
SMLoc Loc = SMLoc::getFromPointer(Buffer.getBufferStart());
2042+
SMRange Range(Loc, Loc);
2043+
2044+
Error = SMDiagnostic(
2045+
*PFS.SM, Loc, Buffer.getBufferIdentifier(), 1, 0, SourceMgr::DK_Error,
2046+
"firstKernArgPreloadReg must be a register, not a stack location", "",
2047+
{}, {});
2048+
2049+
SourceRange = Range;
2050+
return true;
2051+
}
2052+
2053+
Register Reg;
2054+
if (parseNamedRegisterReference(PFS, Reg, A.RegisterName.Value, Error)) {
2055+
SourceRange = A.RegisterName.SourceRange;
2056+
return true;
2057+
}
2058+
2059+
if (!AMDGPU::SGPR_32RegClass.contains(Reg))
2060+
return diagnoseRegisterClass(A.RegisterName);
2061+
2062+
MFI->ArgInfo.FirstKernArgPreloadReg = Reg;
2063+
MFI->NumUserSGPRs += YamlMFI.NumKernargPreloadSGPRs;
2064+
}
2065+
20302066
if (ST.hasIEEEMode())
20312067
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
20322068
if (ST.hasDX10ClampMode())

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,6 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
696696
return true;
697697
};
698698

699-
// TODO: Need to serialize kernarg preloads.
700699
bool Any = false;
701700
Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
702701
Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
@@ -718,6 +717,21 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
718717
Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
719718
Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
720719

720+
// Write FirstKernArgPreloadReg separately, since it's a Register,
721+
// not ArgDescriptor.
722+
if (ArgInfo.FirstKernArgPreloadReg) {
723+
Register Reg = ArgInfo.FirstKernArgPreloadReg;
724+
assert(Reg.isPhysical() &&
725+
"FirstKernArgPreloadReg must be a physical register");
726+
727+
yaml::SIArgument SA = yaml::SIArgument::createArgument(true);
728+
raw_string_ostream OS(SA.RegisterName.Value);
729+
OS << printReg(Reg, &TRI);
730+
731+
AI.FirstKernArgPreloadReg = SA;
732+
Any = true;
733+
}
734+
721735
if (Any)
722736
return AI;
723737

@@ -750,7 +764,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
750764
Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()),
751765
IsWholeWaveFunction(MFI.isWholeWaveFunction()),
752766
DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()),
753-
ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) {
767+
ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()),
768+
NumKernargPreloadSGPRs(MFI.getNumKernargPreloadedSGPRs()) {
754769
for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
755770
SpillPhysVGPRS.push_back(regToString(Reg, TRI));
756771

@@ -799,6 +814,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
799814
ReturnsVoid = YamlMFI.ReturnsVoid;
800815
IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction;
801816

817+
UserSGPRInfo.allocKernargPreloadSGPRs(YamlMFI.NumKernargPreloadSGPRs);
818+
802819
if (YamlMFI.ScavengeFI) {
803820
auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
804821
if (!FIOrErr) {

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ struct SIArgumentInfo {
170170
std::optional<SIArgument> DispatchID;
171171
std::optional<SIArgument> FlatScratchInit;
172172
std::optional<SIArgument> PrivateSegmentSize;
173+
std::optional<SIArgument> FirstKernArgPreloadReg;
173174

174175
std::optional<SIArgument> WorkGroupIDX;
175176
std::optional<SIArgument> WorkGroupIDY;
@@ -195,6 +196,7 @@ template <> struct MappingTraits<SIArgumentInfo> {
195196
YamlIO.mapOptional("dispatchID", AI.DispatchID);
196197
YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
197198
YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
199+
YamlIO.mapOptional("firstKernArgPreloadReg", AI.FirstKernArgPreloadReg);
198200

199201
YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
200202
YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
@@ -305,6 +307,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
305307
unsigned DynamicVGPRBlockSize = 0;
306308
unsigned ScratchReservedForDynamicVGPRs = 0;
307309

310+
unsigned NumKernargPreloadSGPRs = 0;
311+
308312
SIMachineFunctionInfo() = default;
309313
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
310314
const TargetRegisterInfo &TRI,
@@ -361,6 +365,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
361365
YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, false);
362366
YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
363367
MFI.ScratchReservedForDynamicVGPRs, 0);
368+
YamlIO.mapOptional("numKernargPreloadSGPRs", MFI.NumKernargPreloadSGPRs, 0);
364369
YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false);
365370
}
366371
};

llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
; CHECK-NEXT: hasInitWholeWave: false
4949
; CHECK-NEXT: dynamicVGPRBlockSize: 0
5050
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
51+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
5152
; CHECK-NEXT: isWholeWaveFunction: false
5253
; CHECK-NEXT: body:
5354
define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 {
@@ -320,6 +321,7 @@
320321
; CHECK-NEXT: hasInitWholeWave: false
321322
; CHECK-NEXT: dynamicVGPRBlockSize: 0
322323
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
324+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
323325
; CHECK-NEXT: isWholeWaveFunction: false
324326
; CHECK-NEXT: body:
325327
define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 {

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
; AFTER-PEI-NEXT: hasInitWholeWave: false
4949
; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0
5050
; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0
51+
; AFTER-PEI-NEXT: numKernargPreloadSGPRs: 0
5152
; AFTER-PEI-NEXT: isWholeWaveFunction: false
5253
; AFTER-PEI-NEXT: body:
5354
define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 {

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
; CHECK-NEXT: hasInitWholeWave: false
4949
; CHECK-NEXT: dynamicVGPRBlockSize: 0
5050
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
51+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
5152
; CHECK-NEXT: isWholeWaveFunction: false
5253
; CHECK-NEXT: body:
5354
define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 {

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
; CHECK-NEXT: hasInitWholeWave: false
4949
; CHECK-NEXT: dynamicVGPRBlockSize: 0
5050
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
51+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
5152
; CHECK-NEXT: isWholeWaveFunction: false
5253
; CHECK-NEXT: body:
5354
define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 {

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
# FULL-NEXT: hasInitWholeWave: false
5858
# FULL-NEXT: dynamicVGPRBlockSize: 0
5959
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
60+
# FULL-NEXT: numKernargPreloadSGPRs: 0
6061
# FULL-NEXT: isWholeWaveFunction: false
6162
# FULL-NEXT: body:
6263

@@ -167,6 +168,7 @@ body: |
167168
# FULL-NEXT: hasInitWholeWave: false
168169
# FULL-NEXT: dynamicVGPRBlockSize: 0
169170
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
171+
# FULL-NEXT: numKernargPreloadSGPRs: 0
170172
# FULL-NEXT: isWholeWaveFunction: false
171173
# FULL-NEXT: body:
172174

@@ -248,6 +250,7 @@ body: |
248250
# FULL-NEXT: hasInitWholeWave: false
249251
# FULL-NEXT: dynamicVGPRBlockSize: 0
250252
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
253+
# FULL-NEXT: numKernargPreloadSGPRs: 0
251254
# FULL-NEXT: isWholeWaveFunction: false
252255
# FULL-NEXT: body:
253256

@@ -330,6 +333,7 @@ body: |
330333
# FULL-NEXT: hasInitWholeWave: false
331334
# FULL-NEXT: dynamicVGPRBlockSize: 0
332335
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
336+
# FULL-NEXT: numKernargPreloadSGPRs: 0
333337
# FULL-NEXT: isWholeWaveFunction: false
334338
# FULL-NEXT: body:
335339

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
; CHECK-NEXT: hasInitWholeWave: false
5959
; CHECK-NEXT: dynamicVGPRBlockSize: 0
6060
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
61+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
6162
; CHECK-NEXT: isWholeWaveFunction: false
6263
; CHECK-NEXT: body:
6364
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
@@ -110,6 +111,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
110111
; CHECK-NEXT: hasInitWholeWave: false
111112
; CHECK-NEXT: dynamicVGPRBlockSize: 0
112113
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
114+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
113115
; CHECK-NEXT: isWholeWaveFunction: false
114116
; CHECK-NEXT: body:
115117
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
@@ -186,6 +188,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
186188
; CHECK-NEXT: hasInitWholeWave: false
187189
; CHECK-NEXT: dynamicVGPRBlockSize: 0
188190
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
191+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
189192
; CHECK-NEXT: isWholeWaveFunction: false
190193
; CHECK-NEXT: body:
191194
define void @function() {
@@ -244,6 +247,7 @@ define void @function() {
244247
; CHECK-NEXT: hasInitWholeWave: false
245248
; CHECK-NEXT: dynamicVGPRBlockSize: 0
246249
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
250+
; CHECK-NEXT: numKernargPreloadSGPRs: 0
247251
; CHECK-NEXT: isWholeWaveFunction: false
248252
; CHECK-NEXT: body:
249253
define void @function_nsz() #0 {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s
2+
3+
---
4+
# CHECK: error: {{.*}} incorrect register class for field
5+
name: kernarg_preload_wrong_register_class
6+
tracksRegLiveness: true
7+
machineFunctionInfo:
8+
explicitKernArgSize: 4
9+
maxKernArgAlign: 4
10+
numKernargPreloadSGPRs: 1
11+
isEntryFunction: true
12+
argumentInfo:
13+
kernargSegmentPtr: { reg: '$sgpr0_sgpr1' }
14+
firstKernArgPreloadReg: { reg: '$vgpr0' } # ERROR: VGPR instead of SGPR
15+
body: |
16+
bb.0:
17+
S_ENDPGM 0
18+
...

0 commit comments

Comments
 (0)