Skip to content

Commit 564a98b

Browse files
committed
Save NumWaveDispatchRegs in MFI for graphics entry points
1 parent 81366b6 commit 564a98b

File tree

6 files changed

+41
-69
lines changed

6 files changed

+41
-69
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 4 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -993,72 +993,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
993993
const Function &F = MF.getFunction();
994994

995995
// Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
996-
// dispatch registers are function args.
997-
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
996+
// dispatch registers are function args.
997+
unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs(),
998+
WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs();
998999

1000+
// FIXME: Cleanup
9991001
if (isShader(F.getCallingConv())) {
1000-
bool IsPixelShader =
1001-
F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
1002-
1003-
// Calculate the number of VGPR registers based on the SPI input registers
1004-
uint32_t InputEna = 0;
1005-
uint32_t InputAddr = 0;
1006-
unsigned LastEna = 0;
1007-
1008-
if (IsPixelShader) {
1009-
// Note for IsPixelShader:
1010-
// By this stage, all enabled inputs are tagged in InputAddr as well.
1011-
// We will use InputAddr to determine whether the input counts against the
1012-
// vgpr total and only use the InputEnable to determine the last input
1013-
// that is relevant - if extra arguments are used, then we have to honour
1014-
// the InputAddr for any intermediate non-enabled inputs.
1015-
InputEna = MFI->getPSInputEnable();
1016-
InputAddr = MFI->getPSInputAddr();
1017-
1018-
// We only need to consider input args up to the last used arg.
1019-
assert((InputEna || InputAddr) &&
1020-
"PSInputAddr and PSInputEnable should "
1021-
"never both be 0 for AMDGPU_PS shaders");
1022-
// There are some rare circumstances where InputAddr is non-zero and
1023-
// InputEna can be set to 0. In this case we default to setting LastEna
1024-
// to 1.
1025-
LastEna = InputEna ? llvm::Log2_32(InputEna) + 1 : 1;
1026-
}
1027-
1028-
// FIXME: We should be using the number of registers determined during
1029-
// calling convention lowering to legalize the types.
1030-
const DataLayout &DL = F.getDataLayout();
1031-
unsigned PSArgCount = 0;
1032-
unsigned IntermediateVGPR = 0;
1033-
for (auto &Arg : F.args()) {
1034-
unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
1035-
if (Arg.hasAttribute(Attribute::InReg)) {
1036-
WaveDispatchNumSGPR += NumRegs;
1037-
} else {
1038-
// If this is a PS shader and we're processing the PS Input args (first
1039-
// 16 VGPR), use the InputEna and InputAddr bits to define how many
1040-
// VGPRs are actually used.
1041-
// Any extra VGPR arguments are handled as normal arguments (and
1042-
// contribute to the VGPR count whether they're used or not).
1043-
if (IsPixelShader && PSArgCount < 16) {
1044-
if ((1 << PSArgCount) & InputAddr) {
1045-
if (PSArgCount < LastEna)
1046-
WaveDispatchNumVGPR += NumRegs;
1047-
else
1048-
IntermediateVGPR += NumRegs;
1049-
}
1050-
PSArgCount++;
1051-
} else {
1052-
// If there are extra arguments we have to include the allocation for
1053-
// the non-used (but enabled with InputAddr) input arguments
1054-
if (IntermediateVGPR) {
1055-
WaveDispatchNumVGPR += IntermediateVGPR;
1056-
IntermediateVGPR = 0;
1057-
}
1058-
WaveDispatchNumVGPR += NumRegs;
1059-
}
1060-
}
1061-
}
10621002
ProgInfo.NumSGPR = AMDGPUMCExpr::createMax(
10631003
{ProgInfo.NumSGPR,
10641004
MCBinaryExpr::createAdd(CreateExpr(WaveDispatchNumSGPR), ExtraSGPRs,

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,15 @@ bool AMDGPUCallLowering::lowerFormalArguments(
743743
if (!determineAssignments(Assigner, SplitArgs, CCInfo))
744744
return false;
745745

746+
if (IsEntryFunc) {
747+
// This assumes the registers are allocated by CCInfo in ascending order
748+
// with no gaps.
749+
Info->setNumWaveDispatchSGPRs(
750+
CCInfo.getFirstUnallocated(AMDGPU::SGPR_32RegClass.getRegisters()));
751+
Info->setNumWaveDispatchVGPRs(
752+
CCInfo.getFirstUnallocated(AMDGPU::VGPR_32RegClass.getRegisters()));
753+
}
754+
746755
FormalArgHandler Handler(B, MRI);
747756
if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
748757
return false;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3099,6 +3099,13 @@ SDValue SITargetLowering::LowerFormalArguments(
30993099
if (!IsKernel) {
31003100
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
31013101
CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
3102+
3103+
// This assumes the registers are allocated by CCInfo in ascending order
3104+
// with no gaps.
3105+
Info->setNumWaveDispatchSGPRs(
3106+
CCInfo.getFirstUnallocated(AMDGPU::SGPR_32RegClass.getRegisters()));
3107+
Info->setNumWaveDispatchVGPRs(
3108+
CCInfo.getFirstUnallocated(AMDGPU::VGPR_32RegClass.getRegisters()));
31023109
}
31033110

31043111
SmallVector<SDValue, 16> Chains;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
465465
unsigned NumUserSGPRs = 0;
466466
unsigned NumSystemSGPRs = 0;
467467

468+
unsigned NumWaveDispatchSGPRs = 0;
469+
unsigned NumWaveDispatchVGPRs = 0;
470+
468471
bool HasSpilledSGPRs = false;
469472
bool HasSpilledVGPRs = false;
470473
bool HasNonSpillStackObjects = false;
@@ -991,6 +994,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
991994
return UserSGPRInfo.getNumKernargPreloadSGPRs();
992995
}
993996

997+
/// Returns the stored wave dispatch SGPR count, i.e. the value last passed to
/// setNumWaveDispatchSGPRs(); the backing field is initialized to 0.
unsigned getNumWaveDispatchSGPRs() const { return NumWaveDispatchSGPRs; }
998+
999+
/// Stores \p Count as the number of wave dispatch SGPRs, overwriting any
/// previously stored value.
void setNumWaveDispatchSGPRs(unsigned Count) { NumWaveDispatchSGPRs = Count; }
1000+
1001+
/// Returns the stored wave dispatch VGPR count, i.e. the value last passed to
/// setNumWaveDispatchVGPRs(); the backing field is initialized to 0.
unsigned getNumWaveDispatchVGPRs() const { return NumWaveDispatchVGPRs; }
1002+
1003+
/// Stores \p Count as the number of wave dispatch VGPRs, overwriting any
/// previously stored value.
void setNumWaveDispatchVGPRs(unsigned Count) { NumWaveDispatchVGPRs = Count; }
1004+
9941005
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
9951006
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
9961007
}

llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
;RUN: llc < %s -mtriple=amdgcn-pal -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
2-
;RUN: llc < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
1+
;RUN: llc -global-isel=1 < %s -mtriple=amdgcn-pal -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
2+
;RUN: llc -global-isel=1 < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
3+
;RUN: llc -global-isel=0 < %s -mtriple=amdgcn-pal -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
4+
;RUN: llc -global-isel=0 < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 | FileCheck %s --check-prefixes=CHECK
35

46
; ;CHECK-LABEL: {{^}}_amdgpu_ps_1_arg:
57
; ;CHECK: NumVgprs: 4

llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
2-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
3-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
1+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
6+
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
47

58
; This compute shader has input args that claim that it has 17 sgprs and 5 vgprs
69
; in wave dispatch. Ensure that the sgpr and vgpr counts in COMPUTE_PGM_RSRC1

0 commit comments

Comments
 (0)