Skip to content

Commit ec59590

Browse files
committed
Skip call args for all calls
1 parent 561a792 commit ec59590

File tree

5 files changed

+50
-52
lines changed

5 files changed

+50
-52
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
146146
/*IncludeCalls=*/false);
147147

148148
// A tail call isn't considered a call for MachineFrameInfo's purposes.
149-
bool HasCalls = FrameInfo.hasCalls() || FrameInfo.hasTailCall();
150-
// Functions that use the llvm.amdgcn.init.whole.wave intrinsic often have
151-
// VGPR arguments that are only added for the purpose of preserving the
152-
// inactive lanes. These should not be included in the number of used VGPRs.
153-
bool NeedsExplicitVGPRCount = MFI->hasInitWholeWave();
154-
if (!HasCalls && !NeedsExplicitVGPRCount) {
155-
149+
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
156150
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
157151
/*IncludeCalls=*/false);
158152
return Info;
@@ -163,34 +157,24 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
163157

164158
for (const MachineBasicBlock &MBB : MF) {
165159
for (const MachineInstr &MI : MBB) {
166-
if (NeedsExplicitVGPRCount) {
167-
for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
168-
const MachineOperand &MO = MI.getOperand(I);
169-
170-
if (!MO.isReg())
171-
continue;
172-
Register Reg = MO.getReg();
173-
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
174-
175-
if (!RC || !TRI.isVGPRClass(RC))
176-
continue;
177-
178-
// Skip inactive VGPRs in chain functions with the init.whole.wave
179-
// intrinsic. These will only appear as implicit use operands on the
180-
// chain call, and as the def of an IMPLICIT_DEF. We're going to skip
181-
// implicit defs unconditionally though because if they're important
182-
// in a different context then they will be counted when they are
183-
// used.
184-
bool IsChainCall =
185-
MFI->isChainFunction() && MI.getOpcode() == AMDGPU::SI_TCRETURN;
186-
if (IsChainCall || MI.isImplicitDef())
187-
continue;
188-
189-
unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
190-
unsigned HWReg = TRI.getHWRegIndex(Reg);
191-
int MaxUsed = HWReg + Width - 1;
192-
MaxVGPR = std::max(MaxUsed, MaxVGPR);
193-
}
160+
for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
161+
const MachineOperand &MO = MI.getOperand(I);
162+
163+
if (!MO.isReg())
164+
continue;
165+
Register Reg = MO.getReg();
166+
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
167+
168+
if (!RC || !TRI.isVGPRClass(RC))
169+
continue;
170+
171+
if (MI.isCall() || MI.isImplicitDef())
172+
continue;
173+
174+
unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
175+
unsigned HWReg = TRI.getHWRegIndex(Reg);
176+
int MaxUsed = HWReg + Width - 1;
177+
MaxVGPR = std::max(MaxUsed, MaxVGPR);
194178
}
195179

196180
if (MI.isCall()) {
@@ -252,10 +236,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
252236
}
253237
}
254238

255-
if (NeedsExplicitVGPRCount)
256-
Info.NumVGPR = MaxVGPR + 1;
257-
else
258-
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass, false);
239+
Info.NumVGPR = MaxVGPR + 1;
259240

260241
return Info;
261242
}

llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ entry:
1616
}
1717

1818
; CHECK-LABEL: __unnamed_2:
19-
; CHECK: .set __unnamed_2.num_vgpr, max(32, __unnamed_1.num_vgpr)
19+
; CHECK: .set __unnamed_2.num_vgpr, max(1, __unnamed_1.num_vgpr)
2020
; CHECK: .set __unnamed_2.num_agpr, max(0, __unnamed_1.num_agpr)
2121
; CHECK: .set __unnamed_2.numbered_sgpr, max(34, __unnamed_1.numbered_sgpr)
2222
; CHECK: .set __unnamed_2.private_seg_size, 16+max(__unnamed_1.private_seg_size)

llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,9 +1264,9 @@ define amdgpu_kernel void @k1024_call_no_agprs_ub_callee() #1025 {
12641264
}
12651265

12661266
; GCN-LABEL: {{^}}f1024_0:
1267-
; GFX90A: NumVgprs: 32
1267+
; GFX90A: NumVgprs: 1
12681268
; GFX90A: NumAgprs: 1
1269-
; GFX90A: TotalNumVgprs: 33
1269+
; GFX90A: TotalNumVgprs: 5
12701270
define void @f1024_0() #1024 {
12711271
call void @foo()
12721272
ret void
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; RUN: llc -mcpu=gfx1200 -o - < %s | FileCheck %s
2+
; Check that reads of a VGPR in kernels count towards VGPR count, but in functions, only writes of VGPRs count towards VGPR count.
3+
target triple = "amdgcn--amdpal"
4+
5+
@global = addrspace(1) global i32 poison, align 4
6+
7+
; CHECK-LABEL: amdpal.pipelines:
8+
9+
; Shouldn't report the part of %vgpr_args that's not used
10+
; CHECK-LABEL: entry_point_symbol: cs_calling_chain
11+
; CHECK: .vgpr_count: 0xa
12+
define amdgpu_cs void @cs_calling_chain(i32 %vgpr, i32 inreg %sgpr) {
13+
%vgpr_args = insertvalue {i32, i32, i32, i32} poison, i32 %vgpr, 1
14+
call void (ptr, i32, i32, {i32, i32, i32, i32}, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.i32.s(
15+
ptr @chain_func, i32 0, i32 inreg %sgpr, {i32, i32, i32, i32} %vgpr_args, i32 0)
16+
unreachable
17+
}
18+
19+
; Neither uses nor writes a VGPR
20+
; CHECK-LABEL: chain_func:
21+
; CHECK: .vgpr_count: 0x1
22+
define amdgpu_cs_chain void @chain_func([32 x i32] %args) {
23+
entry:
24+
call void (ptr, i32, {}, [32 x i32], i32, ...) @llvm.amdgcn.cs.chain.p0.i32.s.a(
25+
ptr @chain_func, i32 0, {} inreg {}, [32 x i32] %args, i32 0)
26+
unreachable
27+
}

llvm/test/CodeGen/AMDGPU/vgpr-count-graphics.ll

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,6 @@ entry:
1616
ret void
1717
}
1818

19-
; Neither uses nor writes a VGPR
20-
; CHECK-LABEL: chain_func:
21-
; CHECK: .vgpr_count: 0x28
22-
define amdgpu_cs_chain void @chain_func([32 x i32] %args) {
23-
entry:
24-
call void (ptr, i32, {}, [32 x i32], i32, ...) @llvm.amdgcn.cs.chain.p0.i32.s.a(
25-
ptr @chain_func, i32 0, {} inreg {}, [32 x i32] %args, i32 0)
26-
unreachable
27-
}
28-
2919
; Neither uses nor writes a VGPR
3020
; CHECK-LABEL: gfx_func:
3121
; CHECK: .vgpr_count: 0x20

0 commit comments

Comments
 (0)