@@ -146,13 +146,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
146146 /* IncludeCalls=*/ false );
147147
148148 // A tail call isn't considered a call for MachineFrameInfo's purposes.
149- bool HasCalls = FrameInfo.hasCalls () || FrameInfo.hasTailCall ();
150- // Functions that use the llvm.amdgcn.init.whole.wave intrinsic often have
151- // VGPR arguments that are only added for the purpose of preserving the
152- // inactive lanes. These should not be included in the number of used VGPRs.
153- bool NeedsExplicitVGPRCount = MFI->hasInitWholeWave ();
154- if (!HasCalls && !NeedsExplicitVGPRCount) {
155-
149+ if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
156150 Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass,
157151 /* IncludeCalls=*/ false );
158152 return Info;
@@ -163,34 +157,24 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
163157
164158 for (const MachineBasicBlock &MBB : MF) {
165159 for (const MachineInstr &MI : MBB) {
166- if (NeedsExplicitVGPRCount) {
167- for (unsigned I = 0 ; I < MI.getNumOperands (); ++I) {
168- const MachineOperand &MO = MI.getOperand (I);
169-
170- if (!MO.isReg ())
171- continue ;
172- Register Reg = MO.getReg ();
173- const TargetRegisterClass *RC = TRI.getPhysRegBaseClass (Reg);
174-
175- if (!RC || !TRI.isVGPRClass (RC))
176- continue ;
177-
178- // Skip inactive VGPRs in chain functions with the init.whole.wave
179- // intrinsic. These will only appear as implicit use operands on the
180- // chain call, and as the def of an IMPLICIT_DEF. We're going to skip
181- // implicit defs unconditionally though because if they're important
182- // in a different context then they will be counted when they are
183- // used.
184- bool IsChainCall =
185- MFI->isChainFunction () && MI.getOpcode () == AMDGPU::SI_TCRETURN;
186- if (IsChainCall || MI.isImplicitDef ())
187- continue ;
188-
189- unsigned Width = divideCeil (TRI.getRegSizeInBits (*RC), 32 );
190- unsigned HWReg = TRI.getHWRegIndex (Reg);
191- int MaxUsed = HWReg + Width - 1 ;
192- MaxVGPR = std::max (MaxUsed, MaxVGPR);
193- }
160+ for (unsigned I = 0 ; I < MI.getNumOperands (); ++I) {
161+ const MachineOperand &MO = MI.getOperand (I);
162+
163+ if (!MO.isReg ())
164+ continue ;
165+ Register Reg = MO.getReg ();
166+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass (Reg);
167+
168+ if (!RC || !TRI.isVGPRClass (RC))
169+ continue ;
170+
171+ if (MI.isCall () || MI.isImplicitDef ())
172+ continue ;
173+
174+ unsigned Width = divideCeil (TRI.getRegSizeInBits (*RC), 32 );
175+ unsigned HWReg = TRI.getHWRegIndex (Reg);
176+ int MaxUsed = HWReg + Width - 1 ;
177+ MaxVGPR = std::max (MaxUsed, MaxVGPR);
194178 }
195179
196180 if (MI.isCall ()) {
@@ -252,10 +236,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
252236 }
253237 }
254238
255- if (NeedsExplicitVGPRCount)
256- Info.NumVGPR = MaxVGPR + 1 ;
257- else
258- Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass, false );
239+ Info.NumVGPR = MaxVGPR + 1 ;
259240
260241 return Info;
261242}
0 commit comments