@@ -137,29 +137,274 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
137137 if (MFI->isStackRealigned ())
138138 Info.PrivateSegmentSize += FrameInfo.getMaxAlign ().value ();
139139
140- Info.UsesVCC = MRI.isPhysRegUsed (AMDGPU::VCC);
141-
142- Info.NumVGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
143- Info.NumExplicitSGPR =
144- TRI.getNumDefinedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
145- if (ST.hasMAIInsts ())
146- Info.NumAGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
147-
148- // Preloaded registers are written by the hardware, not defined in the
149- // function body, so they need special handling.
150- if (MFI->isEntryFunction ()) {
151- Info.NumExplicitSGPR =
152- std::max<int32_t >(Info.NumExplicitSGPR , MFI->getNumPreloadedSGPRs ());
153- Info.NumVGPR = std::max<int32_t >(Info.NumVGPR , MFI->getNumPreloadedVGPRs ());
154- }
155-
156- if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ())
140+ Info.UsesVCC =
141+ MRI.isPhysRegUsed (AMDGPU::VCC_LO) || MRI.isPhysRegUsed (AMDGPU::VCC_HI);
142+
143+ // If there are no calls, MachineRegisterInfo can tell us the used register
144+ // count easily.
145+ // A tail call isn't considered a call for MachineFrameInfo's purposes.
146+ if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
147+ Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
148+ Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
149+ if (ST.hasMAIInsts ())
150+ Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
157151 return Info;
152+ }
158153
154+ int32_t MaxVGPR = -1 ;
155+ int32_t MaxAGPR = -1 ;
156+ int32_t MaxSGPR = -1 ;
159157 Info.CalleeSegmentSize = 0 ;
160158
161159 for (const MachineBasicBlock &MBB : MF) {
162160 for (const MachineInstr &MI : MBB) {
161+ // TODO: Check regmasks? Do they occur anywhere except calls?
162+ for (const MachineOperand &MO : MI.operands ()) {
163+ unsigned Width = 0 ;
164+ bool IsSGPR = false ;
165+ bool IsAGPR = false ;
166+
167+ if (!MO.isReg ())
168+ continue ;
169+
170+ Register Reg = MO.getReg ();
171+ switch (Reg) {
172+ case AMDGPU::EXEC:
173+ case AMDGPU::EXEC_LO:
174+ case AMDGPU::EXEC_HI:
175+ case AMDGPU::SCC:
176+ case AMDGPU::M0:
177+ case AMDGPU::M0_LO16:
178+ case AMDGPU::M0_HI16:
179+ case AMDGPU::SRC_SHARED_BASE_LO:
180+ case AMDGPU::SRC_SHARED_BASE:
181+ case AMDGPU::SRC_SHARED_LIMIT_LO:
182+ case AMDGPU::SRC_SHARED_LIMIT:
183+ case AMDGPU::SRC_PRIVATE_BASE_LO:
184+ case AMDGPU::SRC_PRIVATE_BASE:
185+ case AMDGPU::SRC_PRIVATE_LIMIT_LO:
186+ case AMDGPU::SRC_PRIVATE_LIMIT:
187+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
188+ case AMDGPU::SGPR_NULL:
189+ case AMDGPU::SGPR_NULL64:
190+ case AMDGPU::MODE:
191+ continue ;
192+
193+ case AMDGPU::NoRegister:
194+ assert (MI.isDebugInstr () &&
195+ " Instruction uses invalid noreg register" );
196+ continue ;
197+
198+ case AMDGPU::VCC:
199+ case AMDGPU::VCC_LO:
200+ case AMDGPU::VCC_HI:
201+ case AMDGPU::VCC_LO_LO16:
202+ case AMDGPU::VCC_LO_HI16:
203+ case AMDGPU::VCC_HI_LO16:
204+ case AMDGPU::VCC_HI_HI16:
205+ Info.UsesVCC = true ;
206+ continue ;
207+
208+ case AMDGPU::FLAT_SCR:
209+ case AMDGPU::FLAT_SCR_LO:
210+ case AMDGPU::FLAT_SCR_HI:
211+ continue ;
212+
213+ case AMDGPU::XNACK_MASK:
214+ case AMDGPU::XNACK_MASK_LO:
215+ case AMDGPU::XNACK_MASK_HI:
216+ llvm_unreachable (" xnack_mask registers should not be used" );
217+
218+ case AMDGPU::LDS_DIRECT:
219+ llvm_unreachable (" lds_direct register should not be used" );
220+
221+ case AMDGPU::TBA:
222+ case AMDGPU::TBA_LO:
223+ case AMDGPU::TBA_HI:
224+ case AMDGPU::TMA:
225+ case AMDGPU::TMA_LO:
226+ case AMDGPU::TMA_HI:
227+ llvm_unreachable (" trap handler registers should not be used" );
228+
229+ case AMDGPU::SRC_VCCZ:
230+ llvm_unreachable (" src_vccz register should not be used" );
231+
232+ case AMDGPU::SRC_EXECZ:
233+ llvm_unreachable (" src_execz register should not be used" );
234+
235+ case AMDGPU::SRC_SCC:
236+ llvm_unreachable (" src_scc register should not be used" );
237+
238+ default :
239+ break ;
240+ }
241+
242+ if (AMDGPU::SGPR_32RegClass.contains (Reg) ||
243+ AMDGPU::SGPR_LO16RegClass.contains (Reg) ||
244+ AMDGPU::SGPR_HI16RegClass.contains (Reg)) {
245+ IsSGPR = true ;
246+ Width = 1 ;
247+ } else if (AMDGPU::VGPR_32RegClass.contains (Reg) ||
248+ AMDGPU::VGPR_16RegClass.contains (Reg)) {
249+ IsSGPR = false ;
250+ Width = 1 ;
251+ } else if (AMDGPU::AGPR_32RegClass.contains (Reg) ||
252+ AMDGPU::AGPR_LO16RegClass.contains (Reg)) {
253+ IsSGPR = false ;
254+ IsAGPR = true ;
255+ Width = 1 ;
256+ } else if (AMDGPU::SGPR_64RegClass.contains (Reg)) {
257+ IsSGPR = true ;
258+ Width = 2 ;
259+ } else if (AMDGPU::VReg_64RegClass.contains (Reg)) {
260+ IsSGPR = false ;
261+ Width = 2 ;
262+ } else if (AMDGPU::AReg_64RegClass.contains (Reg)) {
263+ IsSGPR = false ;
264+ IsAGPR = true ;
265+ Width = 2 ;
266+ } else if (AMDGPU::VReg_96RegClass.contains (Reg)) {
267+ IsSGPR = false ;
268+ Width = 3 ;
269+ } else if (AMDGPU::SReg_96RegClass.contains (Reg)) {
270+ IsSGPR = true ;
271+ Width = 3 ;
272+ } else if (AMDGPU::AReg_96RegClass.contains (Reg)) {
273+ IsSGPR = false ;
274+ IsAGPR = true ;
275+ Width = 3 ;
276+ } else if (AMDGPU::SGPR_128RegClass.contains (Reg)) {
277+ IsSGPR = true ;
278+ Width = 4 ;
279+ } else if (AMDGPU::VReg_128RegClass.contains (Reg)) {
280+ IsSGPR = false ;
281+ Width = 4 ;
282+ } else if (AMDGPU::AReg_128RegClass.contains (Reg)) {
283+ IsSGPR = false ;
284+ IsAGPR = true ;
285+ Width = 4 ;
286+ } else if (AMDGPU::VReg_160RegClass.contains (Reg)) {
287+ IsSGPR = false ;
288+ Width = 5 ;
289+ } else if (AMDGPU::SReg_160RegClass.contains (Reg)) {
290+ IsSGPR = true ;
291+ Width = 5 ;
292+ } else if (AMDGPU::AReg_160RegClass.contains (Reg)) {
293+ IsSGPR = false ;
294+ IsAGPR = true ;
295+ Width = 5 ;
296+ } else if (AMDGPU::VReg_192RegClass.contains (Reg)) {
297+ IsSGPR = false ;
298+ Width = 6 ;
299+ } else if (AMDGPU::SReg_192RegClass.contains (Reg)) {
300+ IsSGPR = true ;
301+ Width = 6 ;
302+ } else if (AMDGPU::AReg_192RegClass.contains (Reg)) {
303+ IsSGPR = false ;
304+ IsAGPR = true ;
305+ Width = 6 ;
306+ } else if (AMDGPU::VReg_224RegClass.contains (Reg)) {
307+ IsSGPR = false ;
308+ Width = 7 ;
309+ } else if (AMDGPU::SReg_224RegClass.contains (Reg)) {
310+ IsSGPR = true ;
311+ Width = 7 ;
312+ } else if (AMDGPU::AReg_224RegClass.contains (Reg)) {
313+ IsSGPR = false ;
314+ IsAGPR = true ;
315+ Width = 7 ;
316+ } else if (AMDGPU::SReg_256RegClass.contains (Reg)) {
317+ IsSGPR = true ;
318+ Width = 8 ;
319+ } else if (AMDGPU::VReg_256RegClass.contains (Reg)) {
320+ IsSGPR = false ;
321+ Width = 8 ;
322+ } else if (AMDGPU::AReg_256RegClass.contains (Reg)) {
323+ IsSGPR = false ;
324+ IsAGPR = true ;
325+ Width = 8 ;
326+ } else if (AMDGPU::VReg_288RegClass.contains (Reg)) {
327+ IsSGPR = false ;
328+ Width = 9 ;
329+ } else if (AMDGPU::SReg_288RegClass.contains (Reg)) {
330+ IsSGPR = true ;
331+ Width = 9 ;
332+ } else if (AMDGPU::AReg_288RegClass.contains (Reg)) {
333+ IsSGPR = false ;
334+ IsAGPR = true ;
335+ Width = 9 ;
336+ } else if (AMDGPU::VReg_320RegClass.contains (Reg)) {
337+ IsSGPR = false ;
338+ Width = 10 ;
339+ } else if (AMDGPU::SReg_320RegClass.contains (Reg)) {
340+ IsSGPR = true ;
341+ Width = 10 ;
342+ } else if (AMDGPU::AReg_320RegClass.contains (Reg)) {
343+ IsSGPR = false ;
344+ IsAGPR = true ;
345+ Width = 10 ;
346+ } else if (AMDGPU::VReg_352RegClass.contains (Reg)) {
347+ IsSGPR = false ;
348+ Width = 11 ;
349+ } else if (AMDGPU::SReg_352RegClass.contains (Reg)) {
350+ IsSGPR = true ;
351+ Width = 11 ;
352+ } else if (AMDGPU::AReg_352RegClass.contains (Reg)) {
353+ IsSGPR = false ;
354+ IsAGPR = true ;
355+ Width = 11 ;
356+ } else if (AMDGPU::VReg_384RegClass.contains (Reg)) {
357+ IsSGPR = false ;
358+ Width = 12 ;
359+ } else if (AMDGPU::SReg_384RegClass.contains (Reg)) {
360+ IsSGPR = true ;
361+ Width = 12 ;
362+ } else if (AMDGPU::AReg_384RegClass.contains (Reg)) {
363+ IsSGPR = false ;
364+ IsAGPR = true ;
365+ Width = 12 ;
366+ } else if (AMDGPU::SReg_512RegClass.contains (Reg)) {
367+ IsSGPR = true ;
368+ Width = 16 ;
369+ } else if (AMDGPU::VReg_512RegClass.contains (Reg)) {
370+ IsSGPR = false ;
371+ Width = 16 ;
372+ } else if (AMDGPU::AReg_512RegClass.contains (Reg)) {
373+ IsSGPR = false ;
374+ IsAGPR = true ;
375+ Width = 16 ;
376+ } else if (AMDGPU::SReg_1024RegClass.contains (Reg)) {
377+ IsSGPR = true ;
378+ Width = 32 ;
379+ } else if (AMDGPU::VReg_1024RegClass.contains (Reg)) {
380+ IsSGPR = false ;
381+ Width = 32 ;
382+ } else if (AMDGPU::AReg_1024RegClass.contains (Reg)) {
383+ IsSGPR = false ;
384+ IsAGPR = true ;
385+ Width = 32 ;
386+ } else {
387+ // We only expect TTMP registers or registers that do not belong to
388+ // any RC.
389+ assert ((AMDGPU::TTMP_32RegClass.contains (Reg) ||
390+ AMDGPU::TTMP_64RegClass.contains (Reg) ||
391+ AMDGPU::TTMP_128RegClass.contains (Reg) ||
392+ AMDGPU::TTMP_256RegClass.contains (Reg) ||
393+ AMDGPU::TTMP_512RegClass.contains (Reg) ||
394+ !TRI.getPhysRegBaseClass (Reg)) &&
395+ " Unknown register class" );
396+ }
397+ unsigned HWReg = TRI.getHWRegIndex (Reg);
398+ int MaxUsed = HWReg + Width - 1 ;
399+ if (IsSGPR) {
400+ MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
401+ } else if (IsAGPR) {
402+ MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
403+ } else {
404+ MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
405+ }
406+ }
407+
163408 if (MI.isCall ()) {
164409 // Pseudo used just to encode the underlying global. Is there a better
165410 // way to track this?
@@ -219,5 +464,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
219464 }
220465 }
221466
467+ Info.NumExplicitSGPR = MaxSGPR + 1 ;
468+ Info.NumVGPR = MaxVGPR + 1 ;
469+ Info.NumAGPR = MaxAGPR + 1 ;
470+
222471 return Info;
223472}