@@ -137,274 +137,29 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
137137 if (MFI->isStackRealigned ())
138138 Info.PrivateSegmentSize += FrameInfo.getMaxAlign ().value ();
139139
140- Info.UsesVCC =
141- MRI.isPhysRegUsed (AMDGPU::VCC_LO) || MRI.isPhysRegUsed (AMDGPU::VCC_HI);
142-
143- // If there are no calls, MachineRegisterInfo can tell us the used register
144- // count easily.
145- // A tail call isn't considered a call for MachineFrameInfo's purposes.
146- if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
147- Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
148- Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
149- if (ST.hasMAIInsts ())
150- Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
151- return Info;
140+ Info.UsesVCC = MRI.isPhysRegUsed (AMDGPU::VCC);
141+
142+ Info.NumVGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
143+ Info.NumExplicitSGPR =
144+ TRI.getNumDefinedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
145+ if (ST.hasMAIInsts ())
146+ Info.NumAGPR = TRI.getNumDefinedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
147+
148+ // Preloaded registers are written by the hardware, not defined in the
149+ // function body, so they need special handling.
150+ if (MFI->isEntryFunction ()) {
151+ Info.NumExplicitSGPR =
152+ std::max<int32_t >(Info.NumExplicitSGPR , MFI->getNumPreloadedSGPRs ());
153+ Info.NumVGPR = std::max<int32_t >(Info.NumVGPR , MFI->getNumPreloadedVGPRs ());
152154 }
153155
154- int32_t MaxVGPR = - 1 ;
155- int32_t MaxAGPR = - 1 ;
156- int32_t MaxSGPR = - 1 ;
156+ if (!FrameInfo. hasCalls () && !FrameInfo. hasTailCall ())
157+ return Info ;
158+
157159 Info.CalleeSegmentSize = 0 ;
158160
159161 for (const MachineBasicBlock &MBB : MF) {
160162 for (const MachineInstr &MI : MBB) {
161- // TODO: Check regmasks? Do they occur anywhere except calls?
162- for (const MachineOperand &MO : MI.operands ()) {
163- unsigned Width = 0 ;
164- bool IsSGPR = false ;
165- bool IsAGPR = false ;
166-
167- if (!MO.isReg ())
168- continue ;
169-
170- Register Reg = MO.getReg ();
171- switch (Reg) {
172- case AMDGPU::EXEC:
173- case AMDGPU::EXEC_LO:
174- case AMDGPU::EXEC_HI:
175- case AMDGPU::SCC:
176- case AMDGPU::M0:
177- case AMDGPU::M0_LO16:
178- case AMDGPU::M0_HI16:
179- case AMDGPU::SRC_SHARED_BASE_LO:
180- case AMDGPU::SRC_SHARED_BASE:
181- case AMDGPU::SRC_SHARED_LIMIT_LO:
182- case AMDGPU::SRC_SHARED_LIMIT:
183- case AMDGPU::SRC_PRIVATE_BASE_LO:
184- case AMDGPU::SRC_PRIVATE_BASE:
185- case AMDGPU::SRC_PRIVATE_LIMIT_LO:
186- case AMDGPU::SRC_PRIVATE_LIMIT:
187- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
188- case AMDGPU::SGPR_NULL:
189- case AMDGPU::SGPR_NULL64:
190- case AMDGPU::MODE:
191- continue ;
192-
193- case AMDGPU::NoRegister:
194- assert (MI.isDebugInstr () &&
195- " Instruction uses invalid noreg register" );
196- continue ;
197-
198- case AMDGPU::VCC:
199- case AMDGPU::VCC_LO:
200- case AMDGPU::VCC_HI:
201- case AMDGPU::VCC_LO_LO16:
202- case AMDGPU::VCC_LO_HI16:
203- case AMDGPU::VCC_HI_LO16:
204- case AMDGPU::VCC_HI_HI16:
205- Info.UsesVCC = true ;
206- continue ;
207-
208- case AMDGPU::FLAT_SCR:
209- case AMDGPU::FLAT_SCR_LO:
210- case AMDGPU::FLAT_SCR_HI:
211- continue ;
212-
213- case AMDGPU::XNACK_MASK:
214- case AMDGPU::XNACK_MASK_LO:
215- case AMDGPU::XNACK_MASK_HI:
216- llvm_unreachable (" xnack_mask registers should not be used" );
217-
218- case AMDGPU::LDS_DIRECT:
219- llvm_unreachable (" lds_direct register should not be used" );
220-
221- case AMDGPU::TBA:
222- case AMDGPU::TBA_LO:
223- case AMDGPU::TBA_HI:
224- case AMDGPU::TMA:
225- case AMDGPU::TMA_LO:
226- case AMDGPU::TMA_HI:
227- llvm_unreachable (" trap handler registers should not be used" );
228-
229- case AMDGPU::SRC_VCCZ:
230- llvm_unreachable (" src_vccz register should not be used" );
231-
232- case AMDGPU::SRC_EXECZ:
233- llvm_unreachable (" src_execz register should not be used" );
234-
235- case AMDGPU::SRC_SCC:
236- llvm_unreachable (" src_scc register should not be used" );
237-
238- default :
239- break ;
240- }
241-
242- if (AMDGPU::SGPR_32RegClass.contains (Reg) ||
243- AMDGPU::SGPR_LO16RegClass.contains (Reg) ||
244- AMDGPU::SGPR_HI16RegClass.contains (Reg)) {
245- IsSGPR = true ;
246- Width = 1 ;
247- } else if (AMDGPU::VGPR_32RegClass.contains (Reg) ||
248- AMDGPU::VGPR_16RegClass.contains (Reg)) {
249- IsSGPR = false ;
250- Width = 1 ;
251- } else if (AMDGPU::AGPR_32RegClass.contains (Reg) ||
252- AMDGPU::AGPR_LO16RegClass.contains (Reg)) {
253- IsSGPR = false ;
254- IsAGPR = true ;
255- Width = 1 ;
256- } else if (AMDGPU::SGPR_64RegClass.contains (Reg)) {
257- IsSGPR = true ;
258- Width = 2 ;
259- } else if (AMDGPU::VReg_64RegClass.contains (Reg)) {
260- IsSGPR = false ;
261- Width = 2 ;
262- } else if (AMDGPU::AReg_64RegClass.contains (Reg)) {
263- IsSGPR = false ;
264- IsAGPR = true ;
265- Width = 2 ;
266- } else if (AMDGPU::VReg_96RegClass.contains (Reg)) {
267- IsSGPR = false ;
268- Width = 3 ;
269- } else if (AMDGPU::SReg_96RegClass.contains (Reg)) {
270- IsSGPR = true ;
271- Width = 3 ;
272- } else if (AMDGPU::AReg_96RegClass.contains (Reg)) {
273- IsSGPR = false ;
274- IsAGPR = true ;
275- Width = 3 ;
276- } else if (AMDGPU::SGPR_128RegClass.contains (Reg)) {
277- IsSGPR = true ;
278- Width = 4 ;
279- } else if (AMDGPU::VReg_128RegClass.contains (Reg)) {
280- IsSGPR = false ;
281- Width = 4 ;
282- } else if (AMDGPU::AReg_128RegClass.contains (Reg)) {
283- IsSGPR = false ;
284- IsAGPR = true ;
285- Width = 4 ;
286- } else if (AMDGPU::VReg_160RegClass.contains (Reg)) {
287- IsSGPR = false ;
288- Width = 5 ;
289- } else if (AMDGPU::SReg_160RegClass.contains (Reg)) {
290- IsSGPR = true ;
291- Width = 5 ;
292- } else if (AMDGPU::AReg_160RegClass.contains (Reg)) {
293- IsSGPR = false ;
294- IsAGPR = true ;
295- Width = 5 ;
296- } else if (AMDGPU::VReg_192RegClass.contains (Reg)) {
297- IsSGPR = false ;
298- Width = 6 ;
299- } else if (AMDGPU::SReg_192RegClass.contains (Reg)) {
300- IsSGPR = true ;
301- Width = 6 ;
302- } else if (AMDGPU::AReg_192RegClass.contains (Reg)) {
303- IsSGPR = false ;
304- IsAGPR = true ;
305- Width = 6 ;
306- } else if (AMDGPU::VReg_224RegClass.contains (Reg)) {
307- IsSGPR = false ;
308- Width = 7 ;
309- } else if (AMDGPU::SReg_224RegClass.contains (Reg)) {
310- IsSGPR = true ;
311- Width = 7 ;
312- } else if (AMDGPU::AReg_224RegClass.contains (Reg)) {
313- IsSGPR = false ;
314- IsAGPR = true ;
315- Width = 7 ;
316- } else if (AMDGPU::SReg_256RegClass.contains (Reg)) {
317- IsSGPR = true ;
318- Width = 8 ;
319- } else if (AMDGPU::VReg_256RegClass.contains (Reg)) {
320- IsSGPR = false ;
321- Width = 8 ;
322- } else if (AMDGPU::AReg_256RegClass.contains (Reg)) {
323- IsSGPR = false ;
324- IsAGPR = true ;
325- Width = 8 ;
326- } else if (AMDGPU::VReg_288RegClass.contains (Reg)) {
327- IsSGPR = false ;
328- Width = 9 ;
329- } else if (AMDGPU::SReg_288RegClass.contains (Reg)) {
330- IsSGPR = true ;
331- Width = 9 ;
332- } else if (AMDGPU::AReg_288RegClass.contains (Reg)) {
333- IsSGPR = false ;
334- IsAGPR = true ;
335- Width = 9 ;
336- } else if (AMDGPU::VReg_320RegClass.contains (Reg)) {
337- IsSGPR = false ;
338- Width = 10 ;
339- } else if (AMDGPU::SReg_320RegClass.contains (Reg)) {
340- IsSGPR = true ;
341- Width = 10 ;
342- } else if (AMDGPU::AReg_320RegClass.contains (Reg)) {
343- IsSGPR = false ;
344- IsAGPR = true ;
345- Width = 10 ;
346- } else if (AMDGPU::VReg_352RegClass.contains (Reg)) {
347- IsSGPR = false ;
348- Width = 11 ;
349- } else if (AMDGPU::SReg_352RegClass.contains (Reg)) {
350- IsSGPR = true ;
351- Width = 11 ;
352- } else if (AMDGPU::AReg_352RegClass.contains (Reg)) {
353- IsSGPR = false ;
354- IsAGPR = true ;
355- Width = 11 ;
356- } else if (AMDGPU::VReg_384RegClass.contains (Reg)) {
357- IsSGPR = false ;
358- Width = 12 ;
359- } else if (AMDGPU::SReg_384RegClass.contains (Reg)) {
360- IsSGPR = true ;
361- Width = 12 ;
362- } else if (AMDGPU::AReg_384RegClass.contains (Reg)) {
363- IsSGPR = false ;
364- IsAGPR = true ;
365- Width = 12 ;
366- } else if (AMDGPU::SReg_512RegClass.contains (Reg)) {
367- IsSGPR = true ;
368- Width = 16 ;
369- } else if (AMDGPU::VReg_512RegClass.contains (Reg)) {
370- IsSGPR = false ;
371- Width = 16 ;
372- } else if (AMDGPU::AReg_512RegClass.contains (Reg)) {
373- IsSGPR = false ;
374- IsAGPR = true ;
375- Width = 16 ;
376- } else if (AMDGPU::SReg_1024RegClass.contains (Reg)) {
377- IsSGPR = true ;
378- Width = 32 ;
379- } else if (AMDGPU::VReg_1024RegClass.contains (Reg)) {
380- IsSGPR = false ;
381- Width = 32 ;
382- } else if (AMDGPU::AReg_1024RegClass.contains (Reg)) {
383- IsSGPR = false ;
384- IsAGPR = true ;
385- Width = 32 ;
386- } else {
387- // We only expect TTMP registers or registers that do not belong to
388- // any RC.
389- assert ((AMDGPU::TTMP_32RegClass.contains (Reg) ||
390- AMDGPU::TTMP_64RegClass.contains (Reg) ||
391- AMDGPU::TTMP_128RegClass.contains (Reg) ||
392- AMDGPU::TTMP_256RegClass.contains (Reg) ||
393- AMDGPU::TTMP_512RegClass.contains (Reg) ||
394- !TRI.getPhysRegBaseClass (Reg)) &&
395- " Unknown register class" );
396- }
397- unsigned HWReg = TRI.getHWRegIndex (Reg);
398- int MaxUsed = HWReg + Width - 1 ;
399- if (IsSGPR) {
400- MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
401- } else if (IsAGPR) {
402- MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
403- } else {
404- MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
405- }
406- }
407-
408163 if (MI.isCall ()) {
409164 // Pseudo used just to encode the underlying global. Is there a better
410165 // way to track this?
@@ -464,9 +219,5 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
464219 }
465220 }
466221
467- Info.NumExplicitSGPR = MaxSGPR + 1 ;
468- Info.NumVGPR = MaxVGPR + 1 ;
469- Info.NumAGPR = MaxAGPR + 1 ;
470-
471222 return Info;
472223}
0 commit comments