@@ -139,77 +139,39 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
139139
140140 Info.UsesVCC =
141141 MRI.isPhysRegUsed (AMDGPU::VCC_LO) || MRI.isPhysRegUsed (AMDGPU::VCC_HI);
142+ Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass,
143+ /* IncludeCalls=*/ false );
144+ if (ST.hasMAIInsts ())
145+ Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass,
146+ /* IncludeCalls=*/ false );
142147
143148 // If there are no calls, MachineRegisterInfo can tell us the used register
144149 // count easily.
145150 // A tail call isn't considered a call for MachineFrameInfo's purposes.
146151 if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
147- Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
148- Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
149- if (ST.hasMAIInsts ())
150- Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
152+ Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass,
153+ /* IncludeCalls=*/ false );
151154 return Info;
152155 }
153156
154157 int32_t MaxVGPR = -1 ;
155- int32_t MaxAGPR = -1 ;
156- int32_t MaxSGPR = -1 ;
157158 Info.CalleeSegmentSize = 0 ;
158159
159160 for (const MachineBasicBlock &MBB : MF) {
160161 for (const MachineInstr &MI : MBB) {
161- // TODO: Check regmasks? Do they occur anywhere except calls?
162- for (const MachineOperand &MO : MI.operands ()) {
163- unsigned Width = 0 ;
164- bool IsSGPR = false ;
165- bool IsAGPR = false ;
162+ for (unsigned I = 0 ; I < MI.getNumOperands (); ++I) {
163+ const MachineOperand &MO = MI.getOperand (I);
166164
167165 if (!MO.isReg ())
168166 continue ;
169167
170168 Register Reg = MO.getReg ();
171169 switch (Reg) {
172- case AMDGPU::EXEC:
173- case AMDGPU::EXEC_LO:
174- case AMDGPU::EXEC_HI:
175- case AMDGPU::SCC:
176- case AMDGPU::M0:
177- case AMDGPU::M0_LO16:
178- case AMDGPU::M0_HI16:
179- case AMDGPU::SRC_SHARED_BASE_LO:
180- case AMDGPU::SRC_SHARED_BASE:
181- case AMDGPU::SRC_SHARED_LIMIT_LO:
182- case AMDGPU::SRC_SHARED_LIMIT:
183- case AMDGPU::SRC_PRIVATE_BASE_LO:
184- case AMDGPU::SRC_PRIVATE_BASE:
185- case AMDGPU::SRC_PRIVATE_LIMIT_LO:
186- case AMDGPU::SRC_PRIVATE_LIMIT:
187- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
188- case AMDGPU::SGPR_NULL:
189- case AMDGPU::SGPR_NULL64:
190- case AMDGPU::MODE:
191- continue ;
192-
193170 case AMDGPU::NoRegister:
194171 assert (MI.isDebugInstr () &&
195172 " Instruction uses invalid noreg register" );
196173 continue ;
197174
198- case AMDGPU::VCC:
199- case AMDGPU::VCC_LO:
200- case AMDGPU::VCC_HI:
201- case AMDGPU::VCC_LO_LO16:
202- case AMDGPU::VCC_LO_HI16:
203- case AMDGPU::VCC_HI_LO16:
204- case AMDGPU::VCC_HI_HI16:
205- Info.UsesVCC = true ;
206- continue ;
207-
208- case AMDGPU::FLAT_SCR:
209- case AMDGPU::FLAT_SCR_LO:
210- case AMDGPU::FLAT_SCR_HI:
211- continue ;
212-
213175 case AMDGPU::XNACK_MASK:
214176 case AMDGPU::XNACK_MASK_LO:
215177 case AMDGPU::XNACK_MASK_HI:
@@ -239,170 +201,22 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
239201 break ;
240202 }
241203
242- if (AMDGPU::SGPR_32RegClass.contains (Reg) ||
243- AMDGPU::SGPR_LO16RegClass.contains (Reg) ||
244- AMDGPU::SGPR_HI16RegClass.contains (Reg)) {
245- IsSGPR = true ;
246- Width = 1 ;
247- } else if (AMDGPU::VGPR_32RegClass.contains (Reg) ||
248- AMDGPU::VGPR_16RegClass.contains (Reg)) {
249- IsSGPR = false ;
250- Width = 1 ;
251- } else if (AMDGPU::AGPR_32RegClass.contains (Reg) ||
252- AMDGPU::AGPR_LO16RegClass.contains (Reg)) {
253- IsSGPR = false ;
254- IsAGPR = true ;
255- Width = 1 ;
256- } else if (AMDGPU::SGPR_64RegClass.contains (Reg)) {
257- IsSGPR = true ;
258- Width = 2 ;
259- } else if (AMDGPU::VReg_64RegClass.contains (Reg)) {
260- IsSGPR = false ;
261- Width = 2 ;
262- } else if (AMDGPU::AReg_64RegClass.contains (Reg)) {
263- IsSGPR = false ;
264- IsAGPR = true ;
265- Width = 2 ;
266- } else if (AMDGPU::VReg_96RegClass.contains (Reg)) {
267- IsSGPR = false ;
268- Width = 3 ;
269- } else if (AMDGPU::SReg_96RegClass.contains (Reg)) {
270- IsSGPR = true ;
271- Width = 3 ;
272- } else if (AMDGPU::AReg_96RegClass.contains (Reg)) {
273- IsSGPR = false ;
274- IsAGPR = true ;
275- Width = 3 ;
276- } else if (AMDGPU::SGPR_128RegClass.contains (Reg)) {
277- IsSGPR = true ;
278- Width = 4 ;
279- } else if (AMDGPU::VReg_128RegClass.contains (Reg)) {
280- IsSGPR = false ;
281- Width = 4 ;
282- } else if (AMDGPU::AReg_128RegClass.contains (Reg)) {
283- IsSGPR = false ;
284- IsAGPR = true ;
285- Width = 4 ;
286- } else if (AMDGPU::VReg_160RegClass.contains (Reg)) {
287- IsSGPR = false ;
288- Width = 5 ;
289- } else if (AMDGPU::SReg_160RegClass.contains (Reg)) {
290- IsSGPR = true ;
291- Width = 5 ;
292- } else if (AMDGPU::AReg_160RegClass.contains (Reg)) {
293- IsSGPR = false ;
294- IsAGPR = true ;
295- Width = 5 ;
296- } else if (AMDGPU::VReg_192RegClass.contains (Reg)) {
297- IsSGPR = false ;
298- Width = 6 ;
299- } else if (AMDGPU::SReg_192RegClass.contains (Reg)) {
300- IsSGPR = true ;
301- Width = 6 ;
302- } else if (AMDGPU::AReg_192RegClass.contains (Reg)) {
303- IsSGPR = false ;
304- IsAGPR = true ;
305- Width = 6 ;
306- } else if (AMDGPU::VReg_224RegClass.contains (Reg)) {
307- IsSGPR = false ;
308- Width = 7 ;
309- } else if (AMDGPU::SReg_224RegClass.contains (Reg)) {
310- IsSGPR = true ;
311- Width = 7 ;
312- } else if (AMDGPU::AReg_224RegClass.contains (Reg)) {
313- IsSGPR = false ;
314- IsAGPR = true ;
315- Width = 7 ;
316- } else if (AMDGPU::SReg_256RegClass.contains (Reg)) {
317- IsSGPR = true ;
318- Width = 8 ;
319- } else if (AMDGPU::VReg_256RegClass.contains (Reg)) {
320- IsSGPR = false ;
321- Width = 8 ;
322- } else if (AMDGPU::AReg_256RegClass.contains (Reg)) {
323- IsSGPR = false ;
324- IsAGPR = true ;
325- Width = 8 ;
326- } else if (AMDGPU::VReg_288RegClass.contains (Reg)) {
327- IsSGPR = false ;
328- Width = 9 ;
329- } else if (AMDGPU::SReg_288RegClass.contains (Reg)) {
330- IsSGPR = true ;
331- Width = 9 ;
332- } else if (AMDGPU::AReg_288RegClass.contains (Reg)) {
333- IsSGPR = false ;
334- IsAGPR = true ;
335- Width = 9 ;
336- } else if (AMDGPU::VReg_320RegClass.contains (Reg)) {
337- IsSGPR = false ;
338- Width = 10 ;
339- } else if (AMDGPU::SReg_320RegClass.contains (Reg)) {
340- IsSGPR = true ;
341- Width = 10 ;
342- } else if (AMDGPU::AReg_320RegClass.contains (Reg)) {
343- IsSGPR = false ;
344- IsAGPR = true ;
345- Width = 10 ;
346- } else if (AMDGPU::VReg_352RegClass.contains (Reg)) {
347- IsSGPR = false ;
348- Width = 11 ;
349- } else if (AMDGPU::SReg_352RegClass.contains (Reg)) {
350- IsSGPR = true ;
351- Width = 11 ;
352- } else if (AMDGPU::AReg_352RegClass.contains (Reg)) {
353- IsSGPR = false ;
354- IsAGPR = true ;
355- Width = 11 ;
356- } else if (AMDGPU::VReg_384RegClass.contains (Reg)) {
357- IsSGPR = false ;
358- Width = 12 ;
359- } else if (AMDGPU::SReg_384RegClass.contains (Reg)) {
360- IsSGPR = true ;
361- Width = 12 ;
362- } else if (AMDGPU::AReg_384RegClass.contains (Reg)) {
363- IsSGPR = false ;
364- IsAGPR = true ;
365- Width = 12 ;
366- } else if (AMDGPU::SReg_512RegClass.contains (Reg)) {
367- IsSGPR = true ;
368- Width = 16 ;
369- } else if (AMDGPU::VReg_512RegClass.contains (Reg)) {
370- IsSGPR = false ;
371- Width = 16 ;
372- } else if (AMDGPU::AReg_512RegClass.contains (Reg)) {
373- IsSGPR = false ;
374- IsAGPR = true ;
375- Width = 16 ;
376- } else if (AMDGPU::SReg_1024RegClass.contains (Reg)) {
377- IsSGPR = true ;
378- Width = 32 ;
379- } else if (AMDGPU::VReg_1024RegClass.contains (Reg)) {
380- IsSGPR = false ;
381- Width = 32 ;
382- } else if (AMDGPU::AReg_1024RegClass.contains (Reg)) {
383- IsSGPR = false ;
384- IsAGPR = true ;
385- Width = 32 ;
386- } else {
387- // We only expect TTMP registers or registers that do not belong to
388- // any RC.
389- assert ((AMDGPU::TTMP_32RegClass.contains (Reg) ||
390- AMDGPU::TTMP_64RegClass.contains (Reg) ||
391- AMDGPU::TTMP_128RegClass.contains (Reg) ||
392- AMDGPU::TTMP_256RegClass.contains (Reg) ||
393- AMDGPU::TTMP_512RegClass.contains (Reg) ||
394- !TRI.getPhysRegBaseClass (Reg)) &&
395- " Unknown register class" );
396- }
204+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass (Reg);
205+ assert ((!RC || TRI.isVGPRClass (RC) || TRI.isSGPRClass (RC) ||
206+ TRI.isAGPRClass (RC) || AMDGPU::TTMP_32RegClass.contains (Reg) ||
207+ AMDGPU::TTMP_64RegClass.contains (Reg) ||
208+ AMDGPU::TTMP_128RegClass.contains (Reg) ||
209+ AMDGPU::TTMP_256RegClass.contains (Reg) ||
210+ AMDGPU::TTMP_512RegClass.contains (Reg)) &&
211+ " Unknown register class" );
212+
213+ if (!RC || !TRI.isVGPRClass (RC))
214+ continue ;
215+
216+ unsigned Width = divideCeil (TRI.getRegSizeInBits (*RC), 32 );
397217 unsigned HWReg = TRI.getHWRegIndex (Reg);
398218 int MaxUsed = HWReg + Width - 1 ;
399- if (IsSGPR) {
400- MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
401- } else if (IsAGPR) {
402- MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
403- } else {
404- MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
405- }
219+ MaxVGPR = std::max (MaxUsed, MaxVGPR);
406220 }
407221
408222 if (MI.isCall ()) {
@@ -464,9 +278,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
464278 }
465279 }
466280
467- Info.NumExplicitSGPR = MaxSGPR + 1 ;
468281 Info.NumVGPR = MaxVGPR + 1 ;
469- Info.NumAGPR = MaxAGPR + 1 ;
470282
471283 return Info;
472284}
0 commit comments