@@ -167,77 +167,39 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
167
167
168
168
Info.UsesVCC =
169
169
MRI.isPhysRegUsed (AMDGPU::VCC_LO) || MRI.isPhysRegUsed (AMDGPU::VCC_HI);
170
+ Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass,
171
+ /* IncludeCalls=*/ false );
172
+ if (ST.hasMAIInsts ())
173
+ Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass,
174
+ /* IncludeCalls=*/ false );
170
175
171
176
// If there are no calls, MachineRegisterInfo can tell us the used register
172
177
// count easily.
173
178
// A tail call isn't considered a call for MachineFrameInfo's purposes.
174
179
if (!FrameInfo.hasCalls () && !FrameInfo.hasTailCall ()) {
175
- Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass);
176
- Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::SGPR_32RegClass);
177
- if (ST.hasMAIInsts ())
178
- Info.NumAGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::AGPR_32RegClass);
180
+ Info.NumVGPR = TRI.getNumUsedPhysRegs (MRI, AMDGPU::VGPR_32RegClass,
181
+ /* IncludeCalls=*/ false );
179
182
return Info;
180
183
}
181
184
182
185
int32_t MaxVGPR = -1 ;
183
- int32_t MaxAGPR = -1 ;
184
- int32_t MaxSGPR = -1 ;
185
186
Info.CalleeSegmentSize = 0 ;
186
187
187
188
for (const MachineBasicBlock &MBB : MF) {
188
189
for (const MachineInstr &MI : MBB) {
189
- // TODO: Check regmasks? Do they occur anywhere except calls?
190
- for (const MachineOperand &MO : MI.operands ()) {
191
- unsigned Width = 0 ;
192
- bool IsSGPR = false ;
193
- bool IsAGPR = false ;
190
+ for (unsigned I = 0 ; I < MI.getNumOperands (); ++I) {
191
+ const MachineOperand &MO = MI.getOperand (I);
194
192
195
193
if (!MO.isReg ())
196
194
continue ;
197
195
198
196
Register Reg = MO.getReg ();
199
197
switch (Reg) {
200
- case AMDGPU::EXEC:
201
- case AMDGPU::EXEC_LO:
202
- case AMDGPU::EXEC_HI:
203
- case AMDGPU::SCC:
204
- case AMDGPU::M0:
205
- case AMDGPU::M0_LO16:
206
- case AMDGPU::M0_HI16:
207
- case AMDGPU::SRC_SHARED_BASE_LO:
208
- case AMDGPU::SRC_SHARED_BASE:
209
- case AMDGPU::SRC_SHARED_LIMIT_LO:
210
- case AMDGPU::SRC_SHARED_LIMIT:
211
- case AMDGPU::SRC_PRIVATE_BASE_LO:
212
- case AMDGPU::SRC_PRIVATE_BASE:
213
- case AMDGPU::SRC_PRIVATE_LIMIT_LO:
214
- case AMDGPU::SRC_PRIVATE_LIMIT:
215
- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
216
- case AMDGPU::SGPR_NULL:
217
- case AMDGPU::SGPR_NULL64:
218
- case AMDGPU::MODE:
219
- continue ;
220
-
221
198
case AMDGPU::NoRegister:
222
199
assert (MI.isDebugInstr () &&
223
200
" Instruction uses invalid noreg register" );
224
201
continue ;
225
202
226
- case AMDGPU::VCC:
227
- case AMDGPU::VCC_LO:
228
- case AMDGPU::VCC_HI:
229
- case AMDGPU::VCC_LO_LO16:
230
- case AMDGPU::VCC_LO_HI16:
231
- case AMDGPU::VCC_HI_LO16:
232
- case AMDGPU::VCC_HI_HI16:
233
- Info.UsesVCC = true ;
234
- continue ;
235
-
236
- case AMDGPU::FLAT_SCR:
237
- case AMDGPU::FLAT_SCR_LO:
238
- case AMDGPU::FLAT_SCR_HI:
239
- continue ;
240
-
241
203
case AMDGPU::XNACK_MASK:
242
204
case AMDGPU::XNACK_MASK_LO:
243
205
case AMDGPU::XNACK_MASK_HI:
@@ -267,170 +229,22 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
267
229
break ;
268
230
}
269
231
270
- if (AMDGPU::SGPR_32RegClass.contains (Reg) ||
271
- AMDGPU::SGPR_LO16RegClass.contains (Reg) ||
272
- AMDGPU::SGPR_HI16RegClass.contains (Reg)) {
273
- IsSGPR = true ;
274
- Width = 1 ;
275
- } else if (AMDGPU::VGPR_32RegClass.contains (Reg) ||
276
- AMDGPU::VGPR_16RegClass.contains (Reg)) {
277
- IsSGPR = false ;
278
- Width = 1 ;
279
- } else if (AMDGPU::AGPR_32RegClass.contains (Reg) ||
280
- AMDGPU::AGPR_LO16RegClass.contains (Reg)) {
281
- IsSGPR = false ;
282
- IsAGPR = true ;
283
- Width = 1 ;
284
- } else if (AMDGPU::SGPR_64RegClass.contains (Reg)) {
285
- IsSGPR = true ;
286
- Width = 2 ;
287
- } else if (AMDGPU::VReg_64RegClass.contains (Reg)) {
288
- IsSGPR = false ;
289
- Width = 2 ;
290
- } else if (AMDGPU::AReg_64RegClass.contains (Reg)) {
291
- IsSGPR = false ;
292
- IsAGPR = true ;
293
- Width = 2 ;
294
- } else if (AMDGPU::VReg_96RegClass.contains (Reg)) {
295
- IsSGPR = false ;
296
- Width = 3 ;
297
- } else if (AMDGPU::SReg_96RegClass.contains (Reg)) {
298
- IsSGPR = true ;
299
- Width = 3 ;
300
- } else if (AMDGPU::AReg_96RegClass.contains (Reg)) {
301
- IsSGPR = false ;
302
- IsAGPR = true ;
303
- Width = 3 ;
304
- } else if (AMDGPU::SGPR_128RegClass.contains (Reg)) {
305
- IsSGPR = true ;
306
- Width = 4 ;
307
- } else if (AMDGPU::VReg_128RegClass.contains (Reg)) {
308
- IsSGPR = false ;
309
- Width = 4 ;
310
- } else if (AMDGPU::AReg_128RegClass.contains (Reg)) {
311
- IsSGPR = false ;
312
- IsAGPR = true ;
313
- Width = 4 ;
314
- } else if (AMDGPU::VReg_160RegClass.contains (Reg)) {
315
- IsSGPR = false ;
316
- Width = 5 ;
317
- } else if (AMDGPU::SReg_160RegClass.contains (Reg)) {
318
- IsSGPR = true ;
319
- Width = 5 ;
320
- } else if (AMDGPU::AReg_160RegClass.contains (Reg)) {
321
- IsSGPR = false ;
322
- IsAGPR = true ;
323
- Width = 5 ;
324
- } else if (AMDGPU::VReg_192RegClass.contains (Reg)) {
325
- IsSGPR = false ;
326
- Width = 6 ;
327
- } else if (AMDGPU::SReg_192RegClass.contains (Reg)) {
328
- IsSGPR = true ;
329
- Width = 6 ;
330
- } else if (AMDGPU::AReg_192RegClass.contains (Reg)) {
331
- IsSGPR = false ;
332
- IsAGPR = true ;
333
- Width = 6 ;
334
- } else if (AMDGPU::VReg_224RegClass.contains (Reg)) {
335
- IsSGPR = false ;
336
- Width = 7 ;
337
- } else if (AMDGPU::SReg_224RegClass.contains (Reg)) {
338
- IsSGPR = true ;
339
- Width = 7 ;
340
- } else if (AMDGPU::AReg_224RegClass.contains (Reg)) {
341
- IsSGPR = false ;
342
- IsAGPR = true ;
343
- Width = 7 ;
344
- } else if (AMDGPU::SReg_256RegClass.contains (Reg)) {
345
- IsSGPR = true ;
346
- Width = 8 ;
347
- } else if (AMDGPU::VReg_256RegClass.contains (Reg)) {
348
- IsSGPR = false ;
349
- Width = 8 ;
350
- } else if (AMDGPU::AReg_256RegClass.contains (Reg)) {
351
- IsSGPR = false ;
352
- IsAGPR = true ;
353
- Width = 8 ;
354
- } else if (AMDGPU::VReg_288RegClass.contains (Reg)) {
355
- IsSGPR = false ;
356
- Width = 9 ;
357
- } else if (AMDGPU::SReg_288RegClass.contains (Reg)) {
358
- IsSGPR = true ;
359
- Width = 9 ;
360
- } else if (AMDGPU::AReg_288RegClass.contains (Reg)) {
361
- IsSGPR = false ;
362
- IsAGPR = true ;
363
- Width = 9 ;
364
- } else if (AMDGPU::VReg_320RegClass.contains (Reg)) {
365
- IsSGPR = false ;
366
- Width = 10 ;
367
- } else if (AMDGPU::SReg_320RegClass.contains (Reg)) {
368
- IsSGPR = true ;
369
- Width = 10 ;
370
- } else if (AMDGPU::AReg_320RegClass.contains (Reg)) {
371
- IsSGPR = false ;
372
- IsAGPR = true ;
373
- Width = 10 ;
374
- } else if (AMDGPU::VReg_352RegClass.contains (Reg)) {
375
- IsSGPR = false ;
376
- Width = 11 ;
377
- } else if (AMDGPU::SReg_352RegClass.contains (Reg)) {
378
- IsSGPR = true ;
379
- Width = 11 ;
380
- } else if (AMDGPU::AReg_352RegClass.contains (Reg)) {
381
- IsSGPR = false ;
382
- IsAGPR = true ;
383
- Width = 11 ;
384
- } else if (AMDGPU::VReg_384RegClass.contains (Reg)) {
385
- IsSGPR = false ;
386
- Width = 12 ;
387
- } else if (AMDGPU::SReg_384RegClass.contains (Reg)) {
388
- IsSGPR = true ;
389
- Width = 12 ;
390
- } else if (AMDGPU::AReg_384RegClass.contains (Reg)) {
391
- IsSGPR = false ;
392
- IsAGPR = true ;
393
- Width = 12 ;
394
- } else if (AMDGPU::SReg_512RegClass.contains (Reg)) {
395
- IsSGPR = true ;
396
- Width = 16 ;
397
- } else if (AMDGPU::VReg_512RegClass.contains (Reg)) {
398
- IsSGPR = false ;
399
- Width = 16 ;
400
- } else if (AMDGPU::AReg_512RegClass.contains (Reg)) {
401
- IsSGPR = false ;
402
- IsAGPR = true ;
403
- Width = 16 ;
404
- } else if (AMDGPU::SReg_1024RegClass.contains (Reg)) {
405
- IsSGPR = true ;
406
- Width = 32 ;
407
- } else if (AMDGPU::VReg_1024RegClass.contains (Reg)) {
408
- IsSGPR = false ;
409
- Width = 32 ;
410
- } else if (AMDGPU::AReg_1024RegClass.contains (Reg)) {
411
- IsSGPR = false ;
412
- IsAGPR = true ;
413
- Width = 32 ;
414
- } else {
415
- // We only expect TTMP registers or registers that do not belong to
416
- // any RC.
417
- assert ((AMDGPU::TTMP_32RegClass.contains (Reg) ||
418
- AMDGPU::TTMP_64RegClass.contains (Reg) ||
419
- AMDGPU::TTMP_128RegClass.contains (Reg) ||
420
- AMDGPU::TTMP_256RegClass.contains (Reg) ||
421
- AMDGPU::TTMP_512RegClass.contains (Reg) ||
422
- !TRI.getPhysRegBaseClass (Reg)) &&
423
- " Unknown register class" );
424
- }
232
+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass (Reg);
233
+ assert ((!RC || TRI.isVGPRClass (RC) || TRI.isSGPRClass (RC) ||
234
+ TRI.isAGPRClass (RC) || AMDGPU::TTMP_32RegClass.contains (Reg) ||
235
+ AMDGPU::TTMP_64RegClass.contains (Reg) ||
236
+ AMDGPU::TTMP_128RegClass.contains (Reg) ||
237
+ AMDGPU::TTMP_256RegClass.contains (Reg) ||
238
+ AMDGPU::TTMP_512RegClass.contains (Reg)) &&
239
+ " Unknown register class" );
240
+
241
+ if (!RC || !TRI.isVGPRClass (RC))
242
+ continue ;
243
+
244
+ unsigned Width = divideCeil (TRI.getRegSizeInBits (*RC), 32 );
425
245
unsigned HWReg = TRI.getHWRegIndex (Reg);
426
246
int MaxUsed = HWReg + Width - 1 ;
427
- if (IsSGPR) {
428
- MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
429
- } else if (IsAGPR) {
430
- MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
431
- } else {
432
- MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
433
- }
247
+ MaxVGPR = std::max (MaxUsed, MaxVGPR);
434
248
}
435
249
436
250
if (MI.isCall ()) {
@@ -492,9 +306,7 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
492
306
}
493
307
}
494
308
495
- Info.NumExplicitSGPR = MaxSGPR + 1 ;
496
309
Info.NumVGPR = MaxVGPR + 1 ;
497
- Info.NumAGPR = MaxAGPR + 1 ;
498
310
499
311
return Info;
500
312
}
0 commit comments