Skip to content

Commit 5bac67d

Browse files
authored
[AMDGPU] Use SIRegisterInfo to compute used registers. NFCI (#149051)
Simplify the code in AMDGPUResourceUsageAnalysis to rely more on the TargetRegisterInfo for computing the number of used SGPRs and AGPRs. This is a preliminary refactoring split out from #144855. (While we could technically use TRI to compute the number of used VGPRs at this point too, I'm leaving some of the original code in, since we're going to introduce some special cases for VGPRs.)
1 parent 90f733c commit 5bac67d

File tree

3 files changed

+31
-217
lines changed

3 files changed

+31
-217
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 23 additions & 211 deletions
Original file line numberDiff line numberDiff line change
@@ -167,77 +167,39 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
167167

168168
Info.UsesVCC =
169169
MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
170+
Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
171+
/*IncludeCalls=*/false);
172+
if (ST.hasMAIInsts())
173+
Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
174+
/*IncludeCalls=*/false);
170175

171176
// If there are no calls, MachineRegisterInfo can tell us the used register
172177
// count easily.
173178
// A tail call isn't considered a call for MachineFrameInfo's purposes.
174179
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
175-
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
176-
Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
177-
if (ST.hasMAIInsts())
178-
Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
180+
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
181+
/*IncludeCalls=*/false);
179182
return Info;
180183
}
181184

182185
int32_t MaxVGPR = -1;
183-
int32_t MaxAGPR = -1;
184-
int32_t MaxSGPR = -1;
185186
Info.CalleeSegmentSize = 0;
186187

187188
for (const MachineBasicBlock &MBB : MF) {
188189
for (const MachineInstr &MI : MBB) {
189-
// TODO: Check regmasks? Do they occur anywhere except calls?
190-
for (const MachineOperand &MO : MI.operands()) {
191-
unsigned Width = 0;
192-
bool IsSGPR = false;
193-
bool IsAGPR = false;
190+
for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
191+
const MachineOperand &MO = MI.getOperand(I);
194192

195193
if (!MO.isReg())
196194
continue;
197195

198196
Register Reg = MO.getReg();
199197
switch (Reg) {
200-
case AMDGPU::EXEC:
201-
case AMDGPU::EXEC_LO:
202-
case AMDGPU::EXEC_HI:
203-
case AMDGPU::SCC:
204-
case AMDGPU::M0:
205-
case AMDGPU::M0_LO16:
206-
case AMDGPU::M0_HI16:
207-
case AMDGPU::SRC_SHARED_BASE_LO:
208-
case AMDGPU::SRC_SHARED_BASE:
209-
case AMDGPU::SRC_SHARED_LIMIT_LO:
210-
case AMDGPU::SRC_SHARED_LIMIT:
211-
case AMDGPU::SRC_PRIVATE_BASE_LO:
212-
case AMDGPU::SRC_PRIVATE_BASE:
213-
case AMDGPU::SRC_PRIVATE_LIMIT_LO:
214-
case AMDGPU::SRC_PRIVATE_LIMIT:
215-
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
216-
case AMDGPU::SGPR_NULL:
217-
case AMDGPU::SGPR_NULL64:
218-
case AMDGPU::MODE:
219-
continue;
220-
221198
case AMDGPU::NoRegister:
222199
assert(MI.isDebugInstr() &&
223200
"Instruction uses invalid noreg register");
224201
continue;
225202

226-
case AMDGPU::VCC:
227-
case AMDGPU::VCC_LO:
228-
case AMDGPU::VCC_HI:
229-
case AMDGPU::VCC_LO_LO16:
230-
case AMDGPU::VCC_LO_HI16:
231-
case AMDGPU::VCC_HI_LO16:
232-
case AMDGPU::VCC_HI_HI16:
233-
Info.UsesVCC = true;
234-
continue;
235-
236-
case AMDGPU::FLAT_SCR:
237-
case AMDGPU::FLAT_SCR_LO:
238-
case AMDGPU::FLAT_SCR_HI:
239-
continue;
240-
241203
case AMDGPU::XNACK_MASK:
242204
case AMDGPU::XNACK_MASK_LO:
243205
case AMDGPU::XNACK_MASK_HI:
@@ -267,170 +229,22 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
267229
break;
268230
}
269231

270-
if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
271-
AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
272-
AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
273-
IsSGPR = true;
274-
Width = 1;
275-
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
276-
AMDGPU::VGPR_16RegClass.contains(Reg)) {
277-
IsSGPR = false;
278-
Width = 1;
279-
} else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
280-
AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
281-
IsSGPR = false;
282-
IsAGPR = true;
283-
Width = 1;
284-
} else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
285-
IsSGPR = true;
286-
Width = 2;
287-
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
288-
IsSGPR = false;
289-
Width = 2;
290-
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
291-
IsSGPR = false;
292-
IsAGPR = true;
293-
Width = 2;
294-
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
295-
IsSGPR = false;
296-
Width = 3;
297-
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
298-
IsSGPR = true;
299-
Width = 3;
300-
} else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
301-
IsSGPR = false;
302-
IsAGPR = true;
303-
Width = 3;
304-
} else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
305-
IsSGPR = true;
306-
Width = 4;
307-
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
308-
IsSGPR = false;
309-
Width = 4;
310-
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
311-
IsSGPR = false;
312-
IsAGPR = true;
313-
Width = 4;
314-
} else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
315-
IsSGPR = false;
316-
Width = 5;
317-
} else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
318-
IsSGPR = true;
319-
Width = 5;
320-
} else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
321-
IsSGPR = false;
322-
IsAGPR = true;
323-
Width = 5;
324-
} else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
325-
IsSGPR = false;
326-
Width = 6;
327-
} else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
328-
IsSGPR = true;
329-
Width = 6;
330-
} else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
331-
IsSGPR = false;
332-
IsAGPR = true;
333-
Width = 6;
334-
} else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
335-
IsSGPR = false;
336-
Width = 7;
337-
} else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
338-
IsSGPR = true;
339-
Width = 7;
340-
} else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
341-
IsSGPR = false;
342-
IsAGPR = true;
343-
Width = 7;
344-
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
345-
IsSGPR = true;
346-
Width = 8;
347-
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
348-
IsSGPR = false;
349-
Width = 8;
350-
} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
351-
IsSGPR = false;
352-
IsAGPR = true;
353-
Width = 8;
354-
} else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
355-
IsSGPR = false;
356-
Width = 9;
357-
} else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
358-
IsSGPR = true;
359-
Width = 9;
360-
} else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
361-
IsSGPR = false;
362-
IsAGPR = true;
363-
Width = 9;
364-
} else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
365-
IsSGPR = false;
366-
Width = 10;
367-
} else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
368-
IsSGPR = true;
369-
Width = 10;
370-
} else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
371-
IsSGPR = false;
372-
IsAGPR = true;
373-
Width = 10;
374-
} else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
375-
IsSGPR = false;
376-
Width = 11;
377-
} else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
378-
IsSGPR = true;
379-
Width = 11;
380-
} else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
381-
IsSGPR = false;
382-
IsAGPR = true;
383-
Width = 11;
384-
} else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
385-
IsSGPR = false;
386-
Width = 12;
387-
} else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
388-
IsSGPR = true;
389-
Width = 12;
390-
} else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
391-
IsSGPR = false;
392-
IsAGPR = true;
393-
Width = 12;
394-
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
395-
IsSGPR = true;
396-
Width = 16;
397-
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
398-
IsSGPR = false;
399-
Width = 16;
400-
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
401-
IsSGPR = false;
402-
IsAGPR = true;
403-
Width = 16;
404-
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
405-
IsSGPR = true;
406-
Width = 32;
407-
} else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
408-
IsSGPR = false;
409-
Width = 32;
410-
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
411-
IsSGPR = false;
412-
IsAGPR = true;
413-
Width = 32;
414-
} else {
415-
// We only expect TTMP registers or registers that do not belong to
416-
// any RC.
417-
assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
418-
AMDGPU::TTMP_64RegClass.contains(Reg) ||
419-
AMDGPU::TTMP_128RegClass.contains(Reg) ||
420-
AMDGPU::TTMP_256RegClass.contains(Reg) ||
421-
AMDGPU::TTMP_512RegClass.contains(Reg) ||
422-
!TRI.getPhysRegBaseClass(Reg)) &&
423-
"Unknown register class");
424-
}
232+
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
233+
assert((!RC || TRI.isVGPRClass(RC) || TRI.isSGPRClass(RC) ||
234+
TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
235+
AMDGPU::TTMP_64RegClass.contains(Reg) ||
236+
AMDGPU::TTMP_128RegClass.contains(Reg) ||
237+
AMDGPU::TTMP_256RegClass.contains(Reg) ||
238+
AMDGPU::TTMP_512RegClass.contains(Reg)) &&
239+
"Unknown register class");
240+
241+
if (!RC || !TRI.isVGPRClass(RC))
242+
continue;
243+
244+
unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
425245
unsigned HWReg = TRI.getHWRegIndex(Reg);
426246
int MaxUsed = HWReg + Width - 1;
427-
if (IsSGPR) {
428-
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
429-
} else if (IsAGPR) {
430-
MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
431-
} else {
432-
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
433-
}
247+
MaxVGPR = std::max(MaxUsed, MaxVGPR);
434248
}
435249

436250
if (MI.isCall()) {
@@ -492,9 +306,7 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
492306
}
493307
}
494308

495-
Info.NumExplicitSGPR = MaxSGPR + 1;
496309
Info.NumVGPR = MaxVGPR + 1;
497-
Info.NumAGPR = MaxAGPR + 1;
498310

499311
return Info;
500312
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4052,11 +4052,11 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
40524052
return 0;
40534053
}
40544054

4055-
unsigned
4056-
SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
4057-
const TargetRegisterClass &RC) const {
4055+
unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
4056+
const TargetRegisterClass &RC,
4057+
bool IncludeCalls) const {
40584058
for (MCPhysReg Reg : reverse(RC.getRegisters()))
4059-
if (MRI.isPhysRegUsed(Reg))
4059+
if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
40604060
return getHWRegIndex(Reg) + 1;
40614061
return 0;
40624062
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
486486
unsigned SubReg) const;
487487

488488
// \returns a number of registers of a given \p RC used in a function.
489-
// Does not go inside function calls.
489+
// Does not go inside function calls. If \p IncludeCalls is true, it will
490+
// include registers that may be clobbered by calls.
490491
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
491-
const TargetRegisterClass &RC) const;
492+
const TargetRegisterClass &RC,
493+
bool IncludeCalls = true) const;
492494

493495
std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
494496
return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG

0 commit comments

Comments
 (0)