Skip to content

Commit 1ea0b5b

Browse files
committed
[AMDGPU] Use SIRegisterInfo to compute used registers. NFCI
Simplify the code in AMDGPUResourceUsageAnalysis to rely more on the TargetRegisterInfo for computing the number of used SGPRs and AGPRs. This is a preliminary refactoring split out from llvm#144855. (While we could technically use TRI to compute the number of used VGPRs at this point too, I'm leaving some of the original code in, since we're going to introduce some special cases for VGPRs.)
1 parent cf3d136 commit 1ea0b5b

File tree

3 files changed

+31
-217
lines changed

3 files changed

+31
-217
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 23 additions & 211 deletions
Original file line numberDiff line numberDiff line change
@@ -139,77 +139,39 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
139139

140140
Info.UsesVCC =
141141
MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
142+
Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
143+
/*IncludeCalls=*/false);
144+
if (ST.hasMAIInsts())
145+
Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
146+
/*IncludeCalls=*/false);
142147

143148
// If there are no calls, MachineRegisterInfo can tell us the used register
144149
// count easily.
145150
// A tail call isn't considered a call for MachineFrameInfo's purposes.
146151
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
147-
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
148-
Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
149-
if (ST.hasMAIInsts())
150-
Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
152+
Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
153+
/*IncludeCalls=*/false);
151154
return Info;
152155
}
153156

154157
int32_t MaxVGPR = -1;
155-
int32_t MaxAGPR = -1;
156-
int32_t MaxSGPR = -1;
157158
Info.CalleeSegmentSize = 0;
158159

159160
for (const MachineBasicBlock &MBB : MF) {
160161
for (const MachineInstr &MI : MBB) {
161-
// TODO: Check regmasks? Do they occur anywhere except calls?
162-
for (const MachineOperand &MO : MI.operands()) {
163-
unsigned Width = 0;
164-
bool IsSGPR = false;
165-
bool IsAGPR = false;
162+
for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
163+
const MachineOperand &MO = MI.getOperand(I);
166164

167165
if (!MO.isReg())
168166
continue;
169167

170168
Register Reg = MO.getReg();
171169
switch (Reg) {
172-
case AMDGPU::EXEC:
173-
case AMDGPU::EXEC_LO:
174-
case AMDGPU::EXEC_HI:
175-
case AMDGPU::SCC:
176-
case AMDGPU::M0:
177-
case AMDGPU::M0_LO16:
178-
case AMDGPU::M0_HI16:
179-
case AMDGPU::SRC_SHARED_BASE_LO:
180-
case AMDGPU::SRC_SHARED_BASE:
181-
case AMDGPU::SRC_SHARED_LIMIT_LO:
182-
case AMDGPU::SRC_SHARED_LIMIT:
183-
case AMDGPU::SRC_PRIVATE_BASE_LO:
184-
case AMDGPU::SRC_PRIVATE_BASE:
185-
case AMDGPU::SRC_PRIVATE_LIMIT_LO:
186-
case AMDGPU::SRC_PRIVATE_LIMIT:
187-
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
188-
case AMDGPU::SGPR_NULL:
189-
case AMDGPU::SGPR_NULL64:
190-
case AMDGPU::MODE:
191-
continue;
192-
193170
case AMDGPU::NoRegister:
194171
assert(MI.isDebugInstr() &&
195172
"Instruction uses invalid noreg register");
196173
continue;
197174

198-
case AMDGPU::VCC:
199-
case AMDGPU::VCC_LO:
200-
case AMDGPU::VCC_HI:
201-
case AMDGPU::VCC_LO_LO16:
202-
case AMDGPU::VCC_LO_HI16:
203-
case AMDGPU::VCC_HI_LO16:
204-
case AMDGPU::VCC_HI_HI16:
205-
Info.UsesVCC = true;
206-
continue;
207-
208-
case AMDGPU::FLAT_SCR:
209-
case AMDGPU::FLAT_SCR_LO:
210-
case AMDGPU::FLAT_SCR_HI:
211-
continue;
212-
213175
case AMDGPU::XNACK_MASK:
214176
case AMDGPU::XNACK_MASK_LO:
215177
case AMDGPU::XNACK_MASK_HI:
@@ -239,170 +201,22 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
239201
break;
240202
}
241203

242-
if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
243-
AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
244-
AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
245-
IsSGPR = true;
246-
Width = 1;
247-
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
248-
AMDGPU::VGPR_16RegClass.contains(Reg)) {
249-
IsSGPR = false;
250-
Width = 1;
251-
} else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
252-
AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
253-
IsSGPR = false;
254-
IsAGPR = true;
255-
Width = 1;
256-
} else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
257-
IsSGPR = true;
258-
Width = 2;
259-
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
260-
IsSGPR = false;
261-
Width = 2;
262-
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
263-
IsSGPR = false;
264-
IsAGPR = true;
265-
Width = 2;
266-
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
267-
IsSGPR = false;
268-
Width = 3;
269-
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
270-
IsSGPR = true;
271-
Width = 3;
272-
} else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
273-
IsSGPR = false;
274-
IsAGPR = true;
275-
Width = 3;
276-
} else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
277-
IsSGPR = true;
278-
Width = 4;
279-
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
280-
IsSGPR = false;
281-
Width = 4;
282-
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
283-
IsSGPR = false;
284-
IsAGPR = true;
285-
Width = 4;
286-
} else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
287-
IsSGPR = false;
288-
Width = 5;
289-
} else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
290-
IsSGPR = true;
291-
Width = 5;
292-
} else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
293-
IsSGPR = false;
294-
IsAGPR = true;
295-
Width = 5;
296-
} else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
297-
IsSGPR = false;
298-
Width = 6;
299-
} else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
300-
IsSGPR = true;
301-
Width = 6;
302-
} else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
303-
IsSGPR = false;
304-
IsAGPR = true;
305-
Width = 6;
306-
} else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
307-
IsSGPR = false;
308-
Width = 7;
309-
} else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
310-
IsSGPR = true;
311-
Width = 7;
312-
} else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
313-
IsSGPR = false;
314-
IsAGPR = true;
315-
Width = 7;
316-
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
317-
IsSGPR = true;
318-
Width = 8;
319-
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
320-
IsSGPR = false;
321-
Width = 8;
322-
} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
323-
IsSGPR = false;
324-
IsAGPR = true;
325-
Width = 8;
326-
} else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
327-
IsSGPR = false;
328-
Width = 9;
329-
} else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
330-
IsSGPR = true;
331-
Width = 9;
332-
} else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
333-
IsSGPR = false;
334-
IsAGPR = true;
335-
Width = 9;
336-
} else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
337-
IsSGPR = false;
338-
Width = 10;
339-
} else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
340-
IsSGPR = true;
341-
Width = 10;
342-
} else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
343-
IsSGPR = false;
344-
IsAGPR = true;
345-
Width = 10;
346-
} else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
347-
IsSGPR = false;
348-
Width = 11;
349-
} else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
350-
IsSGPR = true;
351-
Width = 11;
352-
} else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
353-
IsSGPR = false;
354-
IsAGPR = true;
355-
Width = 11;
356-
} else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
357-
IsSGPR = false;
358-
Width = 12;
359-
} else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
360-
IsSGPR = true;
361-
Width = 12;
362-
} else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
363-
IsSGPR = false;
364-
IsAGPR = true;
365-
Width = 12;
366-
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
367-
IsSGPR = true;
368-
Width = 16;
369-
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
370-
IsSGPR = false;
371-
Width = 16;
372-
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
373-
IsSGPR = false;
374-
IsAGPR = true;
375-
Width = 16;
376-
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
377-
IsSGPR = true;
378-
Width = 32;
379-
} else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
380-
IsSGPR = false;
381-
Width = 32;
382-
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
383-
IsSGPR = false;
384-
IsAGPR = true;
385-
Width = 32;
386-
} else {
387-
// We only expect TTMP registers or registers that do not belong to
388-
// any RC.
389-
assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
390-
AMDGPU::TTMP_64RegClass.contains(Reg) ||
391-
AMDGPU::TTMP_128RegClass.contains(Reg) ||
392-
AMDGPU::TTMP_256RegClass.contains(Reg) ||
393-
AMDGPU::TTMP_512RegClass.contains(Reg) ||
394-
!TRI.getPhysRegBaseClass(Reg)) &&
395-
"Unknown register class");
396-
}
204+
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
205+
assert((!RC || TRI.isVGPRClass(RC) || TRI.isSGPRClass(RC) ||
206+
TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
207+
AMDGPU::TTMP_64RegClass.contains(Reg) ||
208+
AMDGPU::TTMP_128RegClass.contains(Reg) ||
209+
AMDGPU::TTMP_256RegClass.contains(Reg) ||
210+
AMDGPU::TTMP_512RegClass.contains(Reg)) &&
211+
"Unknown register class");
212+
213+
if (!RC || !TRI.isVGPRClass(RC))
214+
continue;
215+
216+
unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
397217
unsigned HWReg = TRI.getHWRegIndex(Reg);
398218
int MaxUsed = HWReg + Width - 1;
399-
if (IsSGPR) {
400-
MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
401-
} else if (IsAGPR) {
402-
MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
403-
} else {
404-
MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
405-
}
219+
MaxVGPR = std::max(MaxUsed, MaxVGPR);
406220
}
407221

408222
if (MI.isCall()) {
@@ -464,9 +278,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
464278
}
465279
}
466280

467-
Info.NumExplicitSGPR = MaxSGPR + 1;
468281
Info.NumVGPR = MaxVGPR + 1;
469-
Info.NumAGPR = MaxAGPR + 1;
470282

471283
return Info;
472284
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4050,11 +4050,11 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
40504050
return 0;
40514051
}
40524052

4053-
unsigned
4054-
SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
4055-
const TargetRegisterClass &RC) const {
4053+
unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
4054+
const TargetRegisterClass &RC,
4055+
bool IncludeCalls) const {
40564056
for (MCPhysReg Reg : reverse(RC.getRegisters()))
4057-
if (MRI.isPhysRegUsed(Reg))
4057+
if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
40584058
return getHWRegIndex(Reg) + 1;
40594059
return 0;
40604060
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
482482
unsigned SubReg) const;
483483

484484
// \returns a number of registers of a given \p RC used in a function.
485-
// Does not go inside function calls.
485+
// Does not go inside function calls. If \p IncludeCalls is true, it will
486+
// include registers that may be clobbered by calls.
486487
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
487-
const TargetRegisterClass &RC) const;
488+
const TargetRegisterClass &RC,
489+
bool IncludeCalls = true) const;
488490

489491
std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
490492
return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG

0 commit comments

Comments
 (0)