Skip to content

Commit 44ff1ed

Browse files
authored
AMDGPU: Move getMaxNumVectorRegs into GCNSubtarget (NFC) (#150889)
Addresses a TODO
1 parent 41f3332 commit 44ff1ed

File tree

5 files changed

+64
-66
lines changed

5 files changed

+64
-66
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,63 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
537537
return getMaxNumVGPRs(MF.getFunction());
538538
}
539539

540+
std::pair<unsigned, unsigned>
541+
GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
542+
const unsigned MaxVectorRegs = getMaxNumVGPRs(F);
543+
544+
unsigned MaxNumVGPRs = MaxVectorRegs;
545+
unsigned MaxNumAGPRs = 0;
546+
547+
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
548+
// a wave may have up to 512 total vector registers combining together both
549+
// VGPRs and AGPRs. Hence, in an entry function without calls and without
550+
// AGPRs used within it, it is possible to use the whole vector register
551+
// budget for VGPRs.
552+
//
553+
// TODO: it should be possible to estimate maximum AGPR/VGPR pressure and split
554+
// register file accordingly.
555+
if (hasGFX90AInsts()) {
556+
unsigned MinNumAGPRs = 0;
557+
const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
558+
const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
559+
560+
const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
561+
562+
// TODO: The lower bound should probably force the number of required
563+
// registers up, overriding amdgpu-waves-per-eu.
564+
std::tie(MinNumAGPRs, MaxNumAGPRs) =
565+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", DefaultNumAGPR,
566+
/*OnlyFirstRequired=*/true);
567+
568+
if (MinNumAGPRs == DefaultNumAGPR.first) {
569+
// Default to splitting half the registers if AGPRs are required.
570+
MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
571+
} else {
572+
// Align to accum_offset's allocation granularity.
573+
MinNumAGPRs = alignTo(MinNumAGPRs, 4);
574+
575+
MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
576+
}
577+
578+
// Clamp values to be within our limits, and ensure min <= max.
579+
580+
MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
581+
MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
582+
583+
MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
584+
MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
585+
586+
assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
587+
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
588+
"invalid register counts");
589+
} else if (hasMAIInsts()) {
590+
// On gfx908 the number of AGPRs always equals the number of VGPRs.
591+
MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
592+
}
593+
594+
return std::pair(MaxNumVGPRs, MaxNumAGPRs);
595+
}
596+
540597
void GCNSubtarget::adjustSchedDependency(
541598
SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
542599
const TargetSchedModel *SchedModel) const {

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16671667
return getMaxNumVGPRs(F);
16681668
}
16691669

1670+
/// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1671+
/// of waves per execution unit required for the function \p F.
1672+
std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1673+
16701674
/// \returns Maximum number of VGPRs that meets number of waves per execution
16711675
/// unit requirement for function \p MF, or number of VGPRs explicitly
16721676
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
351351
MachineRegisterInfo &MRI = MF.getRegInfo();
352352
BitVector ReservedRegs = TRI->getReservedRegs(MF);
353353
BitVector NonWwmAllocMask(TRI->getNumRegs());
354+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
354355

355356
// FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
356357
// to have a balanced allocation between WWM values and per-thread vector
@@ -359,7 +360,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
359360
NumRegs =
360361
std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
361362

362-
auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
363+
auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
363364
// Try to use the highest available registers for now. Later after
364365
// vgpr-regalloc, they can be shifted to the lowest range.
365366
unsigned I = 0;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -572,65 +572,6 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
572572
return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
573573
}
574574

575-
std::pair<unsigned, unsigned>
576-
SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
577-
const unsigned MaxVectorRegs = ST.getMaxNumVGPRs(MF);
578-
579-
unsigned MaxNumVGPRs = MaxVectorRegs;
580-
unsigned MaxNumAGPRs = 0;
581-
582-
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
583-
// a wave may have up to 512 total vector registers combining together both
584-
// VGPRs and AGPRs. Hence, in an entry function without calls and without
585-
// AGPRs used within it, it is possible to use the whole vector register
586-
// budget for VGPRs.
587-
//
588-
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
589-
// register file accordingly.
590-
if (ST.hasGFX90AInsts()) {
591-
unsigned MinNumAGPRs = 0;
592-
const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
593-
const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
594-
595-
const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
596-
597-
// TODO: Move this logic into subtarget on IR function
598-
//
599-
// TODO: The lower bound should probably force the number of required
600-
// registers up, overriding amdgpu-waves-per-eu.
601-
std::tie(MinNumAGPRs, MaxNumAGPRs) = AMDGPU::getIntegerPairAttribute(
602-
MF.getFunction(), "amdgpu-agpr-alloc", DefaultNumAGPR,
603-
/*OnlyFirstRequired=*/true);
604-
605-
if (MinNumAGPRs == DefaultNumAGPR.first) {
606-
// Default to splitting half the registers if AGPRs are required.
607-
MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
608-
} else {
609-
// Align to accum_offset's allocation granularity.
610-
MinNumAGPRs = alignTo(MinNumAGPRs, 4);
611-
612-
MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
613-
}
614-
615-
// Clamp values to be inbounds of our limits, and ensure min <= max.
616-
617-
MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
618-
MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
619-
620-
MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
621-
MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
622-
623-
assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
624-
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
625-
"invalid register counts");
626-
} else if (ST.hasMAIInsts()) {
627-
// On gfx908 the number of AGPRs always equals the number of VGPRs.
628-
MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
629-
}
630-
631-
return std::pair(MaxNumVGPRs, MaxNumAGPRs);
632-
}
633-
634575
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
635576
BitVector Reserved(getNumRegs());
636577
Reserved.set(AMDGPU::MODE);
@@ -742,7 +683,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
742683

743684
// Reserve VGPRs/AGPRs.
744685
//
745-
auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF);
686+
auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
746687

747688
for (const TargetRegisterClass *RC : regclasses()) {
748689
if (RC->isBaseClass() && isVGPRClass(RC)) {

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
9090
/// spilling is needed.
9191
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
9292

93-
/// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
94-
/// of waves per execution unit required for the function \p MF.
95-
std::pair<unsigned, unsigned>
96-
getMaxNumVectorRegs(const MachineFunction &MF) const;
97-
9893
BitVector getReservedRegs(const MachineFunction &MF) const override;
9994
bool isAsmClobberable(const MachineFunction &MF,
10095
MCRegister PhysReg) const override;

0 commit comments

Comments
 (0)