Skip to content

Commit ae7bb74

Browse files
arsenm authored and jrbyrnes committed
AMDGPU: Move getMaxNumVectorRegs into GCNSubtarget (NFC) (llvm#150889)
Addresses a TODO
1 parent fefceb8 commit ae7bb74

File tree

5 files changed

+64
-66
lines changed

5 files changed

+64
-66
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.cpp

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -521,6 +521,63 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
521521
return getMaxNumVGPRs(MF.getFunction());
522522
}
523523

524+
std::pair<unsigned, unsigned>
525+
GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
526+
const unsigned MaxVectorRegs = getMaxNumVGPRs(F);
527+
528+
unsigned MaxNumVGPRs = MaxVectorRegs;
529+
unsigned MaxNumAGPRs = 0;
530+
531+
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
532+
// a wave may have up to 512 total vector registers combining together both
533+
// VGPRs and AGPRs. Hence, in an entry function without calls and without
534+
// AGPRs used within it, it is possible to use the whole vector register
535+
// budget for VGPRs.
536+
//
537+
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
538+
// register file accordingly.
539+
if (hasGFX90AInsts()) {
540+
unsigned MinNumAGPRs = 0;
541+
const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
542+
const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
543+
544+
const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
545+
546+
// TODO: The lower bound should probably force the number of required
547+
// registers up, overriding amdgpu-waves-per-eu.
548+
std::tie(MinNumAGPRs, MaxNumAGPRs) =
549+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", DefaultNumAGPR,
550+
/*OnlyFirstRequired=*/true);
551+
552+
if (MinNumAGPRs == DefaultNumAGPR.first) {
553+
// Default to splitting half the registers if AGPRs are required.
554+
MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
555+
} else {
556+
// Align to accum_offset's allocation granularity.
557+
MinNumAGPRs = alignTo(MinNumAGPRs, 4);
558+
559+
MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
560+
}
561+
562+
// Clamp values to be inbounds of our limits, and ensure min <= max.
563+
564+
MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
565+
MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
566+
567+
MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
568+
MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
569+
570+
assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
571+
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
572+
"invalid register counts");
573+
} else if (hasMAIInsts()) {
574+
// On gfx908 the number of AGPRs always equals the number of VGPRs.
575+
MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
576+
}
577+
578+
return std::pair(MaxNumVGPRs, MaxNumAGPRs);
579+
}
580+
524581
void GCNSubtarget::adjustSchedDependency(
525582
SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
526583
const TargetSchedModel *SchedModel) const {

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1600,6 +1600,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16001600
return getMaxNumVGPRs(F);
16011601
}
16021602

1603+
/// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1604+
/// of waves per execution unit required for the function \p F.
1605+
std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1606+
16031607
/// \returns Maximum number of VGPRs that meets number of waves per execution
16041608
/// unit requirement for function \p MF, or number of VGPRs explicitly
16051609
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
351351
MachineRegisterInfo &MRI = MF.getRegInfo();
352352
BitVector ReservedRegs = TRI->getReservedRegs(MF);
353353
BitVector NonWwmAllocMask(TRI->getNumRegs());
354+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
354355

355356
// FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
356357
// to have a balanced allocation between WWM values and per-thread vector
@@ -359,7 +360,7 @@ void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
359360
NumRegs =
360361
std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
361362

362-
auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
363+
auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
363364
// Try to use the highest available registers for now. Later after
364365
// vgpr-regalloc, they can be shifted to the lowest range.
365366
unsigned I = 0;

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -570,65 +570,6 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
570570
return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
571571
}
572572

573-
std::pair<unsigned, unsigned>
574-
SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
575-
const unsigned MaxVectorRegs = ST.getMaxNumVGPRs(MF);
576-
577-
unsigned MaxNumVGPRs = MaxVectorRegs;
578-
unsigned MaxNumAGPRs = 0;
579-
580-
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
581-
// a wave may have up to 512 total vector registers combining together both
582-
// VGPRs and AGPRs. Hence, in an entry function without calls and without
583-
// AGPRs used within it, it is possible to use the whole vector register
584-
// budget for VGPRs.
585-
//
586-
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
587-
// register file accordingly.
588-
if (ST.hasGFX90AInsts()) {
589-
unsigned MinNumAGPRs = 0;
590-
const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
591-
const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
592-
593-
const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
594-
595-
// TODO: Move this logic into subtarget on IR function
596-
//
597-
// TODO: The lower bound should probably force the number of required
598-
// registers up, overriding amdgpu-waves-per-eu.
599-
std::tie(MinNumAGPRs, MaxNumAGPRs) = AMDGPU::getIntegerPairAttribute(
600-
MF.getFunction(), "amdgpu-agpr-alloc", DefaultNumAGPR,
601-
/*OnlyFirstRequired=*/true);
602-
603-
if (MinNumAGPRs == DefaultNumAGPR.first) {
604-
// Default to splitting half the registers if AGPRs are required.
605-
MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
606-
} else {
607-
// Align to accum_offset's allocation granularity.
608-
MinNumAGPRs = alignTo(MinNumAGPRs, 4);
609-
610-
MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
611-
}
612-
613-
// Clamp values to be inbounds of our limits, and ensure min <= max.
614-
615-
MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
616-
MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
617-
618-
MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
619-
MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
620-
621-
assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
622-
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
623-
"invalid register counts");
624-
} else if (ST.hasMAIInsts()) {
625-
// On gfx908 the number of AGPRs always equals the number of VGPRs.
626-
MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
627-
}
628-
629-
return std::pair(MaxNumVGPRs, MaxNumAGPRs);
630-
}
631-
632573
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
633574
BitVector Reserved(getNumRegs());
634575
Reserved.set(AMDGPU::MODE);
@@ -736,7 +677,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
736677

737678
// Reserve VGPRs/AGPRs.
738679
//
739-
auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF);
680+
auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
740681

741682
for (const TargetRegisterClass *RC : regclasses()) {
742683
if (RC->isBaseClass() && isVGPRClass(RC)) {

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -90,11 +90,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
9090
/// spilling is needed.
9191
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
9292

93-
/// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
94-
/// of waves per execution unit required for the function \p MF.
95-
std::pair<unsigned, unsigned>
96-
getMaxNumVectorRegs(const MachineFunction &MF) const;
97-
9893
BitVector getReservedRegs(const MachineFunction &MF) const override;
9994
bool isAsmClobberable(const MachineFunction &MF,
10095
MCRegister PhysReg) const override;

0 commit comments

Comments
 (0)