Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 38 additions & 8 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
VGPRExcessLimit =
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
AGPRExcessLimit =
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::AGPR_32RegClass);

SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
// Set the initial TargetOccupancy to the maximum occupancy that we can
Expand All @@ -98,6 +100,9 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
SGPRCriticalLimit =
std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

AGPRCriticalLimit =
std::min(ST.getMaxNumAGPRs(TargetOccupancy), AGPRExcessLimit);

if (!KnownExcessRP) {
VGPRCriticalLimit = std::min(
ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),
Expand Down Expand Up @@ -201,7 +206,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
unsigned SGPRPressure,
unsigned VGPRPressure, bool IsBottomUp) {
unsigned VGPRPressure,
unsigned AGPRPressure, bool IsBottomUp) {
Cand.SU = SU;
Cand.AtTop = AtTop;

Expand Down Expand Up @@ -230,6 +236,7 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
Pressure.resize(4, 0);
Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = AGPRPressure;

for (const auto &Diff : DAG->getPressureDiff(SU)) {
if (!Diff.isValid())
Expand All @@ -247,22 +254,28 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] ||
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] !=
CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32]) {
errs() << "Register Pressure is inaccurate when calculated through "
"PressureDiff\n"
<< "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
<< ", expected "
<< CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
<< "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
<< ", expected "
<< CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
<< CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n"
<< "AGPR got " << Pressure[AMDGPU::RegisterPressureSets::AGPR_32]
<< ", expected "
<< CheckPressure[AMDGPU::RegisterPressureSets::AGPR_32] << "\n";
report_fatal_error("inaccurate register pressure calculation");
}
#endif
}

unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
unsigned NewAGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];

// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
Expand All @@ -272,9 +285,11 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
// only for VGPRs or only for SGPRs.

// FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
const unsigned MaxVGPRPressureInc = 16;
static constexpr unsigned MaxVGPRPressureInc = 16;
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
bool ShouldTrackAGPRs = !ShouldTrackVGPRs && AGPRPressure >= AGPRExcessLimit;
bool ShouldTrackSGPRs =
!ShouldTrackVGPRs && !ShouldTrackAGPRs && SGPRPressure >= SGPRExcessLimit;

// FIXME: We have to enter REG-EXCESS before we reach the actual threshold
// to increase the likelihood we don't go over the limits. We should improve
Expand All @@ -291,6 +306,12 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
}

if (ShouldTrackAGPRs && NewAGPRPressure >= AGPRExcessLimit) {
HasHighPressure = true;
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
Cand.RPDelta.Excess.setUnitInc(NewAGPRPressure - AGPRExcessLimit);
}

if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
HasHighPressure = true;
Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Expand All @@ -304,13 +325,19 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,

int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
int AGPRDelta = NewAGPRPressure - AGPRCriticalLimit;

if (SGPRDelta >= 0 || VGPRDelta >= 0) {
if (SGPRDelta >= 0 || VGPRDelta >= 0 || AGPRDelta >= 0) {
HasHighPressure = true;
if (SGPRDelta > VGPRDelta) {
// Prioritize reducing the VGPRDelta if both are >= 0
if (SGPRDelta > VGPRDelta && SGPRDelta > AGPRDelta) {
Cand.RPDelta.CriticalMax =
PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
} else if (AGPRDelta > VGPRDelta) {
Cand.RPDelta.CriticalMax =
PressureChange(AMDGPU::RegisterPressureSets::AGPR_32);
Cand.RPDelta.CriticalMax.setUnitInc(AGPRDelta);
} else {
Cand.RPDelta.CriticalMax =
PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Expand All @@ -330,24 +357,27 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = 0;
unsigned VGPRPressure = 0;
unsigned AGPRPressure = 0;
if (DAG->isTrackingPressure()) {
if (!GCNTrackers) {
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
AGPRPressure = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
} else {
GCNRPTracker *T = IsBottomUp
? static_cast<GCNRPTracker *>(&UpwardTracker)
: static_cast<GCNRPTracker *>(&DownwardTracker);
SGPRPressure = T->getPressure().getSGPRNum();
VGPRPressure = T->getPressure().getArchVGPRNum();
AGPRPressure = T->getPressure().getAGPRNum();
}
}
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {

SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
VGPRPressure, IsBottomUp);
VGPRPressure, AGPRPressure, IsBottomUp);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ class GCNSchedStrategy : public GenericScheduler {
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI, unsigned SGPRPressure,
unsigned VGPRPressure, bool IsBottomUp);
unsigned VGPRPressure, unsigned AGPRPressure,
bool IsBottomUp);

std::vector<unsigned> Pressure;

Expand All @@ -63,6 +64,8 @@ class GCNSchedStrategy : public GenericScheduler {

unsigned VGPRExcessLimit;

unsigned AGPRExcessLimit;

unsigned TargetOccupancy;

MachineFunction *MF;
Expand Down Expand Up @@ -103,6 +106,8 @@ class GCNSchedStrategy : public GenericScheduler {

unsigned VGPRCriticalLimit;

unsigned AGPRCriticalLimit;

unsigned SGPRLimitBias = 0;

unsigned VGPRLimitBias = 0;
Expand Down Expand Up @@ -183,8 +188,7 @@ class ScheduleMetrics {
};

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
dbgs() << "\n Schedule Metric (scaled by "
<< ScheduleMetrics::ScaleFactor
dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
<< " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
<< Sm.getLength() << " ]\n";
return OS;
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1722,8 +1722,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// unit requirement.
unsigned getMaxNumVGPRs(const Function &F) const;

unsigned getMaxNumAGPRs(const Function &F) const {
return getMaxNumVGPRs(F);
unsigned getMaxNumAGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumAGPRs(this, WavesPerEU);
}

/// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
Expand All @@ -1744,13 +1744,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool supportsWave64() const { return !hasGFX1250Insts(); }

bool isWave32() const {
return getWavefrontSize() == 32;
}
bool isWave32() const { return getWavefrontSize() == 32; }

bool isWave64() const {
return getWavefrontSize() == 64;
}
bool isWave64() const { return getWavefrontSize() == 64; }

/// Returns if the wavesize of this subtarget is known reliable. This is false
/// only for a default target-cpu that does not have an explicit
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1494,6 +1494,22 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

/// \returns Addressable number of AGPRs for a given subtarget \p STI.
/// This is currently a fixed 256 and does not inspect \p STI.
unsigned getAddressableNumAGPRs(const MCSubtargetInfo *STI) { return 256; }

/// \returns Maximum number of AGPRs that meets the \p WavesPerEU waves per
/// execution unit requirement for subtarget \p STI, or 0 when \p STI does not
/// have MAI instructions.
///
/// The per-wave budget is the total VGPR count divided by \p WavesPerEU,
/// aligned to the VGPR allocation granule and clamped to the addressable
/// number of AGPRs. \p WavesPerEU must be non-zero.
unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  // Without MAI instructions there is no AGPR budget to report.
  if (!STI->getFeatureBits().test(FeatureMAIInsts))
    return 0;

  assert(WavesPerEU != 0 && "WavesPerEU must be non-zero");

  // The granule below is queried with a dynamic VGPR block size of 0, so the
  // dynamic VGPR feature is not handled by this computation.
  assert(!STI->getFeatureBits().test(FeatureDynamicVGPR) &&
         "dynamic VGPR mode is not handled here");

  // NOTE(review): alignTo rounds *up* to the granule, so MaxNumAGPRs *
  // WavesPerEU may exceed getTotalNumVGPRs(STI) — confirm whether alignDown
  // was intended.
  unsigned MaxNumAGPRs =
      alignTo(getTotalNumVGPRs(STI) / WavesPerEU, getVGPRAllocGranule(STI, 0));

  // Clamp with the AGPR-specific helper rather than the ArchVGPR one so the
  // addressable AGPR limit has a single source of truth.
  return std::min(MaxNumAGPRs, getAddressableNumAGPRs(STI));
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
std::optional<bool> EnableWavefrontSize32) {
return getGranulatedNumRegisterBlocks(
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,13 @@ unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
unsigned MaxWaves,
unsigned TotalNumVGPRs);

/// \returns Maximum number of AGPRs that meets the given number of waves per
/// execution unit requirement \p WavesPerEU for given subtarget \p STI, or 0
/// if \p STI does not have MAI instructions. \p WavesPerEU must be non-zero.
unsigned getMaxNumAGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Addressable number of AGPRs for a given subtarget \p STI.
/// The current implementation returns 256 for every subtarget.
unsigned getAddressableNumAGPRs(const MCSubtargetInfo *STI);

/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
/// Gen.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
Expand Down
Loading