Skip to content

Commit d4b1ab7

Browse files
authored
[AMDGPU] Examine instructions in pending queues during scheduling (llvm#147653)
Examine instructions in the pending queue when scheduling. This makes instructions visible to scheduling heuristics even when they aren't immediately issuable due to hardware resource constraints. The scheduler has two hardware resource modeling modes: an in-order mode, where instructions must be ready to issue before scheduling, and an out-of-order mode, where instructions are always visible to heuristics. Special handling exists for unbuffered processor resources in the out-of-order model. These resources can cause pipeline stalls when used back-to-back, so they're typically avoided. However, for AMDGPU targets, managing register pressure and reducing spilling is critical enough to justify exceptions to this approach. This change enables examination of instructions that can't be immediately issued because they use an already occupied unbuffered resource. By making these instructions visible to scheduling heuristics anyway, we gain more flexibility in scheduling decisions, potentially allowing better management of register pressure and hardware resources.
1 parent a7cda50 commit d4b1ab7

File tree

7 files changed

+1385
-1241
lines changed

7 files changed

+1385
-1241
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 163 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers(
6969
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
7070
cl::init(false));
7171

72+
static cl::opt<unsigned> PendingQueueLimit(
73+
"amdgpu-scheduler-pending-queue-limit", cl::Hidden,
74+
cl::desc(
75+
"Max (Available+Pending) size to inspect pending queue (0 disables)"),
76+
cl::init(256));
77+
7278
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
7379
#define DUMP_MAX_REG_PRESSURE
7480
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
@@ -335,17 +341,52 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
335341
}
336342
}
337343

344+
static bool shouldCheckPending(SchedBoundary &Zone,
345+
const TargetSchedModel *SchedModel) {
346+
bool HasBufferedModel =
347+
SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
348+
unsigned Combined = Zone.Available.size() + Zone.Pending.size();
349+
return Combined <= PendingQueueLimit && HasBufferedModel;
350+
}
351+
352+
static SUnit *pickOnlyChoice(SchedBoundary &Zone,
353+
const TargetSchedModel *SchedModel) {
354+
// pickOnlyChoice() releases pending instructions and checks for new hazards.
355+
SUnit *OnlyChoice = Zone.pickOnlyChoice();
356+
if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
357+
return OnlyChoice;
358+
359+
return nullptr;
360+
}
361+
362+
void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
363+
const SchedCandidate &Preferred) {
364+
LLVM_DEBUG({
365+
dbgs() << "Prefer:\t\t";
366+
DAG->dumpNode(*Preferred.SU);
367+
368+
if (Current.SU) {
369+
dbgs() << "Not:\t";
370+
DAG->dumpNode(*Current.SU);
371+
}
372+
373+
dbgs() << "Reason:\t\t";
374+
traceCandidate(Preferred);
375+
});
376+
}
377+
338378
// This function is mostly cut and pasted from
339379
// GenericScheduler::pickNodeFromQueue()
340380
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
341381
const CandPolicy &ZonePolicy,
342382
const RegPressureTracker &RPTracker,
343-
SchedCandidate &Cand,
383+
SchedCandidate &Cand, bool &IsPending,
344384
bool IsBottomUp) {
345385
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
346386
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
347387
unsigned SGPRPressure = 0;
348388
unsigned VGPRPressure = 0;
389+
IsPending = false;
349390
if (DAG->isTrackingPressure()) {
350391
if (!GCNTrackers) {
351392
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -358,8 +399,9 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
358399
VGPRPressure = T->getPressure().getArchVGPRNum();
359400
}
360401
}
361-
ReadyQueue &Q = Zone.Available;
362-
for (SUnit *SU : Q) {
402+
LLVM_DEBUG(dbgs() << "Available Q:\n");
403+
ReadyQueue &AQ = Zone.Available;
404+
for (SUnit *SU : AQ) {
363405

364406
SchedCandidate TryCand(ZonePolicy);
365407
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
@@ -371,40 +413,70 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
371413
// Initialize resource delta if needed in case future heuristics query it.
372414
if (TryCand.ResDelta == SchedResourceDelta())
373415
TryCand.initResourceDelta(Zone.DAG, SchedModel);
416+
LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
374417
Cand.setBest(TryCand);
375-
LLVM_DEBUG(traceCandidate(Cand));
418+
} else {
419+
printCandidateDecision(TryCand, Cand);
420+
}
421+
}
422+
423+
if (!shouldCheckPending(Zone, SchedModel))
424+
return;
425+
426+
LLVM_DEBUG(dbgs() << "Pending Q:\n");
427+
ReadyQueue &PQ = Zone.Pending;
428+
for (SUnit *SU : PQ) {
429+
430+
SchedCandidate TryCand(ZonePolicy);
431+
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
432+
VGPRPressure, IsBottomUp);
433+
// Pass SchedBoundary only when comparing nodes from the same boundary.
434+
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
435+
tryPendingCandidate(Cand, TryCand, ZoneArg);
436+
if (TryCand.Reason != NoCand) {
437+
// Initialize resource delta if needed in case future heuristics query it.
438+
if (TryCand.ResDelta == SchedResourceDelta())
439+
TryCand.initResourceDelta(Zone.DAG, SchedModel);
440+
LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
441+
IsPending = true;
442+
Cand.setBest(TryCand);
443+
} else {
444+
printCandidateDecision(TryCand, Cand);
376445
}
377446
}
378447
}
379448

380449
// This function is mostly cut and pasted from
381450
// GenericScheduler::pickNodeBidirectional()
382-
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
451+
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
452+
bool &PickedPending) {
383453
// Schedule as far as possible in the direction of no choice. This is most
384454
// efficient, but also provides the best heuristics for CriticalPSets.
385-
if (SUnit *SU = Bot.pickOnlyChoice()) {
455+
if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {
386456
IsTopNode = false;
387457
return SU;
388458
}
389-
if (SUnit *SU = Top.pickOnlyChoice()) {
459+
if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {
390460
IsTopNode = true;
391461
return SU;
392462
}
393-
// Set the bottom-up policy based on the state of the current bottom zone and
394-
// the instructions outside the zone, including the top zone.
463+
// Set the bottom-up policy based on the state of the current bottom zone
464+
// and the instructions outside the zone, including the top zone.
395465
CandPolicy BotPolicy;
396466
setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
397467
// Set the top-down policy based on the state of the current top zone and
398468
// the instructions outside the zone, including the bottom zone.
399469
CandPolicy TopPolicy;
400470
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
401471

472+
bool BotPending = false;
402473
// See if BotCand is still valid (because we previously scheduled from Top).
403474
LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
404475
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
405476
BotCand.Policy != BotPolicy) {
406477
BotCand.reset(CandPolicy());
407478
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
479+
BotPending,
408480
/*IsBottomUp=*/true);
409481
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
410482
} else {
@@ -414,19 +486,22 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
414486
SchedCandidate TCand;
415487
TCand.reset(CandPolicy());
416488
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
489+
BotPending,
417490
/*IsBottomUp=*/true);
418491
assert(TCand.SU == BotCand.SU &&
419492
"Last pick result should correspond to re-picking right now");
420493
}
421494
#endif
422495
}
423496

497+
bool TopPending = false;
424498
// Check if the top Q has a better candidate.
425499
LLVM_DEBUG(dbgs() << "Picking from Top:\n");
426500
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
427501
TopCand.Policy != TopPolicy) {
428502
TopCand.reset(CandPolicy());
429503
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
504+
TopPending,
430505
/*IsBottomUp=*/false);
431506
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
432507
} else {
@@ -436,6 +511,7 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
436511
SchedCandidate TCand;
437512
TCand.reset(CandPolicy());
438513
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
514+
TopPending,
439515
/*IsBottomUp=*/false);
440516
assert(TCand.SU == TopCand.SU &&
441517
"Last pick result should correspond to re-picking right now");
@@ -446,12 +522,21 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
446522
// Pick best from BotCand and TopCand.
447523
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
448524
dbgs() << "Bot Cand: "; traceCandidate(BotCand););
449-
SchedCandidate Cand = BotCand;
450-
TopCand.Reason = NoCand;
451-
tryCandidate(Cand, TopCand, nullptr);
452-
if (TopCand.Reason != NoCand) {
453-
Cand.setBest(TopCand);
525+
SchedCandidate Cand = BotPending ? TopCand : BotCand;
526+
SchedCandidate TryCand = BotPending ? BotCand : TopCand;
527+
PickedPending = BotPending && TopPending;
528+
529+
TryCand.Reason = NoCand;
530+
if (BotPending || TopPending) {
531+
PickedPending |= tryPendingCandidate(Cand, TryCand, nullptr);
532+
} else {
533+
tryCandidate(Cand, TryCand, nullptr);
534+
}
535+
536+
if (TryCand.Reason != NoCand) {
537+
Cand.setBest(TryCand);
454538
}
539+
455540
LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
456541

457542
IsTopNode = Cand.AtTop;
@@ -466,35 +551,55 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
466551
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
467552
return nullptr;
468553
}
554+
bool PickedPending;
469555
SUnit *SU;
470556
do {
557+
PickedPending = false;
471558
if (RegionPolicy.OnlyTopDown) {
472-
SU = Top.pickOnlyChoice();
559+
SU = pickOnlyChoice(Top, SchedModel);
473560
if (!SU) {
474561
CandPolicy NoPolicy;
475562
TopCand.reset(NoPolicy);
476563
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
564+
PickedPending,
477565
/*IsBottomUp=*/false);
478566
assert(TopCand.Reason != NoCand && "failed to find a candidate");
479567
SU = TopCand.SU;
480568
}
481569
IsTopNode = true;
482570
} else if (RegionPolicy.OnlyBottomUp) {
483-
SU = Bot.pickOnlyChoice();
571+
SU = pickOnlyChoice(Bot, SchedModel);
484572
if (!SU) {
485573
CandPolicy NoPolicy;
486574
BotCand.reset(NoPolicy);
487575
pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
576+
PickedPending,
488577
/*IsBottomUp=*/true);
489578
assert(BotCand.Reason != NoCand && "failed to find a candidate");
490579
SU = BotCand.SU;
491580
}
492581
IsTopNode = false;
493582
} else {
494-
SU = pickNodeBidirectional(IsTopNode);
583+
SU = pickNodeBidirectional(IsTopNode, PickedPending);
495584
}
496585
} while (SU->isScheduled);
497586

587+
if (PickedPending) {
588+
unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
589+
SchedBoundary &Zone = IsTopNode ? Top : Bot;
590+
unsigned CurrentCycle = Zone.getCurrCycle();
591+
if (ReadyCycle > CurrentCycle)
592+
Zone.bumpCycle(ReadyCycle);
593+
594+
// FIXME: checkHazard() doesn't give information about which cycle the
595+
// hazard will resolve so just keep bumping the cycle by 1. This could be
596+
// made more efficient if checkHazard() returned more details.
597+
while (Zone.checkHazard(SU))
598+
Zone.bumpCycle(Zone.getCurrCycle() + 1);
599+
600+
Zone.releasePending();
601+
}
602+
498603
if (SU->isTopReady())
499604
Top.removeReady(SU);
500605
if (SU->isBottomReady())
@@ -540,6 +645,47 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
540645
return *std::next(CurrentStage);
541646
}
542647

648+
bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,
649+
SchedCandidate &TryCand,
650+
SchedBoundary *Zone) const {
651+
// Initialize the candidate if needed.
652+
if (!Cand.isValid()) {
653+
TryCand.Reason = NodeOrder;
654+
return true;
655+
}
656+
657+
// Bias PhysReg Defs and copies to their uses and defined respectively.
658+
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
659+
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
660+
return TryCand.Reason != NoCand;
661+
662+
// Avoid exceeding the target's limit.
663+
if (DAG->isTrackingPressure() &&
664+
tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
665+
RegExcess, TRI, DAG->MF))
666+
return TryCand.Reason != NoCand;
667+
668+
// Avoid increasing the max critical pressure in the scheduled region.
669+
if (DAG->isTrackingPressure() &&
670+
tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
671+
TryCand, Cand, RegCritical, TRI, DAG->MF))
672+
return TryCand.Reason != NoCand;
673+
674+
bool SameBoundary = Zone != nullptr;
675+
if (SameBoundary) {
676+
TryCand.initResourceDelta(DAG, SchedModel);
677+
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
678+
TryCand, Cand, ResourceReduce))
679+
return TryCand.Reason != NoCand;
680+
if (tryGreater(TryCand.ResDelta.DemandedResources,
681+
Cand.ResDelta.DemandedResources, TryCand, Cand,
682+
ResourceDemand))
683+
return TryCand.Reason != NoCand;
684+
}
685+
686+
return false;
687+
}
688+
543689
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
544690
const MachineSchedContext *C, bool IsLegacyScheduler)
545691
: GCNSchedStrategy(C) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,34 @@ raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
4444
/// heuristics to determine excess/critical pressure sets.
4545
class GCNSchedStrategy : public GenericScheduler {
4646
protected:
47-
SUnit *pickNodeBidirectional(bool &IsTopNode);
47+
SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
4848

4949
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
5050
const RegPressureTracker &RPTracker,
51-
SchedCandidate &Cand, bool IsBottomUp);
51+
SchedCandidate &Cand, bool &IsPending,
52+
bool IsBottomUp);
5253

5354
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
5455
const RegPressureTracker &RPTracker,
5556
const SIRegisterInfo *SRI, unsigned SGPRPressure,
5657
unsigned VGPRPressure, bool IsBottomUp);
5758

59+
/// Evaluates instructions in the pending queue using a subset of scheduling
60+
/// heuristics.
61+
///
62+
/// Instructions that cannot be issued due to hardware constraints are placed
63+
/// in the pending queue rather than the available queue, making them normally
64+
/// invisible to scheduling heuristics. However, in certain scenarios (such as
65+
/// avoiding register spilling), it may be beneficial to consider scheduling
66+
/// these not-yet-ready instructions.
67+
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
68+
SchedBoundary *Zone) const;
69+
70+
#ifndef NDEBUG
71+
void printCandidateDecision(const SchedCandidate &Current,
72+
const SchedCandidate &Preferred);
73+
#endif
74+
5875
std::vector<unsigned> Pressure;
5976

6077
std::vector<unsigned> MaxPressure;

0 commit comments

Comments
 (0)