Skip to content

Commit 78e8d97

Browse files
pratikasharigcbot
authored and committed
Add subtree selection heuristic to preRA scheduler
When choosing between two nodes with the same SU number, allow breaking the tie with a subtree selection heuristic: pick a node if it is a direct predecessor of the last scheduled node. This allows us to retire busy GRFs faster and reduce register pressure quickly. Currently this feature is disabled and must be enabled by setting a bit in the pre-sched ctrl.
1 parent 1512e18 commit 78e8d97

File tree

1 file changed

+78
-8
lines changed

1 file changed

+78
-8
lines changed

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -444,21 +444,24 @@ struct SchedConfig {
444444
MASK_SKIP_CLUSTER = 1U << 3,
445445
MASK_SKIP_HOLD = 1U << 4,
446446
MASK_NOT_ITERATE = 1U << 5,
447+
MASK_TRY_SUBTREE_SCHEDULE = 1U << 6,
447448
};
448449
unsigned Dump : 1;
449450
unsigned UseLatency : 1;
450451
unsigned UseMinReg : 1;
451452
unsigned SkipClustering : 1; // default 0 i.e. try min-reg with clustering
452453
unsigned SkipHoldList : 1; // default 0 i.e. use hold list in latency-hiding
453454
unsigned DoNotIterate : 1; // default 0 i.e. iterative latency-scheduling
455+
unsigned TrySubtreeSchedule : 1; // prefer sub-tree schedule heuristic
454456

455457
explicit SchedConfig(unsigned Config)
456458
: Dump((Config & MASK_DUMP) != 0),
457459
UseLatency((Config & MASK_LATENCY) != 0),
458460
UseMinReg((Config & MASK_MIN_REG) != 0),
459461
SkipClustering((Config & MASK_SKIP_CLUSTER) != 0),
460462
SkipHoldList((Config & MASK_SKIP_HOLD) != 0),
461-
DoNotIterate((Config & MASK_NOT_ITERATE) != 0) {}
463+
DoNotIterate((Config & MASK_NOT_ITERATE) != 0),
464+
TrySubtreeSchedule((Config & MASK_TRY_SUBTREE_SCHEDULE) != 0) {}
462465
};
463466

464467
#define SCHED_DUMP(X) \
@@ -512,7 +515,8 @@ class BB_Scheduler {
512515
// UpperBoundGRF is the measure max reg-pressure of this kernel before scheduling
513516
bool scheduleBlockForLatency(unsigned &MaxPressure, bool ReassignID,
514517
unsigned UpperBoundGRF);
515-
void SethiUllmanScheduling(bool DoClustering);
518+
void SethiUllmanScheduling(bool DoClustering,
519+
bool UseSubtreeHeuristic = false);
516520

517521
private:
518522
void LatencyScheduling(unsigned GroupingThreshold);
@@ -823,9 +827,15 @@ class SethiUllmanQueue : public QueueBase {
823827
// the max time-stamp among node uses
824828
std::vector<unsigned> LiveTS;
825829

830+
std::vector<G4_INST *> &schedule;
831+
832+
bool UseSubtreeHeuristic = false;
833+
826834
public:
827-
SethiUllmanQueue(preDDD &ddd, RegisterPressure &rp, SchedConfig config)
828-
: QueueBase(ddd, rp, config) {
835+
SethiUllmanQueue(preDDD &ddd, RegisterPressure &rp, SchedConfig config,
836+
std::vector<G4_INST *> &s, bool SubtreeHeuristic)
837+
: QueueBase(ddd, rp, config), schedule(s) {
838+
UseSubtreeHeuristic = SubtreeHeuristic;
829839
init();
830840
}
831841

@@ -839,7 +849,7 @@ class SethiUllmanQueue : public QueueBase {
839849

840850
bool empty() const { return Q.empty(); }
841851

842-
friend void BB_Scheduler::SethiUllmanScheduling(bool);
852+
friend void BB_Scheduler::SethiUllmanScheduling(bool, bool);
843853

844854
private:
845855
// Initialize Sethi-Ullman numbers.
@@ -973,6 +983,36 @@ bool SethiUllmanQueue::compare(preNode *N1, preNode *N2) {
973983
if (SU1 > SU2)
974984
return true;
975985

986+
if (UseSubtreeHeuristic && !schedule.empty()) {
987+
// Select next node from queue that's a predecessor of last scheduled node.
988+
// This can retire registers early, but it can worsen latency.
989+
//
990+
// If N1 is an immediate parent of the last scheduled node then select it.
991+
// If N2 is an immediate parent of the last scheduled node then select it.
992+
G4_INST *LastScheduled = nullptr;
993+
for (auto RI = schedule.rbegin(); RI != schedule.rend(); ++RI) {
994+
auto *Inst = (*RI);
995+
if (Inst->isPseudoKill())
996+
continue;
997+
LastScheduled = Inst;
998+
break;
999+
}
1000+
if (LastScheduled) {
1001+
auto &SuccS1 = N1->Succs;
1002+
auto &SuccS2 = N2->Succs;
1003+
for (auto &EdgeS1 : SuccS1) {
1004+
auto *SuccS1 = EdgeS1.getNode();
1005+
if (SuccS1->getInst() == LastScheduled)
1006+
return false;
1007+
}
1008+
for (auto &EdgeS2 : SuccS2) {
1009+
auto *SuccS2 = EdgeS2.getNode();
1010+
if (SuccS2->getInst() == LastScheduled)
1011+
return true;
1012+
}
1013+
}
1014+
}
1015+
9761016
// Otherwise, break tie with their IDs. Smaller ID means higher priority.
9771017
return N1->getID() > N2->getID();
9781018
}
@@ -1153,8 +1193,21 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
11531193
// try clustering first
11541194
SethiUllmanScheduling(true);
11551195
if (commitIfBeneficial(MaxPressure)) {
1196+
// If MaxPressure is still > 2x Threshold, attempt subtree scheduling
1197+
// heuristic. This costs compile time, so run it only if pressure is
1198+
// very high.
1199+
bool SubtreeHeuristicChosen = false;
1200+
if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule) {
1201+
ddd.reset(false);
1202+
SethiUllmanScheduling(false, true);
1203+
if (commitIfBeneficial(MaxPressure)) {
1204+
SCHED_DUMP(std::cerr << "Chose subtree heuristic\n");
1205+
SubtreeHeuristicChosen = true;
1206+
}
1207+
}
1208+
if (!SubtreeHeuristicChosen)
1209+
kernel.fg.builder->getJitInfo()->statsVerbose.minRegClusterCount++;
11561210
SCHED_DUMP(rp.dump(ddd.getBB(), "After scheduling for presssure, "));
1157-
kernel.fg.builder->getJitInfo()->statsVerbose.minRegClusterCount++;
11581211
Changed = true;
11591212
} else {
11601213
ddd.reset(false);
@@ -1164,9 +1217,25 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
11641217
// try not-clustering
11651218
SethiUllmanScheduling(false);
11661219
if (commitIfBeneficial(MaxPressure)) {
1220+
if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule) {
1221+
SethiUllmanScheduling(false, true);
1222+
commitIfBeneficial(MaxPressure);
1223+
}
11671224
SCHED_DUMP(rp.dump(ddd.getBB(), "After scheduling for presssure, "));
11681225
kernel.fg.builder->getJitInfo()->statsVerbose.minRegSUCount++;
11691226
Changed = true;
1227+
} else if (config.TrySubtreeSchedule) {
1228+
ddd.reset(false);
1229+
}
1230+
}
1231+
1232+
if (!Changed) {
1233+
if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule) {
1234+
SethiUllmanScheduling(false, true);
1235+
if (commitIfBeneficial(MaxPressure)) {
1236+
SCHED_DUMP(rp.dump(ddd.getBB(), "After scheduling for presssure, "));
1237+
Changed = true;
1238+
}
11701239
}
11711240
}
11721241
}
@@ -1175,9 +1244,10 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
11751244
return Changed;
11761245
}
11771246

1178-
void BB_Scheduler::SethiUllmanScheduling(bool DoClustering) {
1247+
void BB_Scheduler::SethiUllmanScheduling(bool DoClustering,
1248+
bool UseSubtreeHeuristic) {
11791249
schedule.clear();
1180-
SethiUllmanQueue Q(ddd, rp, config);
1250+
SethiUllmanQueue Q(ddd, rp, config, schedule, UseSubtreeHeuristic);
11811251
Q.push(ddd.getExitNode());
11821252

11831253
while (!Q.empty()) {

0 commit comments

Comments
 (0)