@@ -444,21 +444,24 @@ struct SchedConfig {
444
444
MASK_SKIP_CLUSTER = 1U << 3 ,
445
445
MASK_SKIP_HOLD = 1U << 4 ,
446
446
MASK_NOT_ITERATE = 1U << 5 ,
447
+ MASK_TRY_SUBTREE_SCHEDULE = 1U << 6 ,
447
448
};
448
449
unsigned Dump : 1 ;
449
450
unsigned UseLatency : 1 ;
450
451
unsigned UseMinReg : 1 ;
451
452
unsigned SkipClustering : 1 ; // default 0 i.e. try min-reg with clustering
452
453
unsigned SkipHoldList : 1 ; // default 0 i.e. use hold list in latency-hiding
453
454
unsigned DoNotIterate : 1 ; // default 0 i.e. iterative latency-scheduling
455
+ unsigned TrySubtreeSchedule : 1 ; // prefer sub-tree schedule heuristic
454
456
455
457
explicit SchedConfig (unsigned Config)
456
458
: Dump((Config & MASK_DUMP) != 0),
457
459
UseLatency((Config & MASK_LATENCY) != 0),
458
460
UseMinReg((Config & MASK_MIN_REG) != 0),
459
461
SkipClustering((Config & MASK_SKIP_CLUSTER) != 0),
460
462
SkipHoldList((Config & MASK_SKIP_HOLD) != 0),
461
- DoNotIterate((Config & MASK_NOT_ITERATE) != 0) {}
463
+ DoNotIterate((Config & MASK_NOT_ITERATE) != 0),
464
+ TrySubtreeSchedule((Config & MASK_TRY_SUBTREE_SCHEDULE) != 0) {}
462
465
};
463
466
464
467
#define SCHED_DUMP (X ) \
@@ -512,7 +515,8 @@ class BB_Scheduler {
512
515
// UpperBoundGRF is the measured max reg-pressure of this kernel before scheduling
513
516
bool scheduleBlockForLatency (unsigned &MaxPressure, bool ReassignID,
514
517
unsigned UpperBoundGRF);
515
- void SethiUllmanScheduling (bool DoClustering);
518
+ void SethiUllmanScheduling (bool DoClustering,
519
+ bool UseSubtreeHeuristic = false );
516
520
517
521
private:
518
522
void LatencyScheduling (unsigned GroupingThreshold);
@@ -823,9 +827,15 @@ class SethiUllmanQueue : public QueueBase {
823
827
// the max time-stamp among node uses
824
828
std::vector<unsigned > LiveTS;
825
829
830
+ std::vector<G4_INST *> &schedule;
831
+
832
+ bool UseSubtreeHeuristic = false ;
833
+
826
834
public:
827
- SethiUllmanQueue (preDDD &ddd, RegisterPressure &rp, SchedConfig config)
828
- : QueueBase(ddd, rp, config) {
835
+ SethiUllmanQueue (preDDD &ddd, RegisterPressure &rp, SchedConfig config,
836
+ std::vector<G4_INST *> &s, bool SubtreeHeuristic)
837
+ : QueueBase(ddd, rp, config), schedule(s) {
838
+ UseSubtreeHeuristic = SubtreeHeuristic;
829
839
init ();
830
840
}
831
841
@@ -839,7 +849,7 @@ class SethiUllmanQueue : public QueueBase {
839
849
840
850
bool empty () const { return Q.empty (); }
841
851
842
- friend void BB_Scheduler::SethiUllmanScheduling (bool );
852
+ friend void BB_Scheduler::SethiUllmanScheduling (bool , bool );
843
853
844
854
private:
845
855
// Initialize Sethi-Ullman numbers.
@@ -973,6 +983,36 @@ bool SethiUllmanQueue::compare(preNode *N1, preNode *N2) {
973
983
if (SU1 > SU2)
974
984
return true ;
975
985
986
+ if (UseSubtreeHeuristic && !schedule.empty ()) {
987
+ // Select next node from queue that's a predecessor of last scheduled node.
988
+ // This can retire registers early, but it can worsen latency.
989
+ //
990
+ // If N1 is an immediate predecessor of the last scheduled node, select it.
991
+ // Otherwise, if N2 is an immediate predecessor of that node, select it.
992
+ G4_INST *LastScheduled = nullptr ;
993
+ for (auto RI = schedule.rbegin (); RI != schedule.rend (); ++RI) {
994
+ auto *Inst = (*RI);
995
+ if (Inst->isPseudoKill ())
996
+ continue ;
997
+ LastScheduled = Inst;
998
+ break ;
999
+ }
1000
+ if (LastScheduled) {
1001
+ auto &SuccS1 = N1->Succs ;
1002
+ auto &SuccS2 = N2->Succs ;
1003
+ for (auto &EdgeS1 : SuccS1) {
1004
+ auto *SuccS1 = EdgeS1.getNode ();
1005
+ if (SuccS1->getInst () == LastScheduled)
1006
+ return false ;
1007
+ }
1008
+ for (auto &EdgeS2 : SuccS2) {
1009
+ auto *SuccS2 = EdgeS2.getNode ();
1010
+ if (SuccS2->getInst () == LastScheduled)
1011
+ return true ;
1012
+ }
1013
+ }
1014
+ }
1015
+
976
1016
// Otherwise, break tie with their IDs. Smaller ID means higher priority.
977
1017
return N1->getID () > N2->getID ();
978
1018
}
@@ -1153,8 +1193,21 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
1153
1193
// try clustering first
1154
1194
SethiUllmanScheduling (true );
1155
1195
if (commitIfBeneficial (MaxPressure)) {
1196
+ // If MaxPressure is still > 2x Threshold, attempt subtree scheduling
1197
+ // heuristic. This costs compile time, so run it only if pressure is
1198
+ // very high.
1199
+ bool SubtreeHeuristicChosen = false ;
1200
+ if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule ) {
1201
+ ddd.reset (false );
1202
+ SethiUllmanScheduling (false , true );
1203
+ if (commitIfBeneficial (MaxPressure)) {
1204
+ SCHED_DUMP (std::cerr << " Chose subtree heuristic\n " );
1205
+ SubtreeHeuristicChosen = true ;
1206
+ }
1207
+ }
1208
+ if (!SubtreeHeuristicChosen)
1209
+ kernel.fg .builder ->getJitInfo ()->statsVerbose .minRegClusterCount ++;
1156
1210
SCHED_DUMP (rp.dump (ddd.getBB (), " After scheduling for presssure, " ));
1157
- kernel.fg .builder ->getJitInfo ()->statsVerbose .minRegClusterCount ++;
1158
1211
Changed = true ;
1159
1212
} else {
1160
1213
ddd.reset (false );
@@ -1164,9 +1217,25 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
1164
1217
// try not-clustering
1165
1218
SethiUllmanScheduling (false );
1166
1219
if (commitIfBeneficial (MaxPressure)) {
1220
+ if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule ) {
1221
+ SethiUllmanScheduling (false , true );
1222
+ commitIfBeneficial (MaxPressure);
1223
+ }
1167
1224
SCHED_DUMP (rp.dump (ddd.getBB (), " After scheduling for presssure, " ));
1168
1225
kernel.fg .builder ->getJitInfo ()->statsVerbose .minRegSUCount ++;
1169
1226
Changed = true ;
1227
+ } else if (config.TrySubtreeSchedule ) {
1228
+ ddd.reset (false );
1229
+ }
1230
+ }
1231
+
1232
+ if (!Changed) {
1233
+ if (MaxPressure > (2 * Threshold) && config.TrySubtreeSchedule ) {
1234
+ SethiUllmanScheduling (false , true );
1235
+ if (commitIfBeneficial (MaxPressure)) {
1236
+ SCHED_DUMP (rp.dump (ddd.getBB (), " After scheduling for presssure, " ));
1237
+ Changed = true ;
1238
+ }
1170
1239
}
1171
1240
}
1172
1241
}
@@ -1175,9 +1244,10 @@ bool BB_Scheduler::scheduleBlockForPressure(unsigned &MaxPressure,
1175
1244
return Changed;
1176
1245
}
1177
1246
1178
- void BB_Scheduler::SethiUllmanScheduling (bool DoClustering) {
1247
+ void BB_Scheduler::SethiUllmanScheduling (bool DoClustering,
1248
+ bool UseSubtreeHeuristic) {
1179
1249
schedule.clear ();
1180
- SethiUllmanQueue Q (ddd, rp, config);
1250
+ SethiUllmanQueue Q (ddd, rp, config, schedule, UseSubtreeHeuristic );
1181
1251
Q.push (ddd.getExitNode ());
1182
1252
1183
1253
while (!Q.empty ()) {
0 commit comments