@@ -520,6 +520,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
     const MachineSchedContext *C, bool IsLegacyScheduler)
     : GCNSchedStrategy(C) {
   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
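+  // Run the unpack stage immediately after the initial schedule.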
+  SchedStages.push_back(GCNSchedStageID::MaxsUnpackPackedF32OpsSchedule);
   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
@@ -783,6 +784,9 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
   case GCNSchedStageID::MemoryClauseInitialSchedule:
     return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                               *this);
+  case GCNSchedStageID::MaxsUnpackPackedF32OpsSchedule:
+    return std::make_unique<MaxsUnpackPackedF32OpsScheduleStage>(SchedStageID,
+                                                                 *this);
   }
 
   llvm_unreachable("Unknown SchedStageID.");
@@ -1130,6 +1134,31 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
   GCNSchedStage::finalizeGCNSchedStage();
 }
 
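+// DAG mutation run by the MaxsUnpackPackedF32OpsSchedule stage. apply() is
+// still a stub: TII and ST are looked up but not yet used, and only a debug
+// message is printed.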
+struct MaxsUnpackPackedF32OpsDAGMutation : ScheduleDAGMutation {
+  GCNScheduleDAGMILive &DAG;
+
+  MaxsUnpackPackedF32OpsDAGMutation(GCNScheduleDAGMILive &DAG) : DAG(DAG) {}
+
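+  // Note: apply()'s parameter shadows the DAG member captured above.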
+  void apply(ScheduleDAGInstrs *DAG) override {
+    const TargetInstrInfo &TII = *DAG->TII;
+    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
+    LLVM_DEBUG(dbgs() << "Completed MaxsUnpackPackedF32OpsDAGMutation\n");
+  }
+};
+
+namespace llvm {
+std::unique_ptr<ScheduleDAGMutation>
+createMaxsUnpackPackedF32OpsDAGMutation(GCNScheduleDAGMILive &DAG) {
+  return std::make_unique<MaxsUnpackPackedF32OpsDAGMutation>(DAG);
+}
+
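+// Overload for callers that only have a ScheduleDAGMILive. Assumes the DAG
+// really is a GCNScheduleDAGMILive; the reinterpret_cast is unchecked.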
+std::unique_ptr<ScheduleDAGMutation>
+createMaxsUnpackPackedF32OpsDAGMutation(ScheduleDAGMILive &DAG) {
+  return std::make_unique<MaxsUnpackPackedF32OpsDAGMutation>(
+      reinterpret_cast<GCNScheduleDAGMILive &>(DAG));
+}
+
+} // namespace llvm
+
 bool GCNSchedStage::initGCNRegion() {
   // Check whether this new region is also a new block.
   if (DAG.RegionBegin->getParent() != CurrentMBB)
@@ -1189,6 +1218,11 @@ bool GCNSchedStage::initGCNRegion() {
             : AMDGPU::SchedulingPhase::PreRAReentry));
   }
 
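+  // Register the unpack mutation before this region is scheduled.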
+  if (StageID == GCNSchedStageID::MaxsUnpackPackedF32OpsSchedule) {
+    DAG.addMutation(createMaxsUnpackPackedF32OpsDAGMutation(DAG));
+    return true;
+  }
+
   return true;
 }
 
@@ -1548,6 +1582,11 @@ bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
   return mayCauseSpilling(WavesAfter);
 }
 
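+// Never revert the unpack stage's schedule, whatever the resulting wave
+// count.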
+bool MaxsUnpackPackedF32OpsScheduleStage::shouldRevertScheduling(
+    unsigned WavesAfter) {
+  return false;
+}
+
 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
   if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
       !PressureAfter.less(MF, PressureBefore)) {