Skip to content

Commit f205dee

Browse files
kerbowa authored and jrbyrnes committed
Experimental FA scheduling.
WIP Changes:
- Custom XDL resource tracking. Assume the first 8 cycles of an 8-pass MFMA stall the whole pipeline. Prefer MFMA if XDL is free; see `tryXDL` in `tryCand` for heuristic priorities.
- Fix exp latency.
- Force top-down scheduling in marked regions.
- Add `GCNPostSchedStrategy` and `tryCand` overrides for both of the default schedulers.
- PostRA: prefer MFMA with more successors.

Change-Id: Id1afc349c575484a093e9e179a93b1e93f2f4b4b
1 parent e935fb9 commit f205dee

File tree

6 files changed

+41
-90
lines changed

6 files changed

+41
-90
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2667,6 +2667,12 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
26672667
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
26682668
IGLPStrategyID StrategyID =
26692669
(IGLPStrategyID)SU.getInstr()->getOperand(0).getImm();
2670+
if (StrategyID == 10) {
2671+
for (auto &SU : DAG->SUnits)
2672+
SU.hasReservedResource = false;
2673+
2674+
return false;
2675+
}
26702676
auto S = createIGLPStrategy(StrategyID, DAG, TII);
26712677
if (!S->shouldApplyStrategy(DAG, Phase))
26722678
return false;

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1118,7 +1118,7 @@ GCNTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
11181118
ScheduleDAGInstrs *
11191119
GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
11201120
ScheduleDAGMI *DAG =
1121-
new GCNPostScheduleDAGMILive(C, std::make_unique<PostGenericScheduler>(C),
1121+
new GCNPostScheduleDAGMILive(C, std::make_unique<GCNPostSchedStrategy>(C),
11221122
/*RemoveKillFlags=*/true);
11231123
const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
11241124
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 21 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -505,9 +505,7 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
505505
#endif
506506

507507
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
508-
MachineInstr *MI = SU->getInstr();
509-
bool IsXDL = MI ? TII->isXDL(*SU->getInstr()) : false;
510-
bool IsALU = MI ? TII->isVALU(*SU->getInstr()) || TII->isSALU(*SU->getInstr()) : false;
508+
bool IsXDL = TII->isXDL(*SU->getInstr());
511509
unsigned Cycles = SU->Latency;
512510
if (IsXDL) {
513511
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
@@ -516,10 +514,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
516514
Cycles -= 2;
517515
XDLProcRes.reset();
518516
XDLProcRes.reserve(Cycles);
519-
} else if (IsALU) {
520-
XDLProcRes.release(Cycles);
521517
} else {
522-
XDLProcRes.release(1);
518+
XDLProcRes.release(Cycles);
523519
}
524520

525521
LLVM_DEBUG(dbgs() << "OldXDLProcRes: " << XDLCyclesBefore
@@ -780,11 +776,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
780776
SchedBoundary *Zone) const {
781777
assert(Zone->isTop());
782778
MachineInstr *CInst = Cand.SU->getInstr();
783-
if (!CInst)
784-
return false;
785779
MachineInstr *TCInst = TryCand.SU->getInstr();
786-
if (!TCInst)
787-
return false;
788780
const SIInstrInfo *TII = DAG->MF.getSubtarget<GCNSubtarget>().getInstrInfo();
789781

790782
bool CandIsXDL = TII->isXDL(*CInst);
@@ -821,10 +813,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
821813
if (!CandSeenSuccs.insert(SuccSU).second)
822814
continue;
823815

824-
MachineInstr *SuccMI = SuccSU->getInstr();
825-
if (!SuccMI)
826-
continue;
827-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
816+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
828817
++CandReadyVALUSuccs;
829818
}
830819
}
@@ -835,10 +824,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
835824
if (!TrySeenSuccs.insert(SuccSU).second)
836825
continue;
837826

838-
MachineInstr *SuccMI = SuccSU->getInstr();
839-
if (!SuccMI)
840-
continue;
841-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
827+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
842828
++TryReadyVALUSuccs;
843829
}
844830
}
@@ -874,21 +860,15 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
874860
SUnit *SuccSU = Succ.getSUnit();
875861
if (!CandSeenSuccs.insert(SuccSU).second)
876862
continue;
877-
MachineInstr *SuccMI = SuccSU->getInstr();
878-
if (!SuccMI)
879-
continue;
880-
if (TII->isVALU(*SuccMI))
863+
if (TII->isVALU(*SuccSU->getInstr()))
881864
++CandVALUSuccs;
882865
}
883866

884867
for (SDep &Succ : TryCand.SU->Succs) {
885868
SUnit *SuccSU = Succ.getSUnit();
886869
if (!TrySeenSuccs.insert(SuccSU).second)
887870
continue;
888-
MachineInstr *SuccMI = SuccSU->getInstr();
889-
if (!SuccMI)
890-
continue;
891-
if (TII->isVALU(*SuccMI))
871+
if (TII->isVALU(*SuccSU->getInstr()))
892872
++TryVALUSuccs;
893873
}
894874

@@ -930,10 +910,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
930910
if (!CandSeenSuccs.insert(SuccSU).second)
931911
continue;
932912

933-
MachineInstr *SuccMI = SuccSU->getInstr();
934-
if (!SuccMI)
935-
continue;
936-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
913+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
937914
++CandReadyVALUSuccs;
938915
}
939916
}
@@ -944,10 +921,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
944921
if (!TrySeenSuccs.insert(SuccSU).second)
945922
continue;
946923

947-
MachineInstr *SuccMI = SuccSU->getInstr();
948-
if (!SuccMI)
949-
continue;
950-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
924+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
951925
++TryReadyVALUSuccs;
952926
}
953927
}
@@ -986,10 +960,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
986960
if (!CandSeenSuccs.insert(SuccSU).second)
987961
continue;
988962

989-
MachineInstr *SuccMI = SuccSU->getInstr();
990-
if (!SuccMI)
991-
continue;
992-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
963+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
993964
++CandReadyVALUSuccs;
994965
}
995966
}
@@ -1000,10 +971,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
1000971
if (!TrySeenSuccs.insert(SuccSU).second)
1001972
continue;
1002973

1003-
MachineInstr *SuccMI = SuccSU->getInstr();
1004-
if (!SuccMI)
1005-
continue;
1006-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
974+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
1007975
++TryReadyVALUSuccs;
1008976
}
1009977
}
@@ -2828,9 +2796,7 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28282796
#endif
28292797

28302798
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
2831-
MachineInstr *MI = SU->getInstr();
2832-
bool IsXDL = MI ? TII->isXDL(*SU->getInstr()) : false;
2833-
bool IsALU = MI ? TII->isVALU(*SU->getInstr()) || TII->isSALU(*SU->getInstr()) : false;
2799+
bool IsXDL = TII->isXDL(*SU->getInstr());
28342800
unsigned Cycles = SU->Latency;
28352801
if (IsXDL) {
28362802
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
@@ -2839,10 +2805,8 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28392805
Cycles -= 2;
28402806
XDLProcRes.reset();
28412807
XDLProcRes.reserve(Cycles);
2842-
} else if (IsALU) {
2843-
XDLProcRes.release(Cycles);
28442808
} else {
2845-
XDLProcRes.release(1);
2809+
XDLProcRes.release(Cycles);
28462810
}
28472811

28482812
LLVM_DEBUG(dbgs() << "OldXDLProcRes: " << XDLCyclesBefore
@@ -2858,11 +2822,7 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28582822
bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
28592823
SchedCandidate &TryCand) {
28602824
MachineInstr *CInst = Cand.SU->getInstr();
2861-
if (!CInst)
2862-
return false;
28632825
MachineInstr *TCInst = TryCand.SU->getInstr();
2864-
if (!TCInst)
2865-
return false;
28662826
const SIInstrInfo *TII = DAG->MF.getSubtarget<GCNSubtarget>().getInstrInfo();
28672827

28682828
bool CandIsXDL = TII->isXDL(*CInst);
@@ -2899,10 +2859,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
28992859
if (!CandSeenSuccs.insert(SuccSU).second)
29002860
continue;
29012861

2902-
MachineInstr *SuccMI = SuccSU->getInstr();
2903-
if (!SuccMI)
2904-
continue;
2905-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2862+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
29062863
++CandReadyVALUSuccs;
29072864
}
29082865
}
@@ -2913,10 +2870,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
29132870
if (!TrySeenSuccs.insert(SuccSU).second)
29142871
continue;
29152872

2916-
MachineInstr *SuccMI = SuccSU->getInstr();
2917-
if (!SuccMI)
2918-
continue;
2919-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2873+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
29202874
++TryReadyVALUSuccs;
29212875
}
29222876
}
@@ -2952,21 +2906,15 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
29522906
SUnit *SuccSU = Succ.getSUnit();
29532907
if (!CandSeenSuccs.insert(SuccSU).second)
29542908
continue;
2955-
MachineInstr *SuccMI = SuccSU->getInstr();
2956-
if (!SuccMI)
2957-
continue;
2958-
if (TII->isVALU(*SuccMI))
2909+
if (TII->isVALU(*SuccSU->getInstr()))
29592910
++CandVALUSuccs;
29602911
}
29612912

29622913
for (SDep &Succ : TryCand.SU->Succs) {
29632914
SUnit *SuccSU = Succ.getSUnit();
29642915
if (!TrySeenSuccs.insert(SuccSU).second)
29652916
continue;
2966-
MachineInstr *SuccMI = SuccSU->getInstr();
2967-
if (!SuccMI)
2968-
continue;
2969-
if (TII->isVALU(*SuccMI))
2917+
if (TII->isVALU(*SuccSU->getInstr()))
29702918
++TryVALUSuccs;
29712919
}
29722920

@@ -3008,10 +2956,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30082956
if (!CandSeenSuccs.insert(SuccSU).second)
30092957
continue;
30102958

3011-
MachineInstr *SuccMI = SuccSU->getInstr();
3012-
if (!SuccMI)
3013-
continue;
3014-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2959+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30152960
++CandReadyVALUSuccs;
30162961
}
30172962
}
@@ -3022,10 +2967,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30222967
if (!TrySeenSuccs.insert(SuccSU).second)
30232968
continue;
30242969

3025-
MachineInstr *SuccMI = SuccSU->getInstr();
3026-
if (!SuccMI)
3027-
continue;
3028-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2970+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30292971
++TryReadyVALUSuccs;
30302972
}
30312973
}
@@ -3064,10 +3006,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30643006
if (!CandSeenSuccs.insert(SuccSU).second)
30653007
continue;
30663008

3067-
MachineInstr *SuccMI = SuccSU->getInstr();
3068-
if (!SuccMI)
3069-
continue;
3070-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
3009+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30713010
++CandReadyVALUSuccs;
30723011
}
30733012
}
@@ -3078,10 +3017,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30783017
if (!TrySeenSuccs.insert(SuccSU).second)
30793018
continue;
30803019

3081-
MachineInstr *SuccMI = SuccSU->getInstr();
3082-
if (!SuccMI)
3083-
continue;
3084-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
3020+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30853021
++TryReadyVALUSuccs;
30863022
}
30873023
}
@@ -3194,7 +3130,7 @@ bool GCNPostSchedStrategy::tryCandidate(SchedCandidate &Cand,
31943130
}
31953131

31963132
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
3197-
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
3133+
MachineSchedContext *C, std::unique_ptr<GCNPostSchedStrategy> S,
31983134
bool RemoveKillFlags)
31993135
: ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
32003136

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
945945
void finalizeSchedule() override;
946946

947947
GCNPostScheduleDAGMILive(MachineSchedContext *C,
948-
std::unique_ptr<MachineSchedStrategy> S,
948+
std::unique_ptr<GCNPostSchedStrategy> S,
949949
bool RemoveKillFlags);
950950
};
951951

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "AMDGPU.h"
1616
#include "AMDGPUInstrInfo.h"
1717
#include "GCNHazardRecognizer.h"
18+
#include "GCNSchedStrategy.h"
1819
#include "GCNSubtarget.h"
1920
#include "SIMachineFunctionInfo.h"
2021
#include "Utils/AMDGPUBaseInfo.h"
@@ -9203,8 +9204,12 @@ SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
92039204
// We would like to restrict this hazard recognizer to only
92049205
// post-RA scheduling; we can tell that we're post-RA because we don't
92059206
// track VRegLiveness.
9206-
if (!DAG->hasVRegLiveness())
9207-
return new GCNHazardRecognizer(DAG->MF);
9207+
if (!DAG->hasVRegLiveness()) {
9208+
GCNPostScheduleDAGMILive *LiveDAG = static_cast<GCNPostScheduleDAGMILive *>(
9209+
const_cast<ScheduleDAGMI *>(DAG));
9210+
if (!LiveDAG->S->CustomResTracking)
9211+
return new GCNHazardRecognizer(DAG->MF);
9212+
}
92089213
return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
92099214
}
92109215

llvm/lib/Target/AMDGPU/SISchedule.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ multiclass SICommonWriteRes {
164164

165165
def : HWVALUWriteRes<Write32Bit, 1>;
166166
def : HWVALUWriteRes<WriteFloatCvt, 4>;
167-
def : HWVALUWriteRes<WriteTrans32, 4>;
168167
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
169168

170169
let ReleaseAtCycles = [4] in
@@ -223,6 +222,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
223222
def : HWVALUWriteRes<WriteDouble, 4>;
224223
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
225224
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
225+
def : HWVALUWriteRes<WriteTrans32, 4>;
226226
def : HWVALUWriteRes<WriteTrans64, 4>;
227227
} // End RetireOOO = 1
228228

@@ -241,6 +241,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
241241
def : HWVALUWriteRes<WriteDouble, 16>;
242242
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
243243
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
244+
def : HWVALUWriteRes<WriteTrans32, 16>;
244245
def : HWVALUWriteRes<WriteTrans64, 16>;
245246
} // End RetireOOO = 1
246247

@@ -261,6 +262,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
261262
def : HWVALUWriteRes<WriteDouble, 1>;
262263
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
263264
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
265+
def : HWVALUWriteRes<WriteTrans32, 4>;
264266
def : HWVALUWriteRes<WriteTrans64, 4>;
265267
def : HWVALUWriteRes<WriteIntMul, 1>;
266268
def : HWVALUWriteRes<Write64Bit, 1>;
@@ -284,6 +286,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
284286
def : HWVALUWriteRes<WriteDouble, 1>;
285287
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
286288
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
289+
def : HWVALUWriteRes<WriteTrans32, 4>;
287290
def : HWVALUWriteRes<WriteTrans64, 4>;
288291
def : HWVALUWriteRes<WriteIntMul, 1>;
289292
def : HWVALUWriteRes<Write64Bit, 1>;
@@ -319,6 +322,7 @@ def : HWVALUWriteRes<WriteDouble, 1>;
319322
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
320323
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
321324
def : HWVALUWriteRes<WriteTrans64, 4>;
325+
def : HWVALUWriteRes<WriteTrans32, 2>;
322326
def : HWVALUWriteRes<WriteIntMul, 1>;
323327
def : HWVALUWriteRes<Write64Bit, 1>;
324328

0 commit comments

Comments
 (0)