Skip to content

Commit 8e3ecfa

Browse files
committed
Experimental FA scheduling.
WIP
Changes:
- Custom XDL resource tracking. Assume the first 8 cycles of an 8-pass MFMA stall the whole pipeline. Prefer MFMA if XDL is free; see `tryXDL` in `tryCand` for heuristic priorities.
- Fix exp latency.
- Force top-down scheduling in marked regions.
- Add `GCNPostSchedStrategy` and `tryCand` overrides for both of the default schedulers.
- Post-RA: prefer MFMA with more successors.
Change-Id: Id1afc349c575484a093e9e179a93b1e93f2f4b4b
1 parent 83e9c44 commit 8e3ecfa

File tree

6 files changed

+41
-90
lines changed

6 files changed

+41
-90
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2667,6 +2667,12 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
26672667
bool IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
26682668
IGLPStrategyID StrategyID =
26692669
(IGLPStrategyID)SU.getInstr()->getOperand(0).getImm();
2670+
if (StrategyID == 10) {
2671+
for (auto &SU : DAG->SUnits)
2672+
SU.hasReservedResource = false;
2673+
2674+
return false;
2675+
}
26702676
auto S = createIGLPStrategy(StrategyID, DAG, TII);
26712677
if (!S->shouldApplyStrategy(DAG, Phase))
26722678
return false;

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ GCNTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
11451145
ScheduleDAGInstrs *
11461146
GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
11471147
ScheduleDAGMI *DAG =
1148-
new GCNPostScheduleDAGMILive(C, std::make_unique<PostGenericScheduler>(C),
1148+
new GCNPostScheduleDAGMILive(C, std::make_unique<GCNPostSchedStrategy>(C),
11491149
/*RemoveKillFlags=*/true);
11501150
const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
11511151
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 21 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -505,9 +505,7 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
505505
#endif
506506

507507
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
508-
MachineInstr *MI = SU->getInstr();
509-
bool IsXDL = MI ? TII->isXDL(*SU->getInstr()) : false;
510-
bool IsALU = MI ? TII->isVALU(*SU->getInstr()) || TII->isSALU(*SU->getInstr()) : false;
508+
bool IsXDL = TII->isXDL(*SU->getInstr());
511509
unsigned Cycles = SU->Latency;
512510
if (IsXDL) {
513511
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
@@ -516,10 +514,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
516514
Cycles -= 2;
517515
XDLProcRes.reset();
518516
XDLProcRes.reserve(Cycles);
519-
} else if (IsALU) {
520-
XDLProcRes.release(Cycles);
521517
} else {
522-
XDLProcRes.release(1);
518+
XDLProcRes.release(Cycles);
523519
}
524520

525521
LLVM_DEBUG(dbgs() << "OldXDLProcRes: " << XDLCyclesBefore
@@ -778,11 +774,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
778774
SchedBoundary *Zone) const {
779775
assert(Zone->isTop());
780776
MachineInstr *CInst = Cand.SU->getInstr();
781-
if (!CInst)
782-
return false;
783777
MachineInstr *TCInst = TryCand.SU->getInstr();
784-
if (!TCInst)
785-
return false;
786778
const SIInstrInfo *TII = DAG->MF.getSubtarget<GCNSubtarget>().getInstrInfo();
787779

788780
bool CandIsXDL = TII->isXDL(*CInst);
@@ -819,10 +811,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
819811
if (!CandSeenSuccs.insert(SuccSU).second)
820812
continue;
821813

822-
MachineInstr *SuccMI = SuccSU->getInstr();
823-
if (!SuccMI)
824-
continue;
825-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
814+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
826815
++CandReadyVALUSuccs;
827816
}
828817
}
@@ -833,10 +822,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
833822
if (!TrySeenSuccs.insert(SuccSU).second)
834823
continue;
835824

836-
MachineInstr *SuccMI = SuccSU->getInstr();
837-
if (!SuccMI)
838-
continue;
839-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
825+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
840826
++TryReadyVALUSuccs;
841827
}
842828
}
@@ -872,21 +858,15 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
872858
SUnit *SuccSU = Succ.getSUnit();
873859
if (!CandSeenSuccs.insert(SuccSU).second)
874860
continue;
875-
MachineInstr *SuccMI = SuccSU->getInstr();
876-
if (!SuccMI)
877-
continue;
878-
if (TII->isVALU(*SuccMI))
861+
if (TII->isVALU(*SuccSU->getInstr()))
879862
++CandVALUSuccs;
880863
}
881864

882865
for (SDep &Succ : TryCand.SU->Succs) {
883866
SUnit *SuccSU = Succ.getSUnit();
884867
if (!TrySeenSuccs.insert(SuccSU).second)
885868
continue;
886-
MachineInstr *SuccMI = SuccSU->getInstr();
887-
if (!SuccMI)
888-
continue;
889-
if (TII->isVALU(*SuccMI))
869+
if (TII->isVALU(*SuccSU->getInstr()))
890870
++TryVALUSuccs;
891871
}
892872

@@ -928,10 +908,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
928908
if (!CandSeenSuccs.insert(SuccSU).second)
929909
continue;
930910

931-
MachineInstr *SuccMI = SuccSU->getInstr();
932-
if (!SuccMI)
933-
continue;
934-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
911+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
935912
++CandReadyVALUSuccs;
936913
}
937914
}
@@ -942,10 +919,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
942919
if (!TrySeenSuccs.insert(SuccSU).second)
943920
continue;
944921

945-
MachineInstr *SuccMI = SuccSU->getInstr();
946-
if (!SuccMI)
947-
continue;
948-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
922+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
949923
++TryReadyVALUSuccs;
950924
}
951925
}
@@ -984,10 +958,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
984958
if (!CandSeenSuccs.insert(SuccSU).second)
985959
continue;
986960

987-
MachineInstr *SuccMI = SuccSU->getInstr();
988-
if (!SuccMI)
989-
continue;
990-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
961+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
991962
++CandReadyVALUSuccs;
992963
}
993964
}
@@ -998,10 +969,7 @@ bool GCNSchedStrategy::tryXDL(SchedCandidate &Cand, SchedCandidate &TryCand,
998969
if (!TrySeenSuccs.insert(SuccSU).second)
999970
continue;
1000971

1001-
MachineInstr *SuccMI = SuccSU->getInstr();
1002-
if (!SuccMI)
1003-
continue;
1004-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
972+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
1005973
++TryReadyVALUSuccs;
1006974
}
1007975
}
@@ -2815,9 +2783,7 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28152783
#endif
28162784

28172785
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
2818-
MachineInstr *MI = SU->getInstr();
2819-
bool IsXDL = MI ? TII->isXDL(*SU->getInstr()) : false;
2820-
bool IsALU = MI ? TII->isVALU(*SU->getInstr()) || TII->isSALU(*SU->getInstr()) : false;
2786+
bool IsXDL = TII->isXDL(*SU->getInstr());
28212787
unsigned Cycles = SU->Latency;
28222788
if (IsXDL) {
28232789
// FIXME: Hack since XDL is only actually occupying for 24 cycles with 8
@@ -2826,10 +2792,8 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28262792
Cycles -= 2;
28272793
XDLProcRes.reset();
28282794
XDLProcRes.reserve(Cycles);
2829-
} else if (IsALU) {
2830-
XDLProcRes.release(Cycles);
28312795
} else {
2832-
XDLProcRes.release(1);
2796+
XDLProcRes.release(Cycles);
28332797
}
28342798

28352799
LLVM_DEBUG(dbgs() << "OldXDLProcRes: " << XDLCyclesBefore
@@ -2845,11 +2809,7 @@ SUnit *GCNPostSchedStrategy::pickNode(bool &IsTopNode) {
28452809
bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
28462810
SchedCandidate &TryCand) {
28472811
MachineInstr *CInst = Cand.SU->getInstr();
2848-
if (!CInst)
2849-
return false;
28502812
MachineInstr *TCInst = TryCand.SU->getInstr();
2851-
if (!TCInst)
2852-
return false;
28532813
const SIInstrInfo *TII = DAG->MF.getSubtarget<GCNSubtarget>().getInstrInfo();
28542814

28552815
bool CandIsXDL = TII->isXDL(*CInst);
@@ -2886,10 +2846,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
28862846
if (!CandSeenSuccs.insert(SuccSU).second)
28872847
continue;
28882848

2889-
MachineInstr *SuccMI = SuccSU->getInstr();
2890-
if (!SuccMI)
2891-
continue;
2892-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2849+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
28932850
++CandReadyVALUSuccs;
28942851
}
28952852
}
@@ -2900,10 +2857,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
29002857
if (!TrySeenSuccs.insert(SuccSU).second)
29012858
continue;
29022859

2903-
MachineInstr *SuccMI = SuccSU->getInstr();
2904-
if (!SuccMI)
2905-
continue;
2906-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2860+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
29072861
++TryReadyVALUSuccs;
29082862
}
29092863
}
@@ -2939,21 +2893,15 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
29392893
SUnit *SuccSU = Succ.getSUnit();
29402894
if (!CandSeenSuccs.insert(SuccSU).second)
29412895
continue;
2942-
MachineInstr *SuccMI = SuccSU->getInstr();
2943-
if (!SuccMI)
2944-
continue;
2945-
if (TII->isVALU(*SuccMI))
2896+
if (TII->isVALU(*SuccSU->getInstr()))
29462897
++CandVALUSuccs;
29472898
}
29482899

29492900
for (SDep &Succ : TryCand.SU->Succs) {
29502901
SUnit *SuccSU = Succ.getSUnit();
29512902
if (!TrySeenSuccs.insert(SuccSU).second)
29522903
continue;
2953-
MachineInstr *SuccMI = SuccSU->getInstr();
2954-
if (!SuccMI)
2955-
continue;
2956-
if (TII->isVALU(*SuccMI))
2904+
if (TII->isVALU(*SuccSU->getInstr()))
29572905
++TryVALUSuccs;
29582906
}
29592907

@@ -2995,10 +2943,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
29952943
if (!CandSeenSuccs.insert(SuccSU).second)
29962944
continue;
29972945

2998-
MachineInstr *SuccMI = SuccSU->getInstr();
2999-
if (!SuccMI)
3000-
continue;
3001-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2946+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30022947
++CandReadyVALUSuccs;
30032948
}
30042949
}
@@ -3009,10 +2954,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30092954
if (!TrySeenSuccs.insert(SuccSU).second)
30102955
continue;
30112956

3012-
MachineInstr *SuccMI = SuccSU->getInstr();
3013-
if (!SuccMI)
3014-
continue;
3015-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2957+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30162958
++TryReadyVALUSuccs;
30172959
}
30182960
}
@@ -3051,10 +2993,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30512993
if (!CandSeenSuccs.insert(SuccSU).second)
30522994
continue;
30532995

3054-
MachineInstr *SuccMI = SuccSU->getInstr();
3055-
if (!SuccMI)
3056-
continue;
3057-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
2996+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30582997
++CandReadyVALUSuccs;
30592998
}
30602999
}
@@ -3065,10 +3004,7 @@ bool GCNPostSchedStrategy::tryXDL(SchedCandidate &Cand,
30653004
if (!TrySeenSuccs.insert(SuccSU).second)
30663005
continue;
30673006

3068-
MachineInstr *SuccMI = SuccSU->getInstr();
3069-
if (!SuccMI)
3070-
continue;
3071-
if (TII->isVALU(*SuccMI) && SuccSU->NumPredsLeft == 1) {
3007+
if (TII->isVALU(*SuccSU->getInstr()) && SuccSU->NumPredsLeft == 1) {
30723008
++TryReadyVALUSuccs;
30733009
}
30743010
}
@@ -3181,7 +3117,7 @@ bool GCNPostSchedStrategy::tryCandidate(SchedCandidate &Cand,
31813117
}
31823118

31833119
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
3184-
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
3120+
MachineSchedContext *C, std::unique_ptr<GCNPostSchedStrategy> S,
31853121
bool RemoveKillFlags)
31863122
: ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
31873123

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
943943
void finalizeSchedule() override;
944944

945945
GCNPostScheduleDAGMILive(MachineSchedContext *C,
946-
std::unique_ptr<MachineSchedStrategy> S,
946+
std::unique_ptr<GCNPostSchedStrategy> S,
947947
bool RemoveKillFlags);
948948
};
949949

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "AMDGPU.h"
1616
#include "AMDGPUInstrInfo.h"
1717
#include "GCNHazardRecognizer.h"
18+
#include "GCNSchedStrategy.h"
1819
#include "GCNSubtarget.h"
1920
#include "SIMachineFunctionInfo.h"
2021
#include "Utils/AMDGPUBaseInfo.h"
@@ -9246,8 +9247,12 @@ SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
92469247
// We would like to restrict this hazard recognizer to only
92479248
// post-RA scheduling; we can tell that we're post-RA because we don't
92489249
// track VRegLiveness.
9249-
if (!DAG->hasVRegLiveness())
9250-
return new GCNHazardRecognizer(DAG->MF);
9250+
if (!DAG->hasVRegLiveness()) {
9251+
GCNPostScheduleDAGMILive *LiveDAG = static_cast<GCNPostScheduleDAGMILive *>(
9252+
const_cast<ScheduleDAGMI *>(DAG));
9253+
if (!LiveDAG->S->CustomResTracking)
9254+
return new GCNHazardRecognizer(DAG->MF);
9255+
}
92519256
return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
92529257
}
92539258

llvm/lib/Target/AMDGPU/SISchedule.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ multiclass SICommonWriteRes {
164164

165165
def : HWVALUWriteRes<Write32Bit, 1>;
166166
def : HWVALUWriteRes<WriteFloatCvt, 4>;
167-
def : HWVALUWriteRes<WriteTrans32, 4>;
168167
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
169168

170169
let ReleaseAtCycles = [4] in
@@ -223,6 +222,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
223222
def : HWVALUWriteRes<WriteDouble, 4>;
224223
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
225224
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
225+
def : HWVALUWriteRes<WriteTrans32, 4>;
226226
def : HWVALUWriteRes<WriteTrans64, 4>;
227227
} // End RetireOOO = 1
228228

@@ -241,6 +241,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
241241
def : HWVALUWriteRes<WriteDouble, 16>;
242242
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
243243
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
244+
def : HWVALUWriteRes<WriteTrans32, 16>;
244245
def : HWVALUWriteRes<WriteTrans64, 16>;
245246
} // End RetireOOO = 1
246247

@@ -261,6 +262,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
261262
def : HWVALUWriteRes<WriteDouble, 1>;
262263
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
263264
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
265+
def : HWVALUWriteRes<WriteTrans32, 4>;
264266
def : HWVALUWriteRes<WriteTrans64, 4>;
265267
def : HWVALUWriteRes<WriteIntMul, 1>;
266268
def : HWVALUWriteRes<Write64Bit, 1>;
@@ -284,6 +286,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
284286
def : HWVALUWriteRes<WriteDouble, 1>;
285287
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
286288
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
289+
def : HWVALUWriteRes<WriteTrans32, 4>;
287290
def : HWVALUWriteRes<WriteTrans64, 4>;
288291
def : HWVALUWriteRes<WriteIntMul, 1>;
289292
def : HWVALUWriteRes<Write64Bit, 1>;
@@ -319,6 +322,7 @@ def : HWVALUWriteRes<WriteDouble, 1>;
319322
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
320323
def : HWVALUWriteRes<WriteDoubleCvt, 1>;
321324
def : HWVALUWriteRes<WriteTrans64, 4>;
325+
def : HWVALUWriteRes<WriteTrans32, 2>;
322326
def : HWVALUWriteRes<WriteIntMul, 1>;
323327
def : HWVALUWriteRes<Write64Bit, 1>;
324328

0 commit comments

Comments
 (0)