Skip to content

Commit e6ae246

Browse files
authored
[AMDGPU] Refactor hazard recognizer for VALU-pipeline hazards. NFCI. (#168801)
This is in preparation for handling these hazards in the scheduler. I do not expect any changes to the produced code here; it is just infrastructure. Our current problem with the VALU pipeline hazards is that we only insert V_NOP instructions in the hazard recognizer mode, but ignore the hazards during scheduling. This patch is meant to create a mechanism to actually account for them during scheduling.
1 parent 0ff0f52 commit e6ae246

File tree

2 files changed

+55
-39
lines changed

2 files changed

+55
-39
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 47 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -437,9 +437,6 @@ void GCNHazardRecognizer::RecedeCycle() {
437437

438438
enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };
439439

440-
using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
441-
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;
442-
443440
// Search for a hazard in a block and its predecessors.
444441
template <typename StateT>
445442
static bool
@@ -546,11 +543,14 @@ hasHazard(StateT InitialState,
546543
// Returns the minimum wait states since \p I, walking all predecessors.
547544
// Only scans while \p IsExpired returns false.
548545
// Can only be run in a hazard recognizer mode.
549-
static int getWaitStatesSince(
550-
GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
551-
MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
552-
IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
553-
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
546+
static int
547+
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
548+
const MachineBasicBlock *MBB,
549+
MachineBasicBlock::const_reverse_instr_iterator I,
550+
int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired,
551+
DenseSet<const MachineBasicBlock *> &Visited,
552+
GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
553+
SIInstrInfo::getNumWaitStates) {
554554
for (auto E = MBB->instr_rend(); I != E; ++I) {
555555
// Don't add WaitStates for parent BUNDLE instructions.
556556
if (I->isBundle())
@@ -582,20 +582,26 @@ static int getWaitStatesSince(
582582
return MinWaitStates;
583583
}
584584

585-
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
586-
const MachineInstr *MI, IsExpiredFn IsExpired) {
585+
static int
586+
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
587+
const MachineInstr *MI,
588+
GCNHazardRecognizer::IsExpiredFn IsExpired,
589+
GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
590+
SIInstrInfo::getNumWaitStates) {
587591
DenseSet<const MachineBasicBlock *> Visited;
588592
return getWaitStatesSince(IsHazard, MI->getParent(),
589593
std::next(MI->getReverseIterator()), 0, IsExpired,
590-
Visited, SIInstrInfo::getNumWaitStates);
594+
Visited, GetNumWaitStates);
591595
}
592596

593-
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
597+
int GCNHazardRecognizer::getWaitStatesSince(
598+
IsHazardFn IsHazard, int Limit, GetNumWaitStatesFn GetNumWaitStates) {
594599
if (IsHazardRecognizerMode) {
595600
auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
596601
return WaitStates >= Limit;
597602
};
598-
return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
603+
return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn,
604+
GetNumWaitStates);
599605
}
600606

601607
int WaitStates = 0;
@@ -607,14 +613,18 @@ int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
607613
if (MI->isInlineAsm())
608614
continue;
609615
}
610-
++WaitStates;
616+
WaitStates += MI ? GetNumWaitStates(*MI) : 1;
611617

612618
if (WaitStates >= Limit)
613619
break;
614620
}
615621
return std::numeric_limits<int>::max();
616622
}
617623

624+
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
625+
return getWaitStatesSince(IsHazard, Limit, SIInstrInfo::getNumWaitStates);
626+
}
627+
618628
int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
619629
IsHazardFn IsHazardDef,
620630
int Limit) {
@@ -1243,6 +1253,20 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
12431253
getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
12441254
}
12451255

1256+
// Emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we need
1257+
// to insert; zero or negative means none are needed.
1258+
bool GCNHazardRecognizer::emitVNops(MachineInstr *MI, int WaitStatesNeeded) {
1259+
if (WaitStatesNeeded <= 0)
1260+
return false;
1261+
1262+
const SIInstrInfo *TII = ST.getInstrInfo();
1263+
for (int I = 0; I < WaitStatesNeeded; ++I)
1264+
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1265+
TII->get(AMDGPU::V_NOP_e32));
1266+
1267+
return true;
1268+
}
1269+
12461270
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12471271
fixVMEMtoScalarWriteHazards(MI);
12481272
fixVcmpxPermlaneHazards(MI);
@@ -1257,7 +1281,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12571281
fixVALUTransUseHazard(MI);
12581282
fixVALUTransCoexecutionHazards(MI);
12591283
fixWMMAHazards(MI); // fall-through if co-execution is enabled.
1260-
fixWMMACoexecutionHazards(MI);
1284+
emitVNops(MI, checkWMMACoexecutionHazards(MI));
12611285
fixShift64HighRegBug(MI);
12621286
fixVALUMaskWriteHazard(MI);
12631287
fixRequiredExportPriority(MI);
@@ -2045,13 +2069,13 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
20452069
return false;
20462070
}
20472071

2048-
bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
2072+
int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) {
20492073
if (!AMDGPU::isGFX1250(ST))
2050-
return false;
2074+
return 0;
20512075

20522076
const SIInstrInfo *TII = ST.getInstrInfo();
20532077
if (!TII->isXDLWMMA(*MI) && !isCoexecutableVALUInst(*MI))
2054-
return false;
2078+
return 0;
20552079

20562080
const SIRegisterInfo *TRI = ST.getRegisterInfo();
20572081

@@ -2129,9 +2153,6 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
21292153
};
21302154

21312155
int Limit = 0;
2132-
auto IsExpiredFn = [&Limit](const MachineInstr &, int WaitStates) {
2133-
return WaitStates >= Limit;
2134-
};
21352156

21362157
auto GetWaitStatesFn = [](const MachineInstr &I) {
21372158
return SIInstrInfo::isVALU(I) ? 1 : 0;
@@ -2141,38 +2162,26 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
21412162
if (TII->isXDLWMMA(*MI)) {
21422163
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
21432164
Limit = WMMAWaitStates[Category]; // for IsExpiredFn.
2144-
DenseSet<const MachineBasicBlock *> Visited;
2145-
// '::getWaitStatesSince' returns the number of VALUs in between if hazard
2165+
// 'getWaitStatesSince' returns the number of VALUs in between if hazard
21462166
// exists, and INT_MAX if there is no hazard. As a result, a negative
21472167
// WaitStatesNeeded here means no hazard, and we will continue to search
21482168
// for other categories.
21492169
WaitStatesNeeded =
2150-
Limit - ::getWaitStatesSince(IsWMMAHazardFn, MI->getParent(),
2151-
std::next(MI->getReverseIterator()), 0,
2152-
IsExpiredFn, Visited, GetWaitStatesFn);
2170+
Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
21532171
}
21542172
} else { // Must be a co-executable VALU.
21552173
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
21562174
Limit = VALUWaitStates[Category]; // for IsExpiredFn.
2157-
DenseSet<const MachineBasicBlock *> Visited;
2158-
// '::getWaitStatesSince' returns the number of VALUs in between if hazard
2175+
// 'getWaitStatesSince' returns the number of VALUs in between if hazard
21592176
// exists, and INT_MAX if there is no hazard. As a result, a negative
21602177
// WaitStatesNeeded here means no hazard, and we will continue to search
21612178
// for other categories.
21622179
WaitStatesNeeded =
2163-
Limit - ::getWaitStatesSince(IsVALUHazardFn, MI->getParent(),
2164-
std::next(MI->getReverseIterator()), 0,
2165-
IsExpiredFn, Visited, GetWaitStatesFn);
2180+
Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn);
21662181
}
21672182
}
21682183

2169-
// WaitStatesNeeded now is the number of V_NOPs we need to insert, negative
2170-
// means not needed.
2171-
for (int i = 0; i < WaitStatesNeeded; i++)
2172-
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
2173-
TII->get(AMDGPU::V_NOP_e32));
2174-
2175-
return true;
2184+
return WaitStatesNeeded;
21762185
}
21772186

21782187
bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class GCNSubtarget;
3232
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
3333
public:
3434
typedef function_ref<bool(const MachineInstr &)> IsHazardFn;
35+
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
36+
typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
3537

3638
private:
3739
// Distinguish if we are called from scheduler or hazard recognizer
@@ -74,6 +76,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
7476
// used on a newly inserted instruction before returning from PreEmitNoops.
7577
void runOnInstruction(MachineInstr *MI);
7678

79+
int getWaitStatesSince(IsHazardFn IsHazard, int Limit,
80+
GetNumWaitStatesFn GetNumWaitStates);
7781
int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
7882
int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
7983
int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
@@ -94,6 +98,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
9498
int checkReadM0Hazards(MachineInstr *SMovRel);
9599
int checkNSAtoVMEMHazard(MachineInstr *MI);
96100
int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
101+
// Emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we
102+
// need to insert; zero or negative means none are needed.
103+
bool emitVNops(MachineInstr *MI, int WaitStatesNeeded);
97104
void fixHazards(MachineInstr *MI);
98105
bool fixVcmpxPermlaneHazards(MachineInstr *MI);
99106
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
@@ -106,7 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
106113
bool fixVALUTransUseHazard(MachineInstr *MI);
107114
bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
108115
bool fixWMMAHazards(MachineInstr *MI);
109-
bool fixWMMACoexecutionHazards(MachineInstr *MI);
116+
int checkWMMACoexecutionHazards(MachineInstr *MI);
110117
bool fixShift64HighRegBug(MachineInstr *MI);
111118
bool fixVALUMaskWriteHazard(MachineInstr *MI);
112119
bool fixRequiredExportPriority(MachineInstr *MI);

0 commit comments

Comments
 (0)