@@ -437,9 +437,6 @@ void GCNHazardRecognizer::RecedeCycle() {
437437
438438enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };
439439
440- using IsExpiredFn = function_ref<bool (const MachineInstr &, int WaitStates)>;
441- using GetNumWaitStatesFn = function_ref<unsigned int (const MachineInstr &)>;
442-
443440// Search for a hazard in a block and its predecessors.
444441template <typename StateT>
445442static bool
@@ -546,11 +543,14 @@ hasHazard(StateT InitialState,
546543// Returns the minimum wait states since \p I, walking all predecessors.
547544// Only scans while \p IsExpired does not return true.
548545// Can only be run in a hazard recognizer mode.
549- static int getWaitStatesSince (
550- GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
551- MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
552- IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
553- GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
546+ static int
547+ getWaitStatesSince (GCNHazardRecognizer::IsHazardFn IsHazard,
548+ const MachineBasicBlock *MBB,
549+ MachineBasicBlock::const_reverse_instr_iterator I,
550+ int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired,
551+ DenseSet<const MachineBasicBlock *> &Visited,
552+ GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
553+ SIInstrInfo::getNumWaitStates) {
554554 for (auto E = MBB->instr_rend (); I != E; ++I) {
555555 // Don't add WaitStates for parent BUNDLE instructions.
556556 if (I->isBundle ())
@@ -582,20 +582,26 @@ static int getWaitStatesSince(
582582 return MinWaitStates;
583583}
584584
585- static int getWaitStatesSince (GCNHazardRecognizer::IsHazardFn IsHazard,
586- const MachineInstr *MI, IsExpiredFn IsExpired) {
585+ static int
586+ getWaitStatesSince (GCNHazardRecognizer::IsHazardFn IsHazard,
587+ const MachineInstr *MI,
588+ GCNHazardRecognizer::IsExpiredFn IsExpired,
589+ GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
590+ SIInstrInfo::getNumWaitStates) {
587591 DenseSet<const MachineBasicBlock *> Visited;
588592 return getWaitStatesSince (IsHazard, MI->getParent (),
589593 std::next (MI->getReverseIterator ()), 0 , IsExpired,
590- Visited, SIInstrInfo::getNumWaitStates );
594+ Visited, GetNumWaitStates );
591595}
592596
593- int GCNHazardRecognizer::getWaitStatesSince (IsHazardFn IsHazard, int Limit) {
597+ int GCNHazardRecognizer::getWaitStatesSince (
598+ IsHazardFn IsHazard, int Limit, GetNumWaitStatesFn GetNumWaitStates) {
594599 if (IsHazardRecognizerMode) {
595600 auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
596601 return WaitStates >= Limit;
597602 };
598- return ::getWaitStatesSince (IsHazard, CurrCycleInstr, IsExpiredFn);
603+ return ::getWaitStatesSince (IsHazard, CurrCycleInstr, IsExpiredFn,
604+ GetNumWaitStates);
599605 }
600606
601607 int WaitStates = 0 ;
@@ -607,14 +613,18 @@ int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
607613 if (MI->isInlineAsm ())
608614 continue ;
609615 }
610- ++ WaitStates;
616+ WaitStates += MI ? GetNumWaitStates (*MI) : 1 ;
611617
612618 if (WaitStates >= Limit)
613619 break ;
614620 }
615621 return std::numeric_limits<int >::max ();
616622}
617623
624+ int GCNHazardRecognizer::getWaitStatesSince (IsHazardFn IsHazard, int Limit) {
625+ return getWaitStatesSince (IsHazard, Limit, SIInstrInfo::getNumWaitStates);
626+ }
627+
618628int GCNHazardRecognizer::getWaitStatesSinceDef (unsigned Reg,
619629 IsHazardFn IsHazardDef,
620630 int Limit) {
@@ -1243,6 +1253,20 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
12431253 getWaitStatesSinceDef (AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
12441254}
12451255
1256+ // Emits \p WaitStatesNeeded V_NOP_e32 instructions before \p MI and returns
1257+ // true; if the count is zero or negative, inserts nothing and returns false.
1258+ bool GCNHazardRecognizer::emitVNops (MachineInstr *MI, int WaitStatesNeeded) {
1259+ if (WaitStatesNeeded <= 0 )
1260+ return false ;
1261+
1262+ const SIInstrInfo *TII = ST.getInstrInfo ();
1263+ for (int I = 0 ; I < WaitStatesNeeded; ++I)
1264+ BuildMI (*MI->getParent (), MI, MI->getDebugLoc (),
1265+ TII->get (AMDGPU::V_NOP_e32));
1266+
1267+ return true ;
1268+ }
1269+
12461270void GCNHazardRecognizer::fixHazards (MachineInstr *MI) {
12471271 fixVMEMtoScalarWriteHazards (MI);
12481272 fixVcmpxPermlaneHazards (MI);
@@ -1257,7 +1281,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12571281 fixVALUTransUseHazard (MI);
12581282 fixVALUTransCoexecutionHazards (MI);
12591283 fixWMMAHazards (MI); // fall-through if co-execution is enabled.
1260- fixWMMACoexecutionHazards (MI);
1284+ emitVNops (MI, checkWMMACoexecutionHazards (MI) );
12611285 fixShift64HighRegBug (MI);
12621286 fixVALUMaskWriteHazard (MI);
12631287 fixRequiredExportPriority (MI);
@@ -2045,13 +2069,13 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
20452069 return false ;
20462070}
20472071
2048- bool GCNHazardRecognizer::fixWMMACoexecutionHazards (MachineInstr *MI) {
2072+ int GCNHazardRecognizer::checkWMMACoexecutionHazards (MachineInstr *MI) {
20492073 if (!AMDGPU::isGFX1250 (ST))
2050- return false ;
2074+ return 0 ;
20512075
20522076 const SIInstrInfo *TII = ST.getInstrInfo ();
20532077 if (!TII->isXDLWMMA (*MI) && !isCoexecutableVALUInst (*MI))
2054- return false ;
2078+ return 0 ;
20552079
20562080 const SIRegisterInfo *TRI = ST.getRegisterInfo ();
20572081
@@ -2129,9 +2153,6 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
21292153 };
21302154
21312155 int Limit = 0 ;
2132- auto IsExpiredFn = [&Limit](const MachineInstr &, int WaitStates) {
2133- return WaitStates >= Limit;
2134- };
21352156
21362157 auto GetWaitStatesFn = [](const MachineInstr &I) {
21372158 return SIInstrInfo::isVALU (I) ? 1 : 0 ;
@@ -2141,38 +2162,26 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
21412162 if (TII->isXDLWMMA (*MI)) {
21422163 for (Category = 0 ; WaitStatesNeeded < 0 && Category < 4 ; Category++) {
21432164 Limit = WMMAWaitStates[Category]; // passed to getWaitStatesSince.
2144- DenseSet<const MachineBasicBlock *> Visited;
2145- // '::getWaitStatesSince' returns the number of VALUs in between if hazard
2165+ // 'getWaitStatesSince' returns the number of VALUs in between if hazard
21462166 // exists, and INT_MAX if there is no hazard. As a result, a negative
21472167 // WaitStatesNeeded here means no hazard, and we will continue to search
21482168 // for other categories.
21492169 WaitStatesNeeded =
2150- Limit - ::getWaitStatesSince (IsWMMAHazardFn, MI->getParent (),
2151- std::next (MI->getReverseIterator ()), 0 ,
2152- IsExpiredFn, Visited, GetWaitStatesFn);
2170+ Limit - getWaitStatesSince (IsWMMAHazardFn, Limit, GetWaitStatesFn);
21532171 }
21542172 } else { // Must be a co-executable VALU.
21552173 for (Category = 0 ; WaitStatesNeeded < 0 && Category < 4 ; Category++) {
21562174 Limit = VALUWaitStates[Category]; // passed to getWaitStatesSince.
2157- DenseSet<const MachineBasicBlock *> Visited;
2158- // '::getWaitStatesSince' returns the number of VALUs in between if hazard
2175+ // 'getWaitStatesSince' returns the number of VALUs in between if hazard
21592176 // exists, and INT_MAX if there is no hazard. As a result, a negative
21602177 // WaitStatesNeeded here means no hazard, and we will continue to search
21612178 // for other categories.
21622179 WaitStatesNeeded =
2163- Limit - ::getWaitStatesSince (IsVALUHazardFn, MI->getParent (),
2164- std::next (MI->getReverseIterator ()), 0 ,
2165- IsExpiredFn, Visited, GetWaitStatesFn);
2180+ Limit - getWaitStatesSince (IsVALUHazardFn, Limit, GetWaitStatesFn);
21662181 }
21672182 }
21682183
2169- // WaitStatesNeeded now is the number of V_NOPs we need to insert, negative
2170- // means not needed.
2171- for (int i = 0 ; i < WaitStatesNeeded; i++)
2172- BuildMI (*MI->getParent (), MI, MI->getDebugLoc (),
2173- TII->get (AMDGPU::V_NOP_e32));
2174-
2175- return true ;
2184+ return WaitStatesNeeded;
21762185}
21772186
21782187bool GCNHazardRecognizer::fixShift64HighRegBug (MachineInstr *MI) {
0 commit comments