@@ -190,6 +190,21 @@ class AMDGPUWaitSGPRHazards {
190190 return Mask;
191191 }
192192
193+ bool mergeSubsequentWaitAlus (MachineBasicBlock::instr_iterator &MI,
194+ unsigned Mask) {
195+ auto MBB = MI->getParent ();
196+ if (MI != MBB->instr_begin ()) {
197+ MachineBasicBlock::instr_iterator It = std::prev (MI);
198+ while (It != MBB->instr_begin () && It->isDebugInstr ())
199+ --It;
200+ if (It->getOpcode () == AMDGPU::S_WAITCNT_DEPCTR) {
201+ It->getOperand (0 ).setImm (mergeMasks (Mask, It->getOperand (0 ).getImm ()));
202+ return true ;
203+ }
204+ }
205+ return false ;
206+ }
207+
193208 bool runOnMachineBasicBlock (MachineBasicBlock &MBB, bool Emit) {
194209 enum { WA_VALU = 0x1 , WA_SALU = 0x2 , WA_VCC = 0x4 };
195210
@@ -388,21 +403,12 @@ class AMDGPUWaitSGPRHazards {
388403 Mask = AMDGPU::DepCtr::encodeFieldVaSdst (Mask, 0 );
389404 }
390405 if (Emit) {
391- if (MI != MBB.instr_begin ()) {
392- MachineBasicBlock::instr_iterator It = std::prev (MI);
393- while (It != MBB.instr_begin () && It->isDebugInstr ())
394- --It;
395- if (It->getOpcode () == AMDGPU::S_WAITCNT_DEPCTR) {
396- Mask = mergeMasks (Mask, It->getOperand (0 ).getImm ());
397- It->getOperand (0 ).setImm (Mask);
398- continue ;
399- }
406+ if (!mergeSubsequentWaitAlus (MI, Mask)) {
407+ auto NewMI = BuildMI (MBB, MI, MI->getDebugLoc (),
408+ TII->get (AMDGPU::S_WAITCNT_DEPCTR))
409+ .addImm (Mask);
410+ updateGetPCBundle (NewMI);
400411 }
401-
402- auto NewMI = BuildMI (MBB, MI, MI->getDebugLoc (),
403- TII->get (AMDGPU::S_WAITCNT_DEPCTR))
404- .addImm (Mask);
405- updateGetPCBundle (NewMI);
406412 Emitted = true ;
407413 }
408414 }
0 commit comments