@@ -10628,7 +10628,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1062810628 if (SrcReg2 && !getFoldableImm (SrcReg2, *MRI, CmpValue))
1062910629 return false ;
1063010630
10631- const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10631+ // SCCValid already leaves the required value in SCC, and SCCRedefine would
10632+ // only recompute that same value. If no instruction between the two
10633+ // modifies SCC, delete SCCRedefine and fix up the SCC dead flag on SCCValid
10634+ // and the kill flag on the last intervening instruction that kills SCC.
10635+ const auto optimizeSCC = [this ](MachineInstr *SCCValid,
10636+ MachineInstr *SCCRedefine) -> bool { // Erases SCCRedefine when nothing in between modifies SCC; returns true only if it was erased.
10637+ MachineInstr *KillsSCC = nullptr ; // Last intervening instruction seen that kills SCC, if any.
10638+ for (MachineInstr &MI : make_range (std::next (SCCValid->getIterator ()),
10639+ SCCRedefine->getIterator ())) { // Scans strictly between SCCValid and SCCRedefine. NOTE(review): assumes both are in the same block with SCCValid first — confirm at call sites.
10640+ if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10641+ return false ; // An intervening write to SCC: give up without changing anything.
10642+ if (MI.killsRegister (AMDGPU::SCC, &RI))
10643+ KillsSCC = &MI; // Overwritten on each hit, so only the last killer is remembered.
10644+ }
10645+ if (MachineOperand *SccDef =
10646+ SCCValid->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10647+ SccDef->setIsDead (false ); // Clear the dead flag on SCCValid's SCC def, if it has one; presumably because SCC users after SCCRedefine now read this def.
10648+ if (KillsSCC)
10649+ KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr ); // Drop the SCC kill flag recorded during the scan.
10650+ SCCRedefine->eraseFromParent (); // Remove the redundant redefinition from its parent.
10651+
10652+ return true ; // SCCRedefine was erased.
10653+ };
10654+
10655+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1063210656 this ]() -> bool {
1063310657 if (CmpValue != 0 )
1063410658 return false ;
@@ -10663,25 +10687,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1066310687 if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (Def))
1066410688 return false ;
1066510689
10666- MachineInstr *KillsSCC = nullptr ;
10667- for (MachineInstr &MI :
10668- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10669- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10670- return false ;
10671- if (MI.killsRegister (AMDGPU::SCC, &RI))
10672- KillsSCC = &MI;
10673- }
10690+ if (!optimizeSCC (Def, &CmpInstr))
10691+ return false ;
1067410692
10675- if (MachineOperand *SccDef =
10676- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10677- SccDef->setIsDead (false );
10678- if (KillsSCC)
10679- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10680- CmpInstr.eraseFromParent ();
1068110693 return true ;
1068210694 };
1068310695
10684- const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10696+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1068510697 this ](int64_t ExpectedValue, unsigned SrcSize,
1068610698 bool IsReversible, bool IsSigned) -> bool {
1068710699 // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10755,21 +10767,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1075510767 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1075610768 return false ;
1075710769
10758- MachineInstr *KillsSCC = nullptr ;
10759- for (MachineInstr &MI :
10760- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10761- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10762- return false ;
10763- if (MI.killsRegister (AMDGPU::SCC, &RI))
10764- KillsSCC = &MI;
10765- }
10766-
10767- MachineOperand *SccDef =
10768- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10769- SccDef->setIsDead (false );
10770- if (KillsSCC)
10771- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10772- CmpInstr.eraseFromParent ();
10770+ if (!optimizeSCC (Def, &CmpInstr))
10771+ return false ;
1077310772
1077410773 if (!MRI->use_nodbg_empty (DefReg)) {
1077510774 assert (!IsReversedCC);
0 commit comments