@@ -10618,6 +10618,29 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1061810618 return false ;
1061910619}
1062010620
10621+ // SCC is already valid after SCCValid.
10622+ // SCCRedefine will redefine SCC to the same value already available after
10623+ // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
10624+ // update kill/dead flags if necessary.
10625+ static bool optimizeSCC (MachineInstr *SCCValid, MachineInstr *SCCRedefine,
10626+ const SIRegisterInfo *RI) {
10627+ MachineInstr *KillsSCC = nullptr ;
10628+ for (MachineInstr &MI : make_range (std::next (SCCValid->getIterator ()),
10629+ SCCRedefine->getIterator ())) {
10630+ if (MI.modifiesRegister (AMDGPU::SCC, RI))
10631+ return false ;
10632+ if (MI.killsRegister (AMDGPU::SCC, RI))
10633+ KillsSCC = &MI;
10634+ }
10635+ if (MachineOperand *SccDef =
10636+ SCCValid->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10637+ SccDef->setIsDead (false );
10638+ if (KillsSCC)
10639+ KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10640+ SCCRedefine->eraseFromParent ();
10641+ return true ;
10642+ }
10643+
1062110644bool SIInstrInfo::optimizeCompareInstr (MachineInstr &CmpInstr, Register SrcReg,
1062210645 Register SrcReg2, int64_t CmpMask,
1062310646 int64_t CmpValue,
@@ -10628,32 +10651,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1062810651 if (SrcReg2 && !getFoldableImm (SrcReg2, *MRI, CmpValue))
1062910652 return false ;
1063010653
10631- // SCC is already valid after SCCValid.
10632- // SCCRedefine will redefine SCC to the same value already available after
10633- // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
10634- // update kill/dead flags if necessary.
10635- const auto optimizeSCC = [this ](MachineInstr *SCCValid,
10636- MachineInstr *SCCRedefine) -> bool {
10637- MachineInstr *KillsSCC = nullptr ;
10638- for (MachineInstr &MI : make_range (std::next (SCCValid->getIterator ()),
10639- SCCRedefine->getIterator ())) {
10640- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10641- return false ;
10642- if (MI.killsRegister (AMDGPU::SCC, &RI))
10643- KillsSCC = &MI;
10644- }
10645- if (MachineOperand *SccDef =
10646- SCCValid->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10647- SccDef->setIsDead (false );
10648- if (KillsSCC)
10649- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10650- SCCRedefine->eraseFromParent ();
10651-
10652- return true ;
10653- };
10654-
1065510654 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10656- optimizeSCC ]() -> bool {
10655+ this ]() -> bool {
1065710656 if (CmpValue != 0 )
1065810657 return false ;
1065910658
@@ -10687,13 +10686,13 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1068710686 if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (Def))
1068810687 return false ;
1068910688
10690- if (!optimizeSCC (Def, &CmpInstr))
10689+ if (!optimizeSCC (Def, &CmpInstr, &RI ))
1069110690 return false ;
1069210691
1069310692 return true ;
1069410693 };
1069510694
10696- const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
10695+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
1069710696 this ](int64_t ExpectedValue, unsigned SrcSize,
1069810697 bool IsReversible, bool IsSigned) -> bool {
1069910698 // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10767,7 +10766,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1076710766 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1076810767 return false ;
1076910768
10770- if (!optimizeSCC (Def, &CmpInstr))
10769+ if (!optimizeSCC (Def, &CmpInstr, &RI ))
1077110770 return false ;
1077210771
1077310772 if (!MRI->use_nodbg_empty (DefReg)) {
0 commit comments