@@ -10212,7 +10212,7 @@ static bool followSubRegDef(MachineInstr &MI,
1021210212}
1021310213
1021410214MachineInstr *llvm::getVRegSubRegDef (const TargetInstrInfo::RegSubRegPair &P,
10215- MachineRegisterInfo &MRI) {
10215+ const MachineRegisterInfo &MRI) {
1021610216 assert (MRI.isSSA ());
1021710217 if (!P.Reg .isVirtual ())
1021810218 return nullptr ;
@@ -10748,7 +10748,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1074810748 if (SrcReg2 && !getFoldableImm (SrcReg2, *MRI, CmpValue))
1074910749 return false ;
1075010750
10751- const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10751+ // SCC is already valid after SCCValid.
10752+ // SCCRedefine will redefine SCC to the same value already available after
10753+ // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
10754+ // update kill/dead flags if necessary.
10755+ const auto optimizeSCC = [this ](MachineInstr *SCCValid,
10756+ MachineInstr *SCCRedefine) -> bool {
10757+ MachineInstr *KillsSCC = nullptr ;
10758+ for (MachineInstr &MI : make_range (std::next (SCCValid->getIterator ()),
10759+ SCCRedefine->getIterator ())) {
10760+ if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10761+ return false ;
10762+ if (MI.killsRegister (AMDGPU::SCC, &RI))
10763+ KillsSCC = &MI;
10764+ }
10765+ if (MachineOperand *SccDef =
10766+ SCCValid->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10767+ SccDef->setIsDead (false );
10768+ if (KillsSCC)
10769+ KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10770+ SCCRedefine->eraseFromParent ();
10771+
10772+ return true ;
10773+ };
10774+
10775+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1075210776 this ]() -> bool {
1075310777 if (CmpValue != 0 )
1075410778 return false ;
@@ -10783,25 +10807,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1078310807 if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (Def))
1078410808 return false ;
1078510809
10786- MachineInstr *KillsSCC = nullptr ;
10787- for (MachineInstr &MI :
10788- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10789- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10790- return false ;
10791- if (MI.killsRegister (AMDGPU::SCC, &RI))
10792- KillsSCC = &MI;
10793- }
10810+ if (!optimizeSCC (Def, &CmpInstr))
10811+ return false ;
1079410812
10795- if (MachineOperand *SccDef =
10796- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10797- SccDef->setIsDead (false );
10798- if (KillsSCC)
10799- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10800- CmpInstr.eraseFromParent ();
10813+ // If the s_or_b32 result is unused (i.e. it is effectively a 64-bit s_cmp_lg
10814+ // of a register pair) and both of its inputs are defined by the same 64-bit
10815+ // foldableSelect, then transform:
10816+ //
10817+ // s_or_b32 (S_CSELECT_B64 (non-zero imm), 0), 0 =>
10818+ // (S_CSELECT_B64 (non-zero imm), 0)
10819+ if (Def->getOpcode () == AMDGPU::S_OR_B32 &&
10820+ MRI->use_nodbg_empty (Def->getOperand (0 ).getReg ())) {
10821+ MachineOperand OrOpnd1 = Def->getOperand (1 );
10822+ MachineOperand OrOpnd2 = Def->getOperand (2 );
10823+
10824+ if (OrOpnd1.isReg () && OrOpnd2.isReg () &&
10825+ OrOpnd1.getReg () != OrOpnd2.getReg ()) {
10826+ auto *Def1 = getVRegSubRegDef (getRegSubRegPair (OrOpnd1), *MRI);
10827+ auto *Def2 = getVRegSubRegDef (getRegSubRegPair (OrOpnd2), *MRI);
10828+ if (Def1 == Def2 && foldableSelect (Def1))
10829+ optimizeSCC (Def1, Def);
10830+ }
10831+ }
1080110832 return true ;
1080210833 };
1080310834
10804- const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10835+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1080510836 this ](int64_t ExpectedValue, unsigned SrcSize,
1080610837 bool IsReversible, bool IsSigned) -> bool {
1080710838 // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10875,21 +10906,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1087510906 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1087610907 return false ;
1087710908
10878- MachineInstr *KillsSCC = nullptr ;
10879- for (MachineInstr &MI :
10880- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10881- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10882- return false ;
10883- if (MI.killsRegister (AMDGPU::SCC, &RI))
10884- KillsSCC = &MI;
10885- }
10886-
10887- MachineOperand *SccDef =
10888- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10889- SccDef->setIsDead (false );
10890- if (KillsSCC)
10891- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10892- CmpInstr.eraseFromParent ();
10909+ if (!optimizeSCC (Def, &CmpInstr))
10910+ return false ;
1089310911
1089410912 if (!MRI->use_nodbg_empty (DefReg)) {
1089510913 assert (!IsReversedCC);
0 commit comments