@@ -10618,6 +10618,42 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1061810618 return false ;
1061910619}
1062010620
10621+ // SCC is already valid after SCCValid.
10622+ // SCCRedefine will redefine SCC to the same value already available after
10623+ // SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
10624+ // update kill/dead flags if necessary.
10625+ static bool optimizeSCC (MachineInstr *SCCValid, MachineInstr *SCCRedefine,
10626+ const SIRegisterInfo &RI) {
10627+ MachineInstr *KillsSCC = nullptr ;
10628+ for (MachineInstr &MI : make_range (std::next (SCCValid->getIterator ()),
10629+ SCCRedefine->getIterator ())) {
10630+ if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10631+ return false ;
10632+ if (MI.killsRegister (AMDGPU::SCC, &RI))
10633+ KillsSCC = &MI;
10634+ }
10635+ if (MachineOperand *SccDef =
10636+ SCCValid->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10637+ SccDef->setIsDead (false );
10638+ if (KillsSCC)
10639+ KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10640+ SCCRedefine->eraseFromParent ();
10641+ return true ;
10642+ }
10643+
10644+ static bool foldableSelect (const MachineInstr &Def) {
10645+ if (Def.getOpcode () != AMDGPU::S_CSELECT_B32 &&
10646+ Def.getOpcode () != AMDGPU::S_CSELECT_B64)
10647+ return false ;
10648+ bool Op1IsNonZeroImm =
10649+ Def.getOperand (1 ).isImm () && Def.getOperand (1 ).getImm () != 0 ;
10650+ bool Op2IsZeroImm =
10651+ Def.getOperand (2 ).isImm () && Def.getOperand (2 ).getImm () == 0 ;
10652+ if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10653+ return false ;
10654+ return true ;
10655+ }
10656+
1062110657bool SIInstrInfo::optimizeCompareInstr (MachineInstr &CmpInstr, Register SrcReg,
1062210658 Register SrcReg2, int64_t CmpMask,
1062310659 int64_t CmpValue,
@@ -10637,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1063710673 if (!Def || Def->getParent () != CmpInstr.getParent ())
1063810674 return false ;
1063910675
10640- const auto foldableSelect = [](MachineInstr *Def) -> bool {
10641- if (Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10642- Def->getOpcode () == AMDGPU::S_CSELECT_B64) {
10643- bool Op1IsNonZeroImm =
10644- Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () != 0 ;
10645- bool Op2IsZeroImm =
10646- Def->getOperand (2 ).isImm () && Def->getOperand (2 ).getImm () == 0 ;
10647- if (Op1IsNonZeroImm && Op2IsZeroImm)
10648- return true ;
10649- }
10650- return false ;
10651- };
10652-
1065310676 // For S_OP that set SCC = DST!=0, do the transformation
1065410677 //
1065510678 // s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
@@ -10660,24 +10683,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1066010683 //
1066110684 // s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
1066210685 // imm), 0)
10663- if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (Def))
10686+ if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (* Def))
1066410687 return false ;
1066510688
10666- MachineInstr *KillsSCC = nullptr ;
10667- for (MachineInstr &MI :
10668- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10669- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10670- return false ;
10671- if (MI.killsRegister (AMDGPU::SCC, &RI))
10672- KillsSCC = &MI;
10673- }
10689+ if (!optimizeSCC (Def, &CmpInstr, RI))
10690+ return false ;
1067410691
10675- if (MachineOperand *SccDef =
10676- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr ))
10677- SccDef->setIsDead (false );
10678- if (KillsSCC)
10679- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10680- CmpInstr.eraseFromParent ();
1068110692 return true ;
1068210693 };
1068310694
@@ -10755,21 +10766,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1075510766 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1075610767 return false ;
1075710768
10758- MachineInstr *KillsSCC = nullptr ;
10759- for (MachineInstr &MI :
10760- make_range (std::next (Def->getIterator ()), CmpInstr.getIterator ())) {
10761- if (MI.modifiesRegister (AMDGPU::SCC, &RI))
10762- return false ;
10763- if (MI.killsRegister (AMDGPU::SCC, &RI))
10764- KillsSCC = &MI;
10765- }
10766-
10767- MachineOperand *SccDef =
10768- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10769- SccDef->setIsDead (false );
10770- if (KillsSCC)
10771- KillsSCC->clearRegisterKills (AMDGPU::SCC, /* TRI=*/ nullptr );
10772- CmpInstr.eraseFromParent ();
10769+ if (!optimizeSCC (Def, &CmpInstr, RI))
10770+ return false ;
1077310771
1077410772 if (!MRI->use_nodbg_empty (DefReg)) {
1077510773 assert (!IsReversedCC);
0 commit comments