@@ -10577,6 +10577,73 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1057710577 if (SrcReg2 && !getFoldableImm (SrcReg2, *MRI, CmpValue))
1057810578 return false ;
1057910579
10580+ const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10581+ this ]() -> bool {
10582+ if (CmpValue != 0 )
10583+ return false ;
10584+
10585+ MachineInstr *Def = MRI->getUniqueVRegDef (SrcReg);
10586+ if (!Def || Def->getParent () != CmpInstr.getParent ())
10587+ return false ;
10588+
10589+ if (!(Def->getOpcode () == AMDGPU::S_LSHL_B32 ||
10590+ Def->getOpcode () == AMDGPU::S_LSHL_B64 ||
10591+ Def->getOpcode () == AMDGPU::S_LSHR_B32 ||
10592+ Def->getOpcode () == AMDGPU::S_LSHR_B64 ||
10593+ Def->getOpcode () == AMDGPU::S_AND_B32 ||
10594+ Def->getOpcode () == AMDGPU::S_AND_B64 ||
10595+ Def->getOpcode () == AMDGPU::S_OR_B32 ||
10596+ Def->getOpcode () == AMDGPU::S_OR_B64 ||
10597+ Def->getOpcode () == AMDGPU::S_XOR_B32 ||
10598+ Def->getOpcode () == AMDGPU::S_XOR_B64 ||
10599+ Def->getOpcode () == AMDGPU::S_NAND_B32 ||
10600+ Def->getOpcode () == AMDGPU::S_NAND_B64 ||
10601+ Def->getOpcode () == AMDGPU::S_NOR_B32 ||
10602+ Def->getOpcode () == AMDGPU::S_NOR_B64 ||
10603+ Def->getOpcode () == AMDGPU::S_XNOR_B32 ||
10604+ Def->getOpcode () == AMDGPU::S_XNOR_B64 ||
10605+ Def->getOpcode () == AMDGPU::S_ANDN2_B32 ||
10606+ Def->getOpcode () == AMDGPU::S_ANDN2_B64 ||
10607+ Def->getOpcode () == AMDGPU::S_ORN2_B32 ||
10608+ Def->getOpcode () == AMDGPU::S_ORN2_B64 ||
10609+ Def->getOpcode () == AMDGPU::S_BFE_I32 ||
10610+ Def->getOpcode () == AMDGPU::S_BFE_I64 ||
10611+ Def->getOpcode () == AMDGPU::S_BFE_U32 ||
10612+ Def->getOpcode () == AMDGPU::S_BFE_U64 ||
10613+ Def->getOpcode () == AMDGPU::S_BCNT0_I32_B32 ||
10614+ Def->getOpcode () == AMDGPU::S_BCNT0_I32_B64 ||
10615+ Def->getOpcode () == AMDGPU::S_BCNT1_I32_B32 ||
10616+ Def->getOpcode () == AMDGPU::S_BCNT1_I32_B64 ||
10617+ Def->getOpcode () == AMDGPU::S_QUADMASK_B32 ||
10618+ Def->getOpcode () == AMDGPU::S_QUADMASK_B64 ||
10619+ Def->getOpcode () == AMDGPU::S_NOT_B32 ||
10620+ Def->getOpcode () == AMDGPU::S_NOT_B64 ||
10621+
10622+ ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10623+ Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10624+ Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10625+ !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())))
10626+ return false ;
10627+
10628+ for (auto I = std::next (Def->getIterator ()), E = CmpInstr.getIterator ();
10629+ I != E; ++I) {
10630+ if (I->modifiesRegister (AMDGPU::SCC, &RI) ||
10631+ I->killsRegister (AMDGPU::SCC, &RI))
10632+ return false ;
10633+ }
10634+
10635+ if (!(Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10636+ Def->getOpcode () == AMDGPU::S_CSELECT_B64)) {
10637+ MachineOperand *SccDef =
10638+ Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10639+ assert (SccDef && " Def instruction must define SCC" );
10640+ SccDef->setIsDead (false );
10641+ }
10642+
10643+ CmpInstr.eraseFromParent ();
10644+ return true ;
10645+ };
10646+
1058010647 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
1058110648 this ](int64_t ExpectedValue, unsigned SrcSize,
1058210649 bool IsReversible, bool IsSigned) -> bool {
@@ -10704,15 +10771,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1070410771 case AMDGPU::S_CMP_LG_I32:
1070510772 case AMDGPU::S_CMPK_LG_U32:
1070610773 case AMDGPU::S_CMPK_LG_I32:
10707- return optimizeCmpAnd (0 , 32 , true , false );
10774+ return optimizeCmpAnd (0 , 32 , true , false ) || optimizeCmpSelect () ;
1070810775 case AMDGPU::S_CMP_GT_U32:
1070910776 case AMDGPU::S_CMPK_GT_U32:
1071010777 return optimizeCmpAnd (0 , 32 , false , false );
1071110778 case AMDGPU::S_CMP_GT_I32:
1071210779 case AMDGPU::S_CMPK_GT_I32:
1071310780 return optimizeCmpAnd (0 , 32 , false , true );
1071410781 case AMDGPU::S_CMP_LG_U64:
10715- return optimizeCmpAnd (0 , 64 , true , false );
10782+ return optimizeCmpAnd (0 , 64 , true , false ) || optimizeCmpSelect () ;
1071610783 }
1071710784
1071810785 return false ;
0 commit comments