@@ -10586,43 +10586,38 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1058610586 if (!Def || Def->getParent () != CmpInstr.getParent ())
1058710587 return false ;
1058810588
10589- if (!(Def->getOpcode () == AMDGPU::S_LSHL_B32 ||
10590- Def->getOpcode () == AMDGPU::S_LSHL_B64 ||
10591- Def->getOpcode () == AMDGPU::S_LSHR_B32 ||
10592- Def->getOpcode () == AMDGPU::S_LSHR_B64 ||
10593- Def->getOpcode () == AMDGPU::S_AND_B32 ||
10594- Def->getOpcode () == AMDGPU::S_AND_B64 ||
10595- Def->getOpcode () == AMDGPU::S_OR_B32 ||
10596- Def->getOpcode () == AMDGPU::S_OR_B64 ||
10597- Def->getOpcode () == AMDGPU::S_XOR_B32 ||
10598- Def->getOpcode () == AMDGPU::S_XOR_B64 ||
10599- Def->getOpcode () == AMDGPU::S_NAND_B32 ||
10600- Def->getOpcode () == AMDGPU::S_NAND_B64 ||
10601- Def->getOpcode () == AMDGPU::S_NOR_B32 ||
10602- Def->getOpcode () == AMDGPU::S_NOR_B64 ||
10603- Def->getOpcode () == AMDGPU::S_XNOR_B32 ||
10604- Def->getOpcode () == AMDGPU::S_XNOR_B64 ||
10605- Def->getOpcode () == AMDGPU::S_ANDN2_B32 ||
10606- Def->getOpcode () == AMDGPU::S_ANDN2_B64 ||
10607- Def->getOpcode () == AMDGPU::S_ORN2_B32 ||
10608- Def->getOpcode () == AMDGPU::S_ORN2_B64 ||
10609- Def->getOpcode () == AMDGPU::S_BFE_I32 ||
10610- Def->getOpcode () == AMDGPU::S_BFE_I64 ||
10611- Def->getOpcode () == AMDGPU::S_BFE_U32 ||
10612- Def->getOpcode () == AMDGPU::S_BFE_U64 ||
10613- Def->getOpcode () == AMDGPU::S_BCNT0_I32_B32 ||
10614- Def->getOpcode () == AMDGPU::S_BCNT0_I32_B64 ||
10615- Def->getOpcode () == AMDGPU::S_BCNT1_I32_B32 ||
10616- Def->getOpcode () == AMDGPU::S_BCNT1_I32_B64 ||
10617- Def->getOpcode () == AMDGPU::S_QUADMASK_B32 ||
10618- Def->getOpcode () == AMDGPU::S_QUADMASK_B64 ||
10619- Def->getOpcode () == AMDGPU::S_NOT_B32 ||
10620- Def->getOpcode () == AMDGPU::S_NOT_B64 ||
10621-
10622- ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10623- Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10624- Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10625- !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())))
10589+ bool CanOptimize = false ;
10590+ MachineOperand *SccDef =
10591+ Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10592+
10593+ // For any S_OP that sets SCC = (DST != 0), apply the transformation:
10594+ //
10595+ // s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
10596+ if (SccDef && Def->getOpcode () != AMDGPU::S_ADD_I32 &&
10597+ Def->getOpcode () != AMDGPU::S_ADD_U32 &&
10598+ Def->getOpcode () != AMDGPU::S_ADDC_U32 &&
10599+ Def->getOpcode () != AMDGPU::S_SUB_I32 &&
10600+ Def->getOpcode () != AMDGPU::S_SUB_U32 &&
10601+ Def->getOpcode () != AMDGPU::S_SUBB_U32 &&
10602+ Def->getOpcode () != AMDGPU::S_MIN_I32 &&
10603+ Def->getOpcode () != AMDGPU::S_MIN_U32 &&
10604+ Def->getOpcode () != AMDGPU::S_MAX_I32 &&
10605+ Def->getOpcode () != AMDGPU::S_MAX_U32 &&
10606+ Def->getOpcode () != AMDGPU::S_ADDK_I32)
10607+ CanOptimize = true ;
10608+
10609+ // s_cmp_lg_* is redundant because the SCC input value of S_CSELECT* is
10610+ // the same value that s_cmp_lg_* would calculate:
10611+ //
10612+ // s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0
10613+ // => (S_CSELECT* (non-zero imm), 0)
10614+ if ((Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10615+ Def->getOpcode () == AMDGPU::S_CSELECT_B64) &&
10616+ Def->getOperand (1 ).isImm () && Def->getOperand (1 ).getImm () &&
10617+ !Def->getOperand (2 ).isImm () && !Def->getOperand (2 ).getImm ())
10618+ CanOptimize = true ;
10619+
10620+ if (!CanOptimize)
1062610621 return false ;
1062710622
1062810623 for (auto I = std::next (Def->getIterator ()), E = CmpInstr.getIterator ();
@@ -10632,13 +10627,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1063210627 return false ;
1063310628 }
1063410629
10635- if (!(Def->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10636- Def->getOpcode () == AMDGPU::S_CSELECT_B64)) {
10637- MachineOperand *SccDef =
10638- Def->findRegisterDefOperand (AMDGPU::SCC, /* TRI=*/ nullptr );
10639- assert (SccDef && " Def instruction must define SCC" );
10630+ if (SccDef)
1064010631 SccDef->setIsDead (false );
10641- }
1064210632
1064310633 CmpInstr.eraseFromParent ();
1064410634 return true ;
0 commit comments