@@ -10737,16 +10737,18 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1073710737 return false ;
1073810738}
1073910739
10740+ // Invert all uses of SCC that follow SCCDef. SCCDef may be deleted, and then
10741+ // (incoming SCC) = !(SCC defined by SCCDef).
10742+ // Return true if all uses can be rewritten, false otherwise.
1074010743bool SIInstrInfo::invertSCCUse (MachineInstr *SCCDef) const {
1074110744 MachineBasicBlock *MBB = SCCDef->getParent ();
10742- const SIRegisterInfo *TRI = ST.getRegisterInfo ();
10743- SmallVector<MachineInstr *, 2 > InvertInstr;
10745+ SmallVector<MachineInstr *> InvertInstr;
1074410746 bool SCCIsDead = false ;
1074510747
1074610748 // Scan instructions for SCC uses that need to be inverted until SCC is dead.
1074710749 for (MachineInstr &MI :
1074810750 make_range (std::next (MachineBasicBlock::iterator (SCCDef)), MBB->end ())) {
10749- if (MI.readsRegister (AMDGPU::SCC, TRI )) {
10751+ if (MI.readsRegister (AMDGPU::SCC, &RI )) {
1075010752 if (MI.getOpcode () == AMDGPU::S_CSELECT_B32 ||
1075110753 MI.getOpcode () == AMDGPU::S_CSELECT_B64 ||
1075210754 MI.getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
@@ -10755,18 +10757,18 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1075510757 else
1075610758 return false ;
1075710759 }
10758- if (MI.definesRegister (AMDGPU::SCC, TRI)) {
10760+ if (MI.definesRegister (AMDGPU::SCC, &RI) ||
10761+ MI.killsRegister (AMDGPU::SCC, &RI)) {
1075910762 SCCIsDead = true ;
1076010763 break ;
1076110764 }
1076210765 }
1076310766
10764- const MachineRegisterInfo &MRI =
10765- SCCDef->getParent ()->getParent ()->getRegInfo ();
10767+ const MachineRegisterInfo &MRI = SCCDef->getMF ()->getRegInfo ();
1076610768 // If SCC is still live, verify that it is not live past the end of this
1076710769 // block.
1076810770 if (!SCCIsDead && MRI.tracksLiveness ())
10769- SCCIsDead = MBB->computeRegisterLiveness (TRI , AMDGPU::SCC, MBB->end (), 0 ) ==
10771+ SCCIsDead = MBB->computeRegisterLiveness (&RI , AMDGPU::SCC, MBB->end (), 0 ) ==
1077010772 MachineBasicBlock::LQR_Dead;
1077110773
1077210774 if (!SCCIsDead)
@@ -10775,15 +10777,16 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1077510777 // Invert uses
1077610778 for (MachineInstr *MI : InvertInstr) {
1077710779 if (MI->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10778- MI->getOpcode () == AMDGPU::S_CSELECT_B64)
10780+ MI->getOpcode () == AMDGPU::S_CSELECT_B64) {
1077910781 swapOperands (*MI);
10780- else if (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
10781- MI->getOpcode () == AMDGPU::S_CBRANCH_SCC1)
10782+ } else if (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
10783+ MI->getOpcode () == AMDGPU::S_CBRANCH_SCC1) {
1078210784 MI->setDesc (get (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0
1078310785 ? AMDGPU::S_CBRANCH_SCC1
1078410786 : AMDGPU::S_CBRANCH_SCC0));
10785- else
10787+ } else {
1078610788 llvm_unreachable (" SCC used but no inversion handling" );
10789+ }
1078710790 }
1078810791 return true ;
1078910792}
@@ -10793,7 +10796,6 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1079310796// SCCValid. If there are no intervening SCC conflicts, delete SCCRedefine and
1079410797// update kill/dead flags if necessary.
1079510798bool SIInstrInfo::optimizeSCC (MachineInstr *SCCValid, MachineInstr *SCCRedefine,
10796- const SIRegisterInfo &RI,
1079710799 bool NeedInversion) const {
1079810800 MachineInstr *KillsSCC = nullptr ;
1079910801 if (SCCValid->getParent () != SCCRedefine->getParent ())
@@ -10861,7 +10863,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1086110863 if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (*Def))
1086210864 return false ;
1086310865
10864- if (!optimizeSCC (Def, &CmpInstr, RI, NeedInversion))
10866+ if (!optimizeSCC (Def, &CmpInstr, NeedInversion))
1086510867 return false ;
1086610868
1086710869 // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit
@@ -10886,7 +10888,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1088610888 Def1->getOperand (1 ).getReg () == Def2->getOperand (1 ).getReg ()) {
1088710889 MachineInstr *Select = MRI->getVRegDef (Def1->getOperand (1 ).getReg ());
1088810890 if (Select && foldableSelect (*Select))
10889- optimizeSCC (Select, Def, RI, false );
10891+ optimizeSCC (Select, Def, false );
1089010892 }
1089110893 }
1089210894 }
@@ -10967,7 +10969,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1096710969 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1096810970 return false ;
1096910971
10970- if (!optimizeSCC (Def, &CmpInstr, RI, false ))
10972+ if (!optimizeSCC (Def, &CmpInstr, false ))
1097110973 return false ;
1097210974
1097310975 if (!MRI->use_nodbg_empty (DefReg)) {
0 commit comments