@@ -10715,16 +10715,18 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1071510715 return false ;
1071610716}
1071710717
10718+ // Invert all uses of SCC that follow SCCDef. This is needed because SCCDef may
10719+ // be deleted and (incoming SCC) = !(SCC defined by SCCDef).
10720+ // Return true if all uses can be rewritten, false otherwise.
1071810721bool SIInstrInfo::invertSCCUse (MachineInstr *SCCDef) const {
1071910722 MachineBasicBlock *MBB = SCCDef->getParent ();
10720- const SIRegisterInfo *TRI = ST.getRegisterInfo ();
10721- SmallVector<MachineInstr *, 2 > InvertInstr;
10723+ SmallVector<MachineInstr *> InvertInstr;
1072210724 bool SCCIsDead = false ;
1072310725
1072410726 // Scan instructions for SCC uses that need to be inverted until SCC is dead.
1072510727 for (MachineInstr &MI :
1072610728 make_range (std::next (MachineBasicBlock::iterator (SCCDef)), MBB->end ())) {
10727- if (MI.readsRegister (AMDGPU::SCC, TRI )) {
10729+ if (MI.readsRegister (AMDGPU::SCC, &RI )) {
1072810730 if (MI.getOpcode () == AMDGPU::S_CSELECT_B32 ||
1072910731 MI.getOpcode () == AMDGPU::S_CSELECT_B64 ||
1073010732 MI.getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
@@ -10733,18 +10735,18 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1073310735 else
1073410736 return false ;
1073510737 }
10736- if (MI.definesRegister (AMDGPU::SCC, TRI)) {
10738+ if (MI.definesRegister (AMDGPU::SCC, &RI) ||
10739+ MI.killsRegister (AMDGPU::SCC, &RI)) {
1073710740 SCCIsDead = true ;
1073810741 break ;
1073910742 }
1074010743 }
1074110744
10742- const MachineRegisterInfo &MRI =
10743- SCCDef->getParent ()->getParent ()->getRegInfo ();
10745+ const MachineRegisterInfo &MRI = SCCDef->getMF ()->getRegInfo ();
1074410746 // If SCC is still live, verify that it is not live past the end of this
1074510747 // block.
1074610748 if (!SCCIsDead && MRI.tracksLiveness ())
10747- SCCIsDead = MBB->computeRegisterLiveness (TRI , AMDGPU::SCC, MBB->end (), 0 ) ==
10749+ SCCIsDead = MBB->computeRegisterLiveness (&RI , AMDGPU::SCC, MBB->end (), 0 ) ==
1074810750 MachineBasicBlock::LQR_Dead;
1074910751
1075010752 if (!SCCIsDead)
@@ -10753,15 +10755,16 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1075310755 // Invert uses
1075410756 for (MachineInstr *MI : InvertInstr) {
1075510757 if (MI->getOpcode () == AMDGPU::S_CSELECT_B32 ||
10756- MI->getOpcode () == AMDGPU::S_CSELECT_B64)
10758+ MI->getOpcode () == AMDGPU::S_CSELECT_B64) {
1075710759 swapOperands (*MI);
10758- else if (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
10759- MI->getOpcode () == AMDGPU::S_CBRANCH_SCC1)
10760+ } else if (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0 ||
10761+ MI->getOpcode () == AMDGPU::S_CBRANCH_SCC1) {
1076010762 MI->setDesc (get (MI->getOpcode () == AMDGPU::S_CBRANCH_SCC0
1076110763 ? AMDGPU::S_CBRANCH_SCC1
1076210764 : AMDGPU::S_CBRANCH_SCC0));
10763- else
10765+ } else {
1076410766 llvm_unreachable (" SCC used but no inversion handling" );
10767+ }
1076510768 }
1076610769 return true ;
1076710770}
@@ -10771,7 +10774,6 @@ bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
1077110774// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
1077210775// update kill/dead flags if necessary.
1077310776bool SIInstrInfo::optimizeSCC (MachineInstr *SCCValid, MachineInstr *SCCRedefine,
10774- const SIRegisterInfo &RI,
1077510777 bool NeedInversion) const {
1077610778 MachineInstr *KillsSCC = nullptr ;
1077710779 if (SCCValid->getParent () != SCCRedefine->getParent ())
@@ -10839,7 +10841,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1083910841 if (!setsSCCifResultIsNonZero (*Def) && !foldableSelect (*Def))
1084010842 return false ;
1084110843
10842- if (!optimizeSCC (Def, &CmpInstr, RI, NeedInversion))
10844+ if (!optimizeSCC (Def, &CmpInstr, NeedInversion))
1084310845 return false ;
1084410846
1084510847 // If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit
@@ -10864,7 +10866,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1086410866 Def1->getOperand (1 ).getReg () == Def2->getOperand (1 ).getReg ()) {
1086510867 MachineInstr *Select = MRI->getVRegDef (Def1->getOperand (1 ).getReg ());
1086610868 if (Select && foldableSelect (*Select))
10867- optimizeSCC (Select, Def, RI, false );
10869+ optimizeSCC (Select, Def, false );
1086810870 }
1086910871 }
1087010872 }
@@ -10945,7 +10947,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1094510947 if (IsReversedCC && !MRI->hasOneNonDBGUse (DefReg))
1094610948 return false ;
1094710949
10948- if (!optimizeSCC (Def, &CmpInstr, RI, false ))
10950+ if (!optimizeSCC (Def, &CmpInstr, false ))
1094910951 return false ;
1095010952
1095110953 if (!MRI->use_nodbg_empty (DefReg)) {
0 commit comments