@@ -6103,50 +6103,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61036103 Src2.setReg(RegOp2);
61046104 }
61056105
6106- <<<<<<< HEAD
6107- if (ST.isWave64()) {
6108- if (ST.hasScalarCompareEq64()) {
6109- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
6110- .addReg(Src2.getReg())
6111- .addImm(0);
6112- } else {
6113- const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
6114- const TargetRegisterClass *SubRC =
6115- TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
6116- MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
6117- MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
6118- MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
6119- MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
6120- Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6121-
6122- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
6123- .add(Src2Sub0)
6124- .add(Src2Sub1);
6125-
6126- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6127- .addReg(Src2_32, RegState::Kill)
6128- .addImm(0);
6129- }
6130- } else {
6131- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6132- .addReg(Src2.getReg())
6133- .addImm(0);
6134- }
6135-
6136- // clang-format off
6137- BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg())
6138- .add(Src0)
6139- .add(Src1);
6140- // clang-format on
6141-
6142- unsigned SelOpc =
6143- ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6144- =======
61456106 const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
61466107 unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
61476108 assert(WaveSize == 64 || WaveSize == 32);
61486109
6149- unsigned SelOpc =
6110+ unsigned SelectOpc =
61506111 (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
61516112 unsigned AddcSubbOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
61526113 unsigned AddSubOpc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
@@ -6169,22 +6130,27 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61696130 // dead S_CSELECT*.
61706131
61716132 bool RecalculateSCC{true};
6172- MachineInstr *Def = MRI.getVRegDef(Src2.getReg());
6173- if (Def && Def->getParent() == BB && Def->getOpcode() == SelOpc &&
6174- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0 &&
6175- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0) {
6176-
6177- auto I1 = std::next(MachineBasicBlock::reverse_iterator(Def));
6133+ MachineInstr *SelectDef = MRI.getVRegDef(Src2.getReg());
6134+ if (SelectDef && SelectDef->getParent() == BB &&
6135+ SelectDef->getOpcode() == SelectOpc &&
6136+ SelectDef->getOperand(1).isImm() &&
6137+ SelectDef->getOperand(1).getImm() != 0 &&
6138+ SelectDef->getOperand(2).isImm() &&
6139+ SelectDef->getOperand(2).getImm() == 0) {
6140+ auto I1 = std::next(MachineBasicBlock::reverse_iterator(SelectDef));
61786141 if (I1 != BB->rend() &&
61796142 (I1->getOpcode() == AddSubOpc || I1->getOpcode() == AddcSubbOpc)) {
6180- RecalculateSCC = false;
6181- // Ensure there are no intervening definitions of SCC.
6143+ // Ensure there are no intervening definitions of SCC between ADDs/SUBs
6144+ const unsigned SearchLimit = 6;
6145+ unsigned Count = 0;
61826146 for (auto I2 = std::next(MachineBasicBlock::reverse_iterator(MI));
6183- I2 != I1 ; I2++) {
6184- if (I2->definesRegister(AMDGPU::SCC, TRI) ) {
6185- RecalculateSCC = true ;
6147+ Count < SearchLimit ; I2++, Count ++) {
6148+ if (I2 == I1 ) {
6149+ RecalculateSCC = false ;
61866150 break;
61876151 }
6152+ if (I2->definesRegister(AMDGPU::SCC, TRI))
6153+ break;
61886154 }
61896155 }
61906156 }
@@ -6223,9 +6189,8 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
62236189 BuildMI(*BB, MII, DL, TII->get(AddcSubbOpc), Dest.getReg())
62246190 .add(Src0)
62256191 .add(Src1);
6226- >>>>>>> 0cb43743ea30 (Do not generate S_CMP if add/sub carryout is available)
62276192
6228- BuildMI(*BB, MII, DL, TII->get(SelOpc ), CarryDest.getReg())
6193+ BuildMI(*BB, MII, DL, TII->get(SelectOpc ), CarryDest.getReg())
62296194 .addImm(-1)
62306195 .addImm(0);
62316196
0 commit comments