@@ -6103,50 +6103,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6103
6103
Src2.setReg(RegOp2);
6104
6104
}
6105
6105
6106
- <<<<<<< HEAD
6107
- if (ST.isWave64()) {
6108
- if (ST.hasScalarCompareEq64()) {
6109
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
6110
- .addReg(Src2.getReg())
6111
- .addImm(0);
6112
- } else {
6113
- const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
6114
- const TargetRegisterClass *SubRC =
6115
- TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
6116
- MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
6117
- MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
6118
- MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
6119
- MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
6120
- Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6121
-
6122
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
6123
- .add(Src2Sub0)
6124
- .add(Src2Sub1);
6125
-
6126
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6127
- .addReg(Src2_32, RegState::Kill)
6128
- .addImm(0);
6129
- }
6130
- } else {
6131
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6132
- .addReg(Src2.getReg())
6133
- .addImm(0);
6134
- }
6135
-
6136
- // clang-format off
6137
- BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg())
6138
- .add(Src0)
6139
- .add(Src1);
6140
- // clang-format on
6141
-
6142
- unsigned SelOpc =
6143
- ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6144
- =======
6145
6106
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
6146
6107
unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
6147
6108
assert(WaveSize == 64 || WaveSize == 32);
6148
6109
6149
- unsigned SelOpc =
6110
+ unsigned SelectOpc =
6150
6111
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6151
6112
unsigned AddcSubbOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
6152
6113
unsigned AddSubOpc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
@@ -6169,22 +6130,27 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6169
6130
// dead S_CSELECT*.
6170
6131
6171
6132
bool RecalculateSCC{true};
6172
- MachineInstr *Def = MRI.getVRegDef(Src2.getReg());
6173
- if (Def && Def->getParent() == BB && Def->getOpcode() == SelOpc &&
6174
- Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0 &&
6175
- Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0) {
6176
-
6177
- auto I1 = std::next(MachineBasicBlock::reverse_iterator(Def));
6133
+ MachineInstr *SelectDef = MRI.getVRegDef(Src2.getReg());
6134
+ if (SelectDef && SelectDef->getParent() == BB &&
6135
+ SelectDef->getOpcode() == SelectOpc &&
6136
+ SelectDef->getOperand(1).isImm() &&
6137
+ SelectDef->getOperand(1).getImm() != 0 &&
6138
+ SelectDef->getOperand(2).isImm() &&
6139
+ SelectDef->getOperand(2).getImm() == 0) {
6140
+ auto I1 = std::next(MachineBasicBlock::reverse_iterator(SelectDef));
6178
6141
if (I1 != BB->rend() &&
6179
6142
(I1->getOpcode() == AddSubOpc || I1->getOpcode() == AddcSubbOpc)) {
6180
- RecalculateSCC = false;
6181
- // Ensure there are no intervening definitions of SCC.
6143
+ // Ensure no SCC definition intervenes between that ADD/SUB and MI.
6144
+ const unsigned SearchLimit = 6;
6145
+ unsigned Count = 0;
6182
6146
for (auto I2 = std::next(MachineBasicBlock::reverse_iterator(MI));
6183
- I2 != I1 ; I2++) {
6184
- if (I2->definesRegister(AMDGPU::SCC, TRI) ) {
6185
- RecalculateSCC = true ;
6147
+ Count < SearchLimit ; I2++, Count ++) {
6148
+ if (I2 == I1 ) {
6149
+ RecalculateSCC = false ;
6186
6150
break;
6187
6151
}
6152
+ if (I2->definesRegister(AMDGPU::SCC, TRI))
6153
+ break;
6188
6154
}
6189
6155
}
6190
6156
}
@@ -6223,9 +6189,8 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6223
6189
BuildMI(*BB, MII, DL, TII->get(AddcSubbOpc), Dest.getReg())
6224
6190
.add(Src0)
6225
6191
.add(Src1);
6226
- >>>>>>> 0cb43743ea30 (Do not generate S_CMP if add/sub carryout is available)
6227
6192
6228
- BuildMI(*BB, MII, DL, TII->get(SelOpc ), CarryDest.getReg())
6193
+ BuildMI(*BB, MII, DL, TII->get(SelectOpc ), CarryDest.getReg())
6229
6194
.addImm(-1)
6230
6195
.addImm(0);
6231
6196
0 commit comments