Skip to content

Commit d88ffc5

Browse files
committed
Limit search. Make code clearer.
Signed-off-by: John Lu <[email protected]>
1 parent 7c30e38 commit d88ffc5

File tree

1 file changed

+18
-53
lines changed

1 file changed

+18
-53
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 18 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -6103,50 +6103,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61036103
Src2.setReg(RegOp2);
61046104
}
61056105

6106-
<<<<<<< HEAD
6107-
if (ST.isWave64()) {
6108-
if (ST.hasScalarCompareEq64()) {
6109-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
6110-
.addReg(Src2.getReg())
6111-
.addImm(0);
6112-
} else {
6113-
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
6114-
const TargetRegisterClass *SubRC =
6115-
TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
6116-
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
6117-
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
6118-
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
6119-
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
6120-
Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6121-
6122-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
6123-
.add(Src2Sub0)
6124-
.add(Src2Sub1);
6125-
6126-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6127-
.addReg(Src2_32, RegState::Kill)
6128-
.addImm(0);
6129-
}
6130-
} else {
6131-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6132-
.addReg(Src2.getReg())
6133-
.addImm(0);
6134-
}
6135-
6136-
// clang-format off
6137-
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg())
6138-
.add(Src0)
6139-
.add(Src1);
6140-
// clang-format on
6141-
6142-
unsigned SelOpc =
6143-
ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6144-
=======
61456106
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
61466107
unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
61476108
assert(WaveSize == 64 || WaveSize == 32);
61486109

6149-
unsigned SelOpc =
6110+
unsigned SelectOpc =
61506111
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
61516112
unsigned AddcSubbOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
61526113
unsigned AddSubOpc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
@@ -6169,22 +6130,27 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61696130
// dead S_CSELECT*.
61706131

61716132
bool RecalculateSCC{true};
6172-
MachineInstr *Def = MRI.getVRegDef(Src2.getReg());
6173-
if (Def && Def->getParent() == BB && Def->getOpcode() == SelOpc &&
6174-
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0 &&
6175-
Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0) {
6176-
6177-
auto I1 = std::next(MachineBasicBlock::reverse_iterator(Def));
6133+
MachineInstr *SelectDef = MRI.getVRegDef(Src2.getReg());
6134+
if (SelectDef && SelectDef->getParent() == BB &&
6135+
SelectDef->getOpcode() == SelectOpc &&
6136+
SelectDef->getOperand(1).isImm() &&
6137+
SelectDef->getOperand(1).getImm() != 0 &&
6138+
SelectDef->getOperand(2).isImm() &&
6139+
SelectDef->getOperand(2).getImm() == 0) {
6140+
auto I1 = std::next(MachineBasicBlock::reverse_iterator(SelectDef));
61786141
if (I1 != BB->rend() &&
61796142
(I1->getOpcode() == AddSubOpc || I1->getOpcode() == AddcSubbOpc)) {
6180-
RecalculateSCC = false;
6181-
// Ensure there are no intervening definitions of SCC.
6143+
// Ensure there are no intervening definitions of SCC between ADDs/SUBs
6144+
const unsigned SearchLimit = 6;
6145+
unsigned Count = 0;
61826146
for (auto I2 = std::next(MachineBasicBlock::reverse_iterator(MI));
6183-
I2 != I1; I2++) {
6184-
if (I2->definesRegister(AMDGPU::SCC, TRI)) {
6185-
RecalculateSCC = true;
6147+
Count < SearchLimit; I2++, Count++) {
6148+
if (I2 == I1) {
6149+
RecalculateSCC = false;
61866150
break;
61876151
}
6152+
if (I2->definesRegister(AMDGPU::SCC, TRI))
6153+
break;
61886154
}
61896155
}
61906156
}
@@ -6223,9 +6189,8 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
62236189
BuildMI(*BB, MII, DL, TII->get(AddcSubbOpc), Dest.getReg())
62246190
.add(Src0)
62256191
.add(Src1);
6226-
>>>>>>> 0cb43743ea30 (Do not generate S_CMP if add/sub carryout is available)
62276192

6228-
BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
6193+
BuildMI(*BB, MII, DL, TII->get(SelectOpc), CarryDest.getReg())
62296194
.addImm(-1)
62306195
.addImm(0);
62316196

0 commit comments

Comments
 (0)