Skip to content

Commit 054964c

Browse files
committed
Delete unnecessary S_CMP in separate PR
Signed-off-by: John Lu <[email protected]>
1 parent d88ffc5 commit 054964c

File tree

1 file changed

+21
-68
lines changed

1 file changed

+21
-68
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 21 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -6110,81 +6110,34 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
6110 6110
unsigned SelectOpc =
6111 6111
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6112 6112
unsigned AddcSubbOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
6113-
unsigned AddSubOpc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
6114-
// Lowering for:
6115-
//
6116-
// S_UADDO_PSEUDO|S_ADD_CO_PSEUDO
6117-
// <no SCC def code>
6118-
// S_ADD_CO_PSEUDO
6119-
//
6120-
// produces:
6121-
//
6122-
// S_ADD_I32|S_ADDC_U32 ; lowered from S_UADDO_PSEUDO
6123-
// SREG = S_CSELECT_B32|64 [1,-1], 0 ; lowered from S_UADDO_PSEUDO
6124-
// <no SCC def code>
6125-
// S_CMP32|64 SREG, 0 ; lowered from S_ADD_CO_PSEUDO
6126-
// S_ADDC_U32 ; lowered from S_ADD_CO_PSEUDO
6127-
//
6128-
// At this point before generating the S_CMP check if it is redundant. If
6129-
// so do not recalculate it. Subsequent optimizations will also delete the
6130-
// dead S_CSELECT*.
6131-
6132-
bool RecalculateSCC{true};
6133-
MachineInstr *SelectDef = MRI.getVRegDef(Src2.getReg());
6134-
if (SelectDef && SelectDef->getParent() == BB &&
6135-
SelectDef->getOpcode() == SelectOpc &&
6136-
SelectDef->getOperand(1).isImm() &&
6137-
SelectDef->getOperand(1).getImm() != 0 &&
6138-
SelectDef->getOperand(2).isImm() &&
6139-
SelectDef->getOperand(2).getImm() == 0) {
6140-
auto I1 = std::next(MachineBasicBlock::reverse_iterator(SelectDef));
6141-
if (I1 != BB->rend() &&
6142-
(I1->getOpcode() == AddSubOpc || I1->getOpcode() == AddcSubbOpc)) {
6143-
// Ensure there are no intervening definitions of SCC between ADDs/SUBs
6144-
const unsigned SearchLimit = 6;
6145-
unsigned Count = 0;
6146-
for (auto I2 = std::next(MachineBasicBlock::reverse_iterator(MI));
6147-
Count < SearchLimit; I2++, Count++) {
6148-
if (I2 == I1) {
6149-
RecalculateSCC = false;
6150-
break;
6151-
}
6152-
if (I2->definesRegister(AMDGPU::SCC, TRI))
6153-
break;
6154-
}
6155-
}
6156-
}
6157 6113

6158-
if (RecalculateSCC) {
6159-
if (WaveSize == 64) {
6160-
if (ST.hasScalarCompareEq64()) {
6161-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
6162-
.addReg(Src2.getReg())
6163-
.addImm(0);
6164-
} else {
6165-
const TargetRegisterClass *SubRC =
6166-
TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
6167-
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
6168-
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
6169-
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
6170-
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
6171-
Register Src2_32 =
6172-
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6173-
6174-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
6175-
.add(Src2Sub0)
6176-
.add(Src2Sub1);
6114+
if (WaveSize == 64) {
6115+
if (ST.hasScalarCompareEq64()) {
6116+
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
6117+
.addReg(Src2.getReg())
6118+
.addImm(0);
6119+
} else {
6120+
const TargetRegisterClass *SubRC =
6121+
TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
6122+
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
6123+
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
6124+
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
6125+
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
6126+
Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6127+
6128+
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
6129+
.add(Src2Sub0)
6130+
.add(Src2Sub1);
6177 6131

6178-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6179-
.addReg(Src2_32, RegState::Kill)
6180-
.addImm(0);
6181-
}
6132+
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6133+
.addReg(Src2_32, RegState::Kill)
6134+
.addImm(0);
6135+
}
6182 6136
} else {
6183 6137
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6184 6138
.addReg(Src2.getReg())
6185 6139
.addImm(0);
6186 6140
}
6187-
}
6188 6141

6189 6142
BuildMI(*BB, MII, DL, TII->get(AddcSubbOpc), Dest.getReg())
6190 6143
.add(Src0)

0 commit comments

Comments
 (0)