Skip to content

Commit 272942b

Browse files
committed
Delete s_cmp sX, 0 if it is redundant
Signed-off-by: John Lu <[email protected]>
1 parent dca3d5a commit 272942b

29 files changed

+1263
-1779
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10577,6 +10577,73 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1057710577
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
1057810578
return false;
1057910579

10580+
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10581+
this]() -> bool {
10582+
if (CmpValue != 0)
10583+
return false;
10584+
10585+
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
10586+
if (!Def || Def->getParent() != CmpInstr.getParent())
10587+
return false;
10588+
10589+
if (!(Def->getOpcode() == AMDGPU::S_LSHL_B32 ||
10590+
Def->getOpcode() == AMDGPU::S_LSHL_B64 ||
10591+
Def->getOpcode() == AMDGPU::S_LSHR_B32 ||
10592+
Def->getOpcode() == AMDGPU::S_LSHR_B64 ||
10593+
Def->getOpcode() == AMDGPU::S_AND_B32 ||
10594+
Def->getOpcode() == AMDGPU::S_AND_B64 ||
10595+
Def->getOpcode() == AMDGPU::S_OR_B32 ||
10596+
Def->getOpcode() == AMDGPU::S_OR_B64 ||
10597+
Def->getOpcode() == AMDGPU::S_XOR_B32 ||
10598+
Def->getOpcode() == AMDGPU::S_XOR_B64 ||
10599+
Def->getOpcode() == AMDGPU::S_NAND_B32 ||
10600+
Def->getOpcode() == AMDGPU::S_NAND_B64 ||
10601+
Def->getOpcode() == AMDGPU::S_NOR_B32 ||
10602+
Def->getOpcode() == AMDGPU::S_NOR_B64 ||
10603+
Def->getOpcode() == AMDGPU::S_XNOR_B32 ||
10604+
Def->getOpcode() == AMDGPU::S_XNOR_B64 ||
10605+
Def->getOpcode() == AMDGPU::S_ANDN2_B32 ||
10606+
Def->getOpcode() == AMDGPU::S_ANDN2_B64 ||
10607+
Def->getOpcode() == AMDGPU::S_ORN2_B32 ||
10608+
Def->getOpcode() == AMDGPU::S_ORN2_B64 ||
10609+
Def->getOpcode() == AMDGPU::S_BFE_I32 ||
10610+
Def->getOpcode() == AMDGPU::S_BFE_I64 ||
10611+
Def->getOpcode() == AMDGPU::S_BFE_U32 ||
10612+
Def->getOpcode() == AMDGPU::S_BFE_U64 ||
10613+
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B32 ||
10614+
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B64 ||
10615+
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B32 ||
10616+
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B64 ||
10617+
Def->getOpcode() == AMDGPU::S_QUADMASK_B32 ||
10618+
Def->getOpcode() == AMDGPU::S_QUADMASK_B64 ||
10619+
Def->getOpcode() == AMDGPU::S_NOT_B32 ||
10620+
Def->getOpcode() == AMDGPU::S_NOT_B64 ||
10621+
10622+
((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10623+
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10624+
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10625+
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())))
10626+
return false;
10627+
10628+
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10629+
I != E; ++I) {
10630+
if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10631+
I->killsRegister(AMDGPU::SCC, &RI))
10632+
return false;
10633+
}
10634+
10635+
if (!(Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10636+
Def->getOpcode() == AMDGPU::S_CSELECT_B64)) {
10637+
MachineOperand *SccDef =
10638+
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10639+
assert(SccDef && "Def instruction must define SCC");
10640+
SccDef->setIsDead(false);
10641+
}
10642+
10643+
CmpInstr.eraseFromParent();
10644+
return true;
10645+
};
10646+
1058010647
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
1058110648
this](int64_t ExpectedValue, unsigned SrcSize,
1058210649
bool IsReversible, bool IsSigned) -> bool {
@@ -10704,15 +10771,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1070410771
case AMDGPU::S_CMP_LG_I32:
1070510772
case AMDGPU::S_CMPK_LG_U32:
1070610773
case AMDGPU::S_CMPK_LG_I32:
10707-
return optimizeCmpAnd(0, 32, true, false);
10774+
return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
1070810775
case AMDGPU::S_CMP_GT_U32:
1070910776
case AMDGPU::S_CMPK_GT_U32:
1071010777
return optimizeCmpAnd(0, 32, false, false);
1071110778
case AMDGPU::S_CMP_GT_I32:
1071210779
case AMDGPU::S_CMPK_GT_I32:
1071310780
return optimizeCmpAnd(0, 32, false, true);
1071410781
case AMDGPU::S_CMP_LG_U64:
10715-
return optimizeCmpAnd(0, 64, true, false);
10782+
return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
1071610783
}
1071710784

1071810785
return false;

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
140140
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
141141
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
142142
; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
143-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
144143
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
145144
; CHECK-NEXT: ; %bb.1: ; %false
146145
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -345,7 +344,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
345344
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
346345
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
347346
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
348-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
349347
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
350348
; CHECK-NEXT: ; %bb.1: ; %false
351349
; CHECK-NEXT: s_mov_b32 s0, 33

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
143143
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
144144
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
145145
; CHECK-NEXT: s_and_b64 s[0:1], vcc, exec
146-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
147146
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
148147
; CHECK-NEXT: ; %bb.1: ; %false
149148
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -348,7 +347,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
348347
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
349348
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
350349
; CHECK-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
351-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
352350
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
353351
; CHECK-NEXT: ; %bb.1: ; %false
354352
; CHECK-NEXT: s_mov_b32 s0, 33

0 commit comments

Comments
 (0)