Skip to content

Commit dc14a4f

Browse files
committed
Streamline code and handle more opcodes
Signed-off-by: John Lu <[email protected]>
1 parent 272942b commit dc14a4f

File tree

2 files changed

+33
-46
lines changed

2 files changed

+33
-46
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 33 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10586,43 +10586,38 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1058610586
if (!Def || Def->getParent() != CmpInstr.getParent())
1058710587
return false;
1058810588

10589-
if (!(Def->getOpcode() == AMDGPU::S_LSHL_B32 ||
10590-
Def->getOpcode() == AMDGPU::S_LSHL_B64 ||
10591-
Def->getOpcode() == AMDGPU::S_LSHR_B32 ||
10592-
Def->getOpcode() == AMDGPU::S_LSHR_B64 ||
10593-
Def->getOpcode() == AMDGPU::S_AND_B32 ||
10594-
Def->getOpcode() == AMDGPU::S_AND_B64 ||
10595-
Def->getOpcode() == AMDGPU::S_OR_B32 ||
10596-
Def->getOpcode() == AMDGPU::S_OR_B64 ||
10597-
Def->getOpcode() == AMDGPU::S_XOR_B32 ||
10598-
Def->getOpcode() == AMDGPU::S_XOR_B64 ||
10599-
Def->getOpcode() == AMDGPU::S_NAND_B32 ||
10600-
Def->getOpcode() == AMDGPU::S_NAND_B64 ||
10601-
Def->getOpcode() == AMDGPU::S_NOR_B32 ||
10602-
Def->getOpcode() == AMDGPU::S_NOR_B64 ||
10603-
Def->getOpcode() == AMDGPU::S_XNOR_B32 ||
10604-
Def->getOpcode() == AMDGPU::S_XNOR_B64 ||
10605-
Def->getOpcode() == AMDGPU::S_ANDN2_B32 ||
10606-
Def->getOpcode() == AMDGPU::S_ANDN2_B64 ||
10607-
Def->getOpcode() == AMDGPU::S_ORN2_B32 ||
10608-
Def->getOpcode() == AMDGPU::S_ORN2_B64 ||
10609-
Def->getOpcode() == AMDGPU::S_BFE_I32 ||
10610-
Def->getOpcode() == AMDGPU::S_BFE_I64 ||
10611-
Def->getOpcode() == AMDGPU::S_BFE_U32 ||
10612-
Def->getOpcode() == AMDGPU::S_BFE_U64 ||
10613-
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B32 ||
10614-
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B64 ||
10615-
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B32 ||
10616-
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B64 ||
10617-
Def->getOpcode() == AMDGPU::S_QUADMASK_B32 ||
10618-
Def->getOpcode() == AMDGPU::S_QUADMASK_B64 ||
10619-
Def->getOpcode() == AMDGPU::S_NOT_B32 ||
10620-
Def->getOpcode() == AMDGPU::S_NOT_B64 ||
10621-
10622-
((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10623-
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10624-
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10625-
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())))
10589+
bool CanOptimize = false;
10590+
MachineOperand *SccDef =
10591+
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10592+
10593+
// For S_OP that set SCC = DST!=0, do the transformation
10594+
//
10595+
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
10596+
if (SccDef && Def->getOpcode() != AMDGPU::S_ADD_I32 &&
10597+
Def->getOpcode() != AMDGPU::S_ADD_U32 &&
10598+
Def->getOpcode() != AMDGPU::S_ADDC_U32 &&
10599+
Def->getOpcode() != AMDGPU::S_SUB_I32 &&
10600+
Def->getOpcode() != AMDGPU::S_SUB_U32 &&
10601+
Def->getOpcode() != AMDGPU::S_SUBB_U32 &&
10602+
Def->getOpcode() != AMDGPU::S_MIN_I32 &&
10603+
Def->getOpcode() != AMDGPU::S_MIN_U32 &&
10604+
Def->getOpcode() != AMDGPU::S_MAX_I32 &&
10605+
Def->getOpcode() != AMDGPU::S_MAX_U32 &&
10606+
Def->getOpcode() != AMDGPU::S_ADDK_I32)
10607+
CanOptimize = true;
10608+
10609+
// s_cmp_lg_* is redundant because the SCC input value for S_CSELECT* has
10610+
// the same value that will be calculated by s_cmp_lg_*
10611+
//
10612+
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
10613+
// imm), 0)
10614+
if ((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10615+
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10616+
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10617+
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())
10618+
CanOptimize = true;
10619+
10620+
if (!CanOptimize)
1062610621
return false;
1062710622

1062810623
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
@@ -10632,13 +10627,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1063210627
return false;
1063310628
}
1063410629

10635-
if (!(Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10636-
Def->getOpcode() == AMDGPU::S_CSELECT_B64)) {
10637-
MachineOperand *SccDef =
10638-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10639-
assert(SccDef && "Def instruction must define SCC");
10630+
if (SccDef)
1064010631
SccDef->setIsDead(false);
10641-
}
1064210632

1064310633
CmpInstr.eraseFromParent();
1064410634
return true;

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ define amdgpu_ps i32 @ashr32(i32 inreg %val0, i32 inreg %val1) {
6666
; CHECK-LABEL: ashr32:
6767
; CHECK: ; %bb.0:
6868
; CHECK-NEXT: s_ashr_i32 s0, s0, s1
69-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
7069
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
7170
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
7271
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
@@ -81,7 +80,6 @@ define amdgpu_ps i32 @ashr64(i64 inreg %val0, i64 inreg %val1) {
8180
; CHECK-LABEL: ashr64:
8281
; CHECK: ; %bb.0:
8382
; CHECK-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
84-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
8583
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
8684
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
8785
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
@@ -96,7 +94,6 @@ define amdgpu_ps i32 @abs32(i32 inreg %val0, ptr addrspace(1) %ptr) {
9694
; CHECK-LABEL: abs32:
9795
; CHECK: ; %bb.0:
9896
; CHECK-NEXT: s_abs_i32 s0, s0
99-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
10097
; CHECK-NEXT: v_mov_b32_e32 v2, s0
10198
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
10299
; CHECK-NEXT: global_store_dword v[0:1], v2, off

0 commit comments

Comments
 (0)