Skip to content

Commit d4c5b8f

Browse files
rampitecmemfrob
authored and committed
[AMDGPU] Use S_BITCMP0_* to replace AND in optimizeCompareInstr
These can be used for reversed conditions if the result of the AND is unused except in the compare:

  s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
  s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
  s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
  s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
  s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
  s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0

Differential Revision: https://reviews.llvm.org/D109099
1 parent 70a0713 commit d4c5b8f

File tree

2 files changed

+336
-20
lines changed

2 files changed

+336
-20
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8009,7 +8009,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80098009

80108010
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
80118011
this](int64_t ExpectedValue,
8012-
unsigned SrcSize) -> bool {
8012+
unsigned SrcSize,
8013+
bool IsReversable) -> bool {
80138014
// s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
80148015
// s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
80158016
// s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8023,9 +8024,22 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80238024
//
80248025
// If result of the AND is unused except in the compare:
80258026
// s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
8026-
8027-
if (CmpValue != ExpectedValue)
8028-
return false;
8027+
//
8028+
// s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
8029+
// s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
8030+
// s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
8031+
// s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
8032+
// s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
8033+
// s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
8034+
8035+
bool IsReversedCC = false;
8036+
if (CmpValue != ExpectedValue) {
8037+
if (!IsReversable)
8038+
return false;
8039+
IsReversedCC = CmpValue == (ExpectedValue ^ 1);
8040+
if (!IsReversedCC)
8041+
return false;
8042+
}
80298043

80308044
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
80318045
if (!Def || Def->getParent() != CmpInstr.getParent())
@@ -8041,6 +8055,10 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80418055
else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
80428056
return false;
80438057

8058+
Register DefReg = Def->getOperand(0).getReg();
8059+
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
8060+
return false;
8061+
80448062
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
80458063
I != E; ++I) {
80468064
if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
@@ -8052,17 +8070,20 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80528070
SccDef->setIsDead(false);
80538071
CmpInstr.eraseFromParent();
80548072

8055-
if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
8073+
if (!MRI->use_nodbg_empty(DefReg)) {
8074+
assert(!IsReversedCC);
80568075
return true;
8076+
}
80578077

80588078
// Replace AND with unused result with a S_BITCMP.
80598079
// TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
80608080
// process any power of 2.
80618081
MachineBasicBlock *MBB = Def->getParent();
80628082

8063-
// TODO: Reverse conditions can use S_BITCMP0_*.
8064-
unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
8065-
: AMDGPU::S_BITCMP1_B64;
8083+
unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
8084+
: AMDGPU::S_BITCMP1_B32
8085+
: IsReversedCC ? AMDGPU::S_BITCMP0_B64
8086+
: AMDGPU::S_BITCMP1_B64;
80668087

80678088
BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
80688089
.add(*SrcOp)
@@ -8077,26 +8098,28 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
80778098
break;
80788099
case AMDGPU::S_CMP_EQ_U32:
80798100
case AMDGPU::S_CMP_EQ_I32:
8080-
case AMDGPU::S_CMP_GE_U32:
8081-
case AMDGPU::S_CMP_GE_I32:
80828101
case AMDGPU::S_CMPK_EQ_U32:
80838102
case AMDGPU::S_CMPK_EQ_I32:
8103+
return optimizeCmpAnd(1, 32, true);
8104+
case AMDGPU::S_CMP_GE_U32:
8105+
case AMDGPU::S_CMP_GE_I32:
80848106
case AMDGPU::S_CMPK_GE_U32:
80858107
case AMDGPU::S_CMPK_GE_I32:
8086-
return optimizeCmpAnd(1, 32);
8108+
return optimizeCmpAnd(1, 32, false);
80878109
case AMDGPU::S_CMP_EQ_U64:
8088-
return optimizeCmpAnd(1, 64);
8110+
return optimizeCmpAnd(1, 64, true);
80898111
case AMDGPU::S_CMP_LG_U32:
80908112
case AMDGPU::S_CMP_LG_I32:
8091-
case AMDGPU::S_CMP_GT_U32:
8092-
case AMDGPU::S_CMP_GT_I32:
80938113
case AMDGPU::S_CMPK_LG_U32:
80948114
case AMDGPU::S_CMPK_LG_I32:
8115+
return optimizeCmpAnd(0, 32, true);
8116+
case AMDGPU::S_CMP_GT_U32:
8117+
case AMDGPU::S_CMP_GT_I32:
80958118
case AMDGPU::S_CMPK_GT_U32:
80968119
case AMDGPU::S_CMPK_GT_I32:
8097-
return optimizeCmpAnd(0, 32);
8120+
return optimizeCmpAnd(0, 32, false);
80988121
case AMDGPU::S_CMP_LG_U64:
8099-
return optimizeCmpAnd(0, 64);
8122+
return optimizeCmpAnd(0, 64, true);
81008123
}
81018124

81028125
return false;

0 commit comments

Comments
 (0)