Skip to content

Commit ba95310

Browse files
committed
Delete redundant s_or_b32
Signed-off-by: John Lu <[email protected]>
1 parent 80d7e59 commit ba95310

File tree

11 files changed

+1088
-1236
lines changed

11 files changed

+1088
-1236
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10212,7 +10212,7 @@ static bool followSubRegDef(MachineInstr &MI,
1021210212
}
1021310213

1021410214
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
10215-
MachineRegisterInfo &MRI) {
10215+
const MachineRegisterInfo &MRI) {
1021610216
assert(MRI.isSSA());
1021710217
if (!P.Reg.isVirtual())
1021810218
return nullptr;
@@ -10748,7 +10748,31 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1074810748
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
1074910749
return false;
1075010750

10751-
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10751+
// SCC is already valid after SCCValid.
10752+
// SCCRedefine will redefine SCC to the same value already available after
10753+
// SCCValid. If there are no intervening SCC conflicts, delete SCCRedefine and
10754+
// update kill/dead flags if necessary.
10755+
const auto optimizeSCC = [this](MachineInstr *SCCValid,
10756+
MachineInstr *SCCRedefine) -> bool {
10757+
MachineInstr *KillsSCC = nullptr;
10758+
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
10759+
SCCRedefine->getIterator())) {
10760+
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10761+
return false;
10762+
if (MI.killsRegister(AMDGPU::SCC, &RI))
10763+
KillsSCC = &MI;
10764+
}
10765+
if (MachineOperand *SccDef =
10766+
SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
10767+
SccDef->setIsDead(false);
10768+
if (KillsSCC)
10769+
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10770+
SCCRedefine->eraseFromParent();
10771+
10772+
return true;
10773+
};
10774+
10775+
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1075210776
this]() -> bool {
1075310777
if (CmpValue != 0)
1075410778
return false;
@@ -10783,25 +10807,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1078310807
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
1078410808
return false;
1078510809

10786-
MachineInstr *KillsSCC = nullptr;
10787-
for (MachineInstr &MI :
10788-
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
10789-
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10790-
return false;
10791-
if (MI.killsRegister(AMDGPU::SCC, &RI))
10792-
KillsSCC = &MI;
10793-
}
10810+
if (!optimizeSCC(Def, &CmpInstr))
10811+
return false;
1079410812

10795-
if (MachineOperand *SccDef =
10796-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
10797-
SccDef->setIsDead(false);
10798-
if (KillsSCC)
10799-
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10800-
CmpInstr.eraseFromParent();
10813+
// If s_or_b32 result is unused (i.e. it is effectively a 64-bit s_cmp_lg of
10814+
// a register pair) and the input is a 64-bit foldableSelect then transform:
10815+
//
10816+
// (s_or_b32 (S_CSELECT_B64 (non-zero imm), 0), 0) => (S_CSELECT_B64
10817+
// (non-zero
10818+
// imm), 0)
10819+
if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
10820+
MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
10821+
MachineOperand OrOpnd1 = Def->getOperand(1);
10822+
MachineOperand OrOpnd2 = Def->getOperand(2);
10823+
10824+
if (OrOpnd1.isReg() && OrOpnd2.isReg() &&
10825+
OrOpnd1.getReg() != OrOpnd2.getReg()) {
10826+
auto *Def1 = getVRegSubRegDef(getRegSubRegPair(OrOpnd1), *MRI);
10827+
auto *Def2 = getVRegSubRegDef(getRegSubRegPair(OrOpnd2), *MRI);
10828+
if (Def1 == Def2 && foldableSelect(Def1))
10829+
optimizeSCC(Def1, Def);
10830+
}
10831+
}
1080110832
return true;
1080210833
};
1080310834

10804-
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10835+
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI, optimizeSCC,
1080510836
this](int64_t ExpectedValue, unsigned SrcSize,
1080610837
bool IsReversible, bool IsSigned) -> bool {
1080710838
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
@@ -10875,21 +10906,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1087510906
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
1087610907
return false;
1087710908

10878-
MachineInstr *KillsSCC = nullptr;
10879-
for (MachineInstr &MI :
10880-
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
10881-
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10882-
return false;
10883-
if (MI.killsRegister(AMDGPU::SCC, &RI))
10884-
KillsSCC = &MI;
10885-
}
10886-
10887-
MachineOperand *SccDef =
10888-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10889-
SccDef->setIsDead(false);
10890-
if (KillsSCC)
10891-
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10892-
CmpInstr.eraseFromParent();
10909+
if (!optimizeSCC(Def, &CmpInstr))
10910+
return false;
1089310911

1089410912
if (!MRI->use_nodbg_empty(DefReg)) {
1089510913
assert(!IsReversedCC);

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1704,7 +1704,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
17041704
/// skipping copy like instructions and subreg-manipulation pseudos.
17051705
/// Following another subreg of a reg:subreg isn't supported.
17061706
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1707-
MachineRegisterInfo &MRI);
1707+
const MachineRegisterInfo &MRI);
17081708

17091709
/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
17101710
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not

0 commit comments

Comments
 (0)