Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 41 additions & 43 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10618,6 +10618,42 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}

// SCC is already valid after SCCValid.
// SCCRedefine will redefine SCC to the same value already available after
// SCCValid. If there are no intervening SCC conflicts delete SCCRedefine and
// update kill/dead flags if necessary.
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
const SIRegisterInfo &RI) {
MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
SCCRedefine->getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}
if (MachineOperand *SccDef =
SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
SCCRedefine->eraseFromParent();
return true;
}

static bool foldableSelect(const MachineInstr &Def) {
if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
Def.getOpcode() != AMDGPU::S_CSELECT_B64)
return false;
bool Op1IsNonZeroImm =
Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
bool Op2IsZeroImm =
Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
if (!Op1IsNonZeroImm || !Op2IsZeroImm)
return false;
return true;
}

bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Register SrcReg2, int64_t CmpMask,
int64_t CmpValue,
Expand All @@ -10637,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!Def || Def->getParent() != CmpInstr.getParent())
return false;

const auto foldableSelect = [](MachineInstr *Def) -> bool {
if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
bool Op1IsNonZeroImm =
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
bool Op2IsZeroImm =
Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
if (Op1IsNonZeroImm && Op2IsZeroImm)
return true;
}
return false;
};

// For S_OP that set SCC = DST!=0, do the transformation
//
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
Expand All @@ -10660,24 +10683,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
//
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
// imm), 0)
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
return false;

MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI :
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}
if (!optimizeSCC(Def, &CmpInstr, RI))
return false;

if (MachineOperand *SccDef =
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
CmpInstr.eraseFromParent();
return true;
};

Expand Down Expand Up @@ -10755,21 +10766,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
return false;

MachineInstr *KillsSCC = nullptr;
for (MachineInstr &MI :
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
return false;
if (MI.killsRegister(AMDGPU::SCC, &RI))
KillsSCC = &MI;
}

MachineOperand *SccDef =
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
SccDef->setIsDead(false);
if (KillsSCC)
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
CmpInstr.eraseFromParent();
if (!optimizeSCC(Def, &CmpInstr, RI))
return false;

if (!MRI->use_nodbg_empty(DefReg)) {
assert(!IsReversedCC);
Expand Down