Skip to content

Commit 7ed2f1b

Browse files
authored
[AMDGPU][NFC] Refactor SCC optimization (#165871)
Refactor SCC optimization --------- Signed-off-by: John Lu <[email protected]>
1 parent a943132 commit 7ed2f1b

File tree

1 file changed

+41
-43
lines changed

1 file changed

+41
-43
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 41 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10618,6 +10618,42 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1061810618
return false;
1061910619
}
1062010620

10621+
// Removes a redundant redefinition of SCC.
10622+
// SCCValid leaves SCC holding the value that SCCRedefine would compute again,
10623+
// so SCCRedefine can be deleted when no instruction between the two modifies
10624+
// SCC. On success the dead flag on SCCValid's SCC def and any SCC kill flag
// on the last intervening killer are cleared.
//
// SCCValid:    instruction after which SCC already holds the wanted value.
// SCCRedefine: later instruction that recomputes that value; erased on success.
// RI:          register info passed to the SCC modify/kill queries in the scan.
// Returns true if SCCRedefine was erased. Returns false, with no instruction
// changed, if an instruction between the two modifies SCC.
// NOTE(review): the scan presumes SCCValid precedes SCCRedefine in the same
// basic block; the ordering is not checked here -- confirm at call sites.
10625+
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
10626+
const SIRegisterInfo &RI) {
10627+
MachineInstr *KillsSCC = nullptr;
10628+
// Walk the instructions strictly between SCCValid and SCCRedefine.
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
10629+
SCCRedefine->getIterator())) {
10630+
// An intervening write to SCC means the value from SCCValid is gone; bail
// out before any flag has been touched.
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10631+
return false;
10632+
// Remember the last instruction that kills SCC; its kill flag is cleared
// below once the optimization is known to apply.
if (MI.killsRegister(AMDGPU::SCC, &RI))
10633+
KillsSCC = &MI;
10634+
}
10635+
// Clear the dead flag on SCCValid's SCC def, if it has one: that value now
// stands in for the result SCCRedefine would have produced.
if (MachineOperand *SccDef =
10636+
SCCValid->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
10637+
SccDef->setIsDead(false);
10638+
// SCC is no longer redefined after the recorded killer, so drop its kill
// marking.
if (KillsSCC)
10639+
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10640+
SCCRedefine->eraseFromParent();
10641+
return true;
10642+
}
10643+
10644+
// Returns true when Def is an S_CSELECT_B32 or S_CSELECT_B64 whose operand 1
// is a non-zero immediate and whose operand 2 is the immediate 0. Any other
// opcode or operand combination yields false.
// Used by optimizeCompareInstr to recognise the pattern
//   s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0
// NOTE(review): presumably operands 1 and 2 are the two select sources (with
// operand 0 the destination) -- confirm against the instruction definition.
static bool foldableSelect(const MachineInstr &Def) {
10645+
// Only the scalar conditional-select opcodes qualify.
if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10646+
Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10647+
return false;
10648+
// Operand 1 must be an immediate that is not zero.
bool Op1IsNonZeroImm =
10649+
Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10650+
// Operand 2 must be the immediate zero.
bool Op2IsZeroImm =
10651+
Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10652+
// Both operand conditions are required.
if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10653+
return false;
10654+
return true;
10655+
}
10656+
1062110657
bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1062210658
Register SrcReg2, int64_t CmpMask,
1062310659
int64_t CmpValue,
@@ -10637,19 +10673,6 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1063710673
if (!Def || Def->getParent() != CmpInstr.getParent())
1063810674
return false;
1063910675

10640-
const auto foldableSelect = [](MachineInstr *Def) -> bool {
10641-
if (Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10642-
Def->getOpcode() == AMDGPU::S_CSELECT_B64) {
10643-
bool Op1IsNonZeroImm =
10644-
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() != 0;
10645-
bool Op2IsZeroImm =
10646-
Def->getOperand(2).isImm() && Def->getOperand(2).getImm() == 0;
10647-
if (Op1IsNonZeroImm && Op2IsZeroImm)
10648-
return true;
10649-
}
10650-
return false;
10651-
};
10652-
1065310676
// For S_OP that set SCC = DST!=0, do the transformation
1065410677
//
1065510678
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
@@ -10660,24 +10683,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1066010683
//
1066110684
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
1066210685
// imm), 0)
10663-
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(Def))
10686+
if (!setsSCCifResultIsNonZero(*Def) && !foldableSelect(*Def))
1066410687
return false;
1066510688

10666-
MachineInstr *KillsSCC = nullptr;
10667-
for (MachineInstr &MI :
10668-
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
10669-
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10670-
return false;
10671-
if (MI.killsRegister(AMDGPU::SCC, &RI))
10672-
KillsSCC = &MI;
10673-
}
10689+
if (!optimizeSCC(Def, &CmpInstr, RI))
10690+
return false;
1067410691

10675-
if (MachineOperand *SccDef =
10676-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr))
10677-
SccDef->setIsDead(false);
10678-
if (KillsSCC)
10679-
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10680-
CmpInstr.eraseFromParent();
1068110692
return true;
1068210693
};
1068310694

@@ -10755,21 +10766,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1075510766
if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
1075610767
return false;
1075710768

10758-
MachineInstr *KillsSCC = nullptr;
10759-
for (MachineInstr &MI :
10760-
make_range(std::next(Def->getIterator()), CmpInstr.getIterator())) {
10761-
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10762-
return false;
10763-
if (MI.killsRegister(AMDGPU::SCC, &RI))
10764-
KillsSCC = &MI;
10765-
}
10766-
10767-
MachineOperand *SccDef =
10768-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10769-
SccDef->setIsDead(false);
10770-
if (KillsSCC)
10771-
KillsSCC->clearRegisterKills(AMDGPU::SCC, /*TRI=*/nullptr);
10772-
CmpInstr.eraseFromParent();
10769+
if (!optimizeSCC(Def, &CmpInstr, RI))
10770+
return false;
1077310771

1077410772
if (!MRI->use_nodbg_empty(DefReg)) {
1077510773
assert(!IsReversedCC);

0 commit comments

Comments
 (0)