Skip to content
21 changes: 20 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10160,7 +10160,7 @@ static bool followSubRegDef(MachineInstr &MI,
}

MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI) {
const MachineRegisterInfo &MRI) {
assert(MRI.isSSA());
if (!P.Reg.isVirtual())
return nullptr;
Expand Down Expand Up @@ -10689,6 +10689,25 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (!optimizeSCC(Def, &CmpInstr, RI))
return false;

// If the s_or_b32 result is unused (i.e. it is effectively a 64-bit s_cmp_lg
// of a register pair) and the input is a 64-bit foldableSelect, then
// transform:
//
// (s_or_b32 (S_CSELECT_B64 (non-zero imm), 0), 0 => (S_CSELECT_B64
// (non-zero
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The example is malformed: the s_or_b32 can't directly consume the s_cselect_b64. It's also mixing different capitalizations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated comment.

// imm), 0)
if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
MachineOperand OrOpnd1 = Def->getOperand(1);
MachineOperand OrOpnd2 = Def->getOperand(2);

if (OrOpnd1.isReg() && OrOpnd2.isReg() &&
OrOpnd1.getReg() != OrOpnd2.getReg()) {
auto *Def1 = getVRegSubRegDef(getRegSubRegPair(OrOpnd1), *MRI);
auto *Def2 = getVRegSubRegDef(getRegSubRegPair(OrOpnd2), *MRI);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This kind of subreg handling needs MIR testing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added mir testing.

if (Def1 == Def2 && foldableSelect(*Def1))
optimizeSCC(Def1, Def, RI);
}
}
return true;
};

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1687,7 +1687,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI);
const MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
Expand Down
Loading
Loading