Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,8 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
const int SrcIdx =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some unit tests to make sure this is working as expected?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(SubDef->getOpcode()) == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);

if (SrcOp.isImm())
Expand Down
47 changes: 12 additions & 35 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3408,10 +3408,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}

bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
Expand All @@ -3428,34 +3427,10 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
return true;
default:
return false;
}
}

unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
return 2;
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::V_MOV_B64_e32:
case AMDGPU::V_MOV_B64_e64:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
case AMDGPU::COPY:
case AMDGPU::WWM_COPY:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
return 1;
return !hasAnyModifiersSet(MI);
default:
llvm_unreachable("MI is not a foldable copy");
return false;
}
}

Expand Down Expand Up @@ -3976,12 +3951,13 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
return false;
}

static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm, MachineInstr **DefMI = nullptr) {
bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm,
MachineInstr **DefMI = nullptr) const {
if (Reg.isPhysical())
return false;
auto *Def = MRI.getUniqueVRegDef(Reg);
if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
if (Def && isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
Imm = Def->getOperand(1).getImm();
if (DefMI)
*DefMI = Def;
Expand All @@ -3990,8 +3966,8 @@ static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
return false;
}

static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI = nullptr) {
bool SIInstrInfo::getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI = nullptr) const {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
Expand Down Expand Up @@ -10643,10 +10619,11 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;

int64_t Mask;
const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
const auto isMask = [&Mask, SrcSize,
this](const MachineOperand *MO) -> bool {
if (MO->isImm())
Mask = MO->getImm();
else if (!getFoldableImm(MO, Mask))
else if (!this->getFoldableImm(MO, Mask))
return false;
Mask &= maxUIntN(SrcSize);
return isPowerOf2_64(Mask);
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;

static bool isFoldableCopy(const MachineInstr &MI);
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
bool isFoldableCopy(const MachineInstr &MI) const;

bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm, MachineInstr **DefMI) const;
bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI) const;

void removeModOperands(MachineInstr &MI) const;

Expand Down