Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,8 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
const int SrcIdx =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have any tests that fail without this fix?

Upstream doesn't seem to have any unit tests either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None of our tests are affected by this. The upstream PR mentions that this fixes a failing case, so I would imagine the lack of a LIT test is what's keeping it from being merged right now.

(SubDef->getOpcode()) == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);

if (SrcOp.isImm())
Expand Down
47 changes: 12 additions & 35 deletions external/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3454,10 +3454,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}

bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
Expand All @@ -3474,34 +3473,10 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
return true;
default:
return false;
}
}

unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
return 2;
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::V_MOV_B64_e32:
case AMDGPU::V_MOV_B64_e64:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
case AMDGPU::COPY:
case AMDGPU::WWM_COPY:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
return 1;
return !hasAnyModifiersSet(MI);
default:
llvm_unreachable("MI is not a foldable copy");
return false;
}
}

Expand Down Expand Up @@ -4022,12 +3997,13 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
return false;
}

static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm, MachineInstr **DefMI = nullptr) {
bool SIInstrInfo::getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm,
MachineInstr **DefMI = nullptr) const {
if (Reg.isPhysical())
return false;
auto *Def = MRI.getUniqueVRegDef(Reg);
if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
if (Def && isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
Imm = Def->getOperand(1).getImm();
if (DefMI)
*DefMI = Def;
Expand All @@ -4036,8 +4012,8 @@ static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
return false;
}

static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI = nullptr) {
bool SIInstrInfo::getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI = nullptr) const {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
Expand Down Expand Up @@ -10712,10 +10688,11 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;

int64_t Mask;
const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
const auto isMask = [&Mask, SrcSize,
this](const MachineOperand *MO) -> bool {
if (MO->isImm())
Mask = MO->getImm();
else if (!getFoldableImm(MO, Mask))
else if (!this->getFoldableImm(MO, Mask))
return false;
Mask &= maxUIntN(SrcSize);
return isPowerOf2_64(Mask);
Expand Down
8 changes: 6 additions & 2 deletions external/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;

static bool isFoldableCopy(const MachineInstr &MI);
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
bool isFoldableCopy(const MachineInstr &MI) const;

bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
int64_t &Imm, MachineInstr **DefMI) const;
bool getFoldableImm(const MachineOperand *MO, int64_t &Imm,
MachineInstr **DefMI) const;

void removeModOperands(MachineInstr &MI) const;

Expand Down