Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 25 additions & 16 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
++NumOperands;
}
if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TII->isOperandLegal invocations for the srcX operands have been moved to a new loop below, but it seems that this one went away, i.e. it is not contained in the list of operands being checked in the loop. Is this intended?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. isOperandLegal doesn't do anything meaningful on results

DPPInst.add(*SDst);
++NumOperands;
}
Expand Down Expand Up @@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
int Src0Idx = NumOperands;
if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
Fail = true;
break;
}

DPPInst.add(*Src0);
DPPInst->getOperand(NumOperands).setIsKill(false);
++NumOperands;
Expand All @@ -319,21 +315,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
if (Src1) {
int OpNum = NumOperands;
assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1) &&
"dpp version of instruction missing src1");
// If subtarget does not support SGPRs for src1 operand then the
// requirements are the same as for src0. We check src0 instead because
// pseudos are shared between subtargets and allow SGPR for src1 on all.
if (!ST->hasDPPSrc1SGPR()) {
assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
getOperandSize(*DPPInst, NumOperands, *MRI) &&
"Src0 and Src1 operands should have the same size");
OpNum = Src0Idx;
}
if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
Fail = true;
break;
}

DPPInst.add(*Src1);
++NumOperands;
}
Expand All @@ -349,9 +341,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
if (Src2) {
if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {
LLVM_DEBUG(dbgs() << " failed: dpp does not have src2\n");
Fail = true;
break;
}
Expand Down Expand Up @@ -431,6 +422,24 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
DPPInst.addImm(CombBCZ ? 1 : 0);

constexpr AMDGPU::OpName Srcs[] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};

// FIXME: isOperandLegal expects to operate on an completely built
// instruction. We should have better legality APIs to check if the
// candidate operands will be legal without building the instruction first.
for (auto [I, OpName] : enumerate(Srcs)) {
int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, OpName);
if (OpIdx == -1)
break;

if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
LLVM_DEBUG(dbgs() << " failed: src" << I << " operand is illegal\n");
Fail = true;
break;
}
}
} while (false);

if (Fail) {
Expand Down
59 changes: 35 additions & 24 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ struct FoldCandidate {

class SIFoldOperandsImpl {
public:
MachineFunction *MF;
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
Expand Down Expand Up @@ -705,6 +706,36 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
}

MachineOperand *New = Fold.Def.OpToFold;

// Verify the register is compatible with the operand.
if (const TargetRegisterClass *OpRC =
TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI, *MF)) {
const TargetRegisterClass *OldRC = MRI->getRegClass(Old.getReg());
const TargetRegisterClass *NewRC = MRI->getRegClass(New->getReg());
unsigned NewSubReg = New->getSubReg();
unsigned OldSubReg = Old.getSubReg();

const TargetRegisterClass *ConstrainRC = OpRC;
if (NewSubReg && OldSubReg) {
unsigned PreA, PreB;
ConstrainRC = TRI->getCommonSuperRegClass(OpRC, OldSubReg, NewRC,
NewSubReg, PreA, PreB);
} else if (OldSubReg) {
ConstrainRC = TRI->getMatchingSuperRegClass(OldRC, OpRC, OldSubReg);
} else if (NewSubReg) {
ConstrainRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, NewSubReg);
}

if (!ConstrainRC)
return false;

if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI)
<< TRI->getRegClassName(ConstrainRC) << '\n');
return false;
}
}

// Rework once the VS_16 register class is updated to include proper
// 16-bit SGPRs instead of 32-bit ones.
if (Old.getSubReg() == AMDGPU::lo16 && TRI->isSGPRReg(*MRI, New->getReg()))
Expand Down Expand Up @@ -1429,30 +1460,9 @@ void SIFoldOperandsImpl::foldOperand(
return;
}

if (!FoldingImmLike) {
if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
// Don't fold if OpToFold doesn't hold an aligned register.
const TargetRegisterClass *RC =
TRI->getRegClassForReg(*MRI, OpToFold.getReg());
assert(RC);
if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
unsigned SubReg = OpToFold.getSubReg();
if (const TargetRegisterClass *SubRC =
TRI->getSubRegisterClass(RC, SubReg))
RC = SubRC;
}

if (!RC || !TRI->isProperlyAlignedRC(*RC))
return;
}

tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);

// FIXME: We could try to change the instruction from 64-bit to 32-bit
// to enable more folding opportunities. The shrink operands pass
// already does this.
return;
}
// FIXME: We could try to change the instruction from 64-bit to 32-bit
// to enable more folding opportunities. The shrink operands pass
// already does this.

tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
}
Expand Down Expand Up @@ -2747,6 +2757,7 @@ bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
}

bool SIFoldOperandsImpl::run(MachineFunction &MF) {
this->MF = &MF;
MRI = &MF.getRegInfo();
ST = &MF.getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
Expand Down
11 changes: 5 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4572,9 +4572,8 @@ static bool compareMachineOp(const MachineOperand &Op0,
}
}

bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
const MachineOperand &MO) const {
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];

assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
Expand All @@ -4586,17 +4585,17 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
return false;

if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::src2))
if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
AMDGPU::OpName::src2))
return false;
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}

if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
return false;

if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
return true;

return ST.hasVOP3Literal();
Expand Down
27 changes: 16 additions & 11 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,13 +533,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

static bool isVOP3(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
static bool isVOP3(const MCInstrDesc &Desc) {
return Desc.TSFlags & SIInstrFlags::VOP3;
}

bool isVOP3(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
static bool isVOP3(const MachineInstr &MI) { return isVOP3(MI.getDesc()); }

bool isVOP3(uint16_t Opcode) const { return isVOP3(get(Opcode)); }

static bool isSDWA(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
Expand Down Expand Up @@ -841,13 +841,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
}

static bool isMAI(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
static bool isMAI(const MCInstrDesc &Desc) {
return Desc.TSFlags & SIInstrFlags::IsMAI;
}

bool isMAI(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
}
static bool isMAI(const MachineInstr &MI) { return isMAI(MI.getDesc()); }

bool isMAI(uint16_t Opcode) const { return isMAI(get(Opcode)); }

static bool isMFMA(const MachineInstr &MI) {
return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
Expand Down Expand Up @@ -1180,9 +1180,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return isInlineConstant(*MO.getParent(), MO.getOperandNo());
}

bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
const MachineOperand &MO) const;

bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MachineOperand &MO) const {
return isImmOperandLegal(MI.getDesc(), OpNo, MO);
}

/// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isLegalAV64PseudoImm(uint64_t Imm) const;

Expand Down
18 changes: 7 additions & 11 deletions llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN

---

# GCN-LABEL: name: vop3
# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: vop3
tracksRegLiveness: true
body: |
Expand Down Expand Up @@ -39,12 +38,9 @@ body: |

# GCN-LABEL: name: vop3_sgpr_src1
# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
name: vop3_sgpr_src1
tracksRegLiveness: true
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AMDGPU/si-fold-aligned-agprs.mir
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ body: |
; GFX90A: liveins: $vgpr0_vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_64 = IMPLICIT_DEF
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_64_align2 = COPY killed [[DEF]]
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_64_align2 = IMPLICIT_DEF
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, implicit $exec
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:areg_64 = IMPLICIT_DEF
%2:areg_64_align2 = COPY killed %1
Expand Down Expand Up @@ -105,9 +104,8 @@ body: |
; GFX90A: liveins: $vgpr0_vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_96 = IMPLICIT_DEF
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_96_align2 = COPY killed [[DEF]]
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_96_align2 = IMPLICIT_DEF
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[DEF]], 0, 0, implicit $exec
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:areg_96 = IMPLICIT_DEF
%2:areg_96_align2 = COPY killed %1
Expand Down Expand Up @@ -234,9 +232,8 @@ body: |
; GFX90A: liveins: $vgpr0_vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_128 = IMPLICIT_DEF
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed [[DEF]]
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[DEF]], 0, 0, implicit $exec
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:areg_128 = IMPLICIT_DEF
%2:areg_128_align2 = COPY killed %1
Expand Down
Loading
Loading