Skip to content

[AMDGPU] Restrict packed math FP32 instructions to read only one SGPR per operand on gfx12+ #152465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4933,6 +4933,43 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
return false;
}

// Packed math FP32 instructions typically accept SGPRs or VGPRs as source
// operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
// the first SGPR and use it for both the low and high operations.
if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);

const MCOperand &Src0 = Inst.getOperand(Src0Idx);
const MCOperand &Src1 = Inst.getOperand(Src1Idx);
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

const MCRegisterInfo *TRI = getContext().getRegisterInfo();

auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
unsigned Mask = 1U << Index;
return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
};

if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
!VerifyOneSGPR(/*Index=*/0))
return false;
if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
!VerifyOneSGPR(/*Index=*/1))
return false;

int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1) {
const MCOperand &Src2 = Inst.getOperand(Src2Idx);
if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
!VerifyOneSGPR(/*Index=*/2))
return false;
}
}

return true;
}

Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
Expand Down Expand Up @@ -5534,6 +5535,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}

// See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
// information.
if (AMDGPU::isPackedFP32Inst(Opcode) && AMDGPU::isGFX12Plus(ST)) {
for (unsigned I = 0; I < 3; ++I) {
if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I))
return false;
}
}

return true;
}

Expand Down Expand Up @@ -6005,6 +6015,21 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
unsigned Opc = MI.getOpcode();

// See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
// information.
if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
constexpr const AMDGPU::OpName OpNames[] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};

for (auto [I, OpName] : enumerate(OpNames)) {
int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
if (static_cast<unsigned>(SrcIdx) == OpIdx &&
!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I, &MO))
return false;
}
}

if (!isLegalRegOperand(MRI, OpInfo, MO))
return false;

Expand Down Expand Up @@ -6053,6 +6078,39 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
return true;
}

bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
const MachineOperand *MO) const {
constexpr const unsigned NumOps = 3;
constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
AMDGPU::OpName::src0, AMDGPU::OpName::src1,
AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};

assert(SrcN < NumOps);

if (!MO) {
int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
if (SrcIdx == -1)
return true;
MO = &MI.getOperand(SrcIdx);
}

if (!MO->isReg() || !RI.isSGPRReg(MRI, MO->getReg()))
return true;

int ModsIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
if (ModsIdx == -1)
return true;

unsigned Mods = MI.getOperand(ModsIdx).getImm();
bool OpSel = Mods & SISrcMods::OP_SEL_0;
bool OpSelHi = Mods & SISrcMods::OP_SEL_1;

return !OpSel && !OpSelHi;
}

bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineFunction &MF = *MI.getParent()->getParent();
Expand Down Expand Up @@ -6390,6 +6448,15 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
!RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
legalizeOpWithMove(MI, VOP3Idx[2]);

// Fix the register class of packed FP32 instructions on gfx12+. See
// SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
for (unsigned I = 0; I < 3; ++I) {
if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, /*SrcN=*/I))
legalizeOpWithMove(MI, VOP3Idx[I]);
}
}
}

Register SIInstrInfo::readlaneVGPRToSGPR(
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,19 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
const MachineOperand &MO) const;
bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand &MO) const;

/// Check if \p MO would be a legal operand for gfx12+ packed math FP32
/// instructions. Packed math FP32 instructions typically accept SGPRs or
/// VGPRs as source operands. On gfx12+, if a source operand uses SGPRs, the
/// HW can only read the first SGPR and use it for both the low and high
/// operations.
/// \p SrcN can be 0, 1, or 2, representing src0, src1, and src2,
/// respectively. If \p MO is nullptr, the operand corresponding to SrcN will
/// be used.
/// \returns true if the operand is not an SGPR register, if \p MI has no
/// source operand or no source-modifiers operand for \p SrcN, or if neither
/// the OP_SEL_0 nor the OP_SEL_1 bit is set in those source modifiers;
/// false otherwise.
bool isLegalGFX12PlusPackedMathFP32Operand(
const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
const MachineOperand *MO = nullptr) const;

/// Legalize operands in \p MI by either commuting it or inserting a
/// copy of src1.
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3318,6 +3318,20 @@ unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
return 128;
}

/// Report whether \p Opc names a packed FP32 add, mul or fma instruction
/// (V_PK_ADD_F32, V_PK_MUL_F32, V_PK_FMA_F32), with or without the _gfx12
/// suffix.
bool isPackedFP32Inst(unsigned Opc) {
  const bool IsPkAdd =
      Opc == AMDGPU::V_PK_ADD_F32 || Opc == AMDGPU::V_PK_ADD_F32_gfx12;
  const bool IsPkMul =
      Opc == AMDGPU::V_PK_MUL_F32 || Opc == AMDGPU::V_PK_MUL_F32_gfx12;
  const bool IsPkFma =
      Opc == AMDGPU::V_PK_FMA_F32 || Opc == AMDGPU::V_PK_FMA_F32_gfx12;
  return IsPkAdd || IsPkMul || IsPkFma;
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1709,6 +1709,8 @@ bool isArgPassedInSGPR(const Argument *Arg);

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

/// \returns true if \p Opc is one of the packed FP32 opcodes V_PK_ADD_F32,
/// V_PK_MUL_F32 or V_PK_FMA_F32, or one of their _gfx12 variants.
LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset);
Expand Down
Loading