Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5043,6 +5043,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned MinNumRegsRequired = DstSize / 32;

// Default for MAI intrinsics.
// srcC can also be an immediate which can be folded later.
// FIXME: Should we eventually add an alternative mapping with AGPR src
Expand All @@ -5051,29 +5054,32 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// vdst, srcA, srcB, srcC
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
OpdsMapping[0] =
Info->mayNeedAGPRs()
Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
Info->mayNeedAGPRs()
Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned MinNumRegsRequired = DstSize / 32;

const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
OpdsMapping[0] =
Info->mayNeedAGPRs()
Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
Info->mayNeedAGPRs()
Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17357,7 +17357,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// use between vgpr and agpr as agpr tuples tend to be big.
if (!MI.getDesc().operands().empty()) {
unsigned Opc = MI.getOpcode();
bool HasAGPRs = Info->mayNeedAGPRs();
bool HasAGPRs =
!Subtarget->hasGFX90AInsts() || Info->getMinNumAGPRs() != 0;
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
for (auto I :
Expand Down
22 changes: 12 additions & 10 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,20 @@ using namespace llvm;
// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases
// where it is better to produce the VGPR form (e.g. if there are VGPR users
// of the MFMA result).
static cl::opt<bool> MFMAVGPRForm(
"amdgpu-mfma-vgpr-form", cl::Hidden,
static cl::opt<bool, true> MFMAVGPRFormOpt(
"amdgpu-mfma-vgpr-form",
cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
"unspecified, default to compiler heuristics"),
cl::init(false));
cl::location(SIMachineFunctionInfo::MFMAVGPRForm), cl::init(false),
cl::Hidden);

const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
const SITargetLowering *TLI = STI->getTargetLowering();
return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

bool SIMachineFunctionInfo::MFMAVGPRForm = false;

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
: AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
Expand Down Expand Up @@ -81,14 +84,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}

MayNeedAGPRs = ST.hasMAIInsts();
if (ST.hasGFX90AInsts()) {
// FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection
// should be separated from availability of AGPRs
if (MFMAVGPRForm ||
(ST.getMaxNumVGPRs(F) <= ST.getAddressableNumArchVGPRs() &&
!mayUseAGPRs(F)))
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
// FIXME: Extract logic out of getMaxNumVectorRegs; we need to apply the
// allocation granule and clamping.
auto [MinNumAGPRAttr, MaxNumAGPRAttr] =
AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", {~0u, ~0u},
/*OnlyFirstRequired=*/true);
MinNumAGPRs = MinNumAGPRAttr;
}

if (AMDGPU::isChainCC(CC)) {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;

bool MayNeedAGPRs : 1;
/// Minimum number of AGPRs required to allocate in the function. Only
/// relevant for gfx90a-gfx950. For gfx908, this should be infinite.
unsigned MinNumAGPRs = ~0u;

// The hard-wired high half of the address of the global information table
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
Expand Down Expand Up @@ -537,6 +539,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
static bool MFMAVGPRForm;

struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
bool FullyAllocated = false;
Expand Down Expand Up @@ -1196,9 +1200,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,

unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }

bool mayNeedAGPRs() const {
return MayNeedAGPRs;
}
unsigned getMinNumAGPRs() const { return MinNumAGPRs; }

// \returns true if a function has a use of AGPRs via inline asm or
// has a call which may use it.
Expand Down
70 changes: 33 additions & 37 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
class VOP3P_Mix_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR>
: VOP3P_Mix_Profile<P, Features, 0> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let IsRealTrue16 = 1;
let DstRC64 = getVALUDstForVT<P.DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
}

Expand Down Expand Up @@ -950,7 +950,7 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> :
}

// Currently assumes scaled instructions never have abid
class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
class MAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false> : PatFrag <
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
node:$src0_modifiers, node:$scale_src0,
node:$src1_modifiers, node:$scale_src1),
Expand All @@ -959,37 +959,30 @@ class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled =
(ops node:$blgp))),
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
(Op $src0, $src1, $src2, $cbsz, $blgp))),
pred
>;

defvar MayNeedAGPRs = [{
return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNeedAGPRs_gisel = [{
return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];
(Op $src0, $src1, $src2, $cbsz, $blgp)))>;

defvar MayNotNeedAGPRs = [{
return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];
class CanUseAGPR_MAI<ValueType vt> {
code PredicateCode = [{
return !Subtarget->hasGFX90AInsts() ||
(!SIMachineFunctionInfo::MFMAVGPRForm &&
MF->getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
}] # !srl(vt.Size, 5) # ");";

defvar MayNotNeedAGPRs_gisel = [{
return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];
code GISelPredicateCode = [{
return !Subtarget->hasGFX90AInsts() ||
(!SIMachineFunctionInfo::MFMAVGPRForm &&
MF.getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
}] # !srl(vt.Size, 5) # ");";
}

class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
class AgprMAIFrag<SDPatternOperator Op, ValueType vt, bit HasAbid = true,
bit Scaled = false> :
MAIFrag<Op, MayNeedAGPRs, HasAbid, Scaled> {
let GISelPredicateCode = MayNeedAGPRs_gisel;
}
MAIFrag<Op, HasAbid, Scaled>,
CanUseAGPR_MAI<vt>;

class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
bit Scaled = false> :
MAIFrag<Op, MayNotNeedAGPRs, HasAbid, Scaled> {
let GISelPredicateCode = MayNotNeedAGPRs_gisel;
}
bit Scaled = false> :
MAIFrag<Op, HasAbid, Scaled>;

let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
Expand Down Expand Up @@ -1037,16 +1030,19 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
bit HasAbid = true,
bit Scaled = false> {
defvar NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap;
defvar ProfileAGPR = !cast<VOPProfileMAI>("VOPProfileMAI_" # P);
defvar ProfileVGPR = !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD");


let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
def _e64 : MAIInst<OpName, ProfileAGPR,
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, ProfileAGPR.DstVT, HasAbid, Scaled>), Scaled>,
MFMATable<0, "AGPR", NAME # "_e64">;

let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in
def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", ProfileVGPR,
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">;
}
Expand All @@ -1055,12 +1051,12 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = OpName in {
def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
!if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
def "_mac_e64" : MAIInst<OpName # "_mac", ProfileAGPR,
!if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, ProfileAGPR.DstVT, HasAbid, Scaled>), Scaled>,
MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">;

let OtherPredicates = [isGFX90APlus] in
def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", ProfileVGPR,
!if(!eq(node, null_frag), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
}
Expand All @@ -1074,11 +1070,11 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
defvar UnscaledOpName = UnscaledOpName_#VariantSuffix;

defvar HasAbid = false;

defvar NoDstOverlap = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl).NoDstOverlap;
defvar Profile = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl);
defvar NoDstOverlap = Profile.NoDstOverlap;

def _e64 : ScaledMAIInst<OpName,
!cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, HasAbid, true>)>,
!cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, Profile.DstVT, HasAbid, true>)>,
MFMATable<0, "AGPR", NAME # "_e64">;

def _vgprcd_e64 : ScaledMAIInst<OpName # "_vgprcd",
Expand All @@ -1090,7 +1086,7 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = UnscaledOpName_ in {
def _mac_e64 : ScaledMAIInst<OpName # "_mac",
!cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, HasAbid, true>>,
!cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, Profile.DstVT, HasAbid, true>>,
MFMATable<1, "AGPR", NAME # "_e64">;

def _mac_vgprcd_e64 : ScaledMAIInst<OpName # " _mac_vgprcd",
Expand Down
Loading