Skip to content

Commit bbf558d

Browse files
arsenm authored and nzaghen committed
WIP: AMDGPU: Always select the VGPR version of MFMAs
We do not want to use AGPRs unless absolutely required due to register pressure. Rely on a post-regalloc pass to replace VGPR MFMAs with the AGPR version if it avoids the copies introduced due to live range splitting.
1 parent 6c3d62a commit bbf558d

File tree

5 files changed

+35
-66
lines changed

5 files changed

+35
-66
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4945,31 +4945,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
49454945
// for srcA/srcB?
49464946
//
49474947
// vdst, srcA, srcB, srcC
4948-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
49494948
OpdsMapping[0] =
4950-
Info->mayNeedAGPRs()
4949+
!Subtarget.hasGFX90AInsts()
49514950
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
49524951
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
49534952
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
49544953
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
49554954
OpdsMapping[4] =
4956-
Info->mayNeedAGPRs()
4955+
!Subtarget.hasGFX90AInsts()
49574956
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
49584957
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
49594958
break;
49604959
}
49614960
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
49624961
case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
4963-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
49644962
OpdsMapping[0] =
4965-
Info->mayNeedAGPRs()
4963+
!Subtarget.hasGFX90AInsts()
49664964
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
49674965
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
49684966

49694967
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
49704968
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
49714969
OpdsMapping[4] =
4972-
Info->mayNeedAGPRs()
4970+
!Subtarget.hasGFX90AInsts()
49734971
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
49744972
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
49754973

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -16615,7 +16615,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1661516615

1661616616
MachineFunction *MF = MI.getParent()->getParent();
1661716617
MachineRegisterInfo &MRI = MF->getRegInfo();
16618-
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1661916618

1662016619
if (TII->isVOP3(MI.getOpcode())) {
1662116620
// Make sure constant bus requirements are respected.
@@ -16626,15 +16625,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1662616625
// use between vgpr and agpr as agpr tuples tend to be big.
1662716626
if (!MI.getDesc().operands().empty()) {
1662816627
unsigned Opc = MI.getOpcode();
16629-
bool HasAGPRs = Info->mayNeedAGPRs();
1663016628
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
1663116629
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1663216630
for (auto I :
1663316631
{AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
1663416632
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
1663516633
if (I == -1)
1663616634
break;
16637-
if ((I == Src2Idx) && (HasAGPRs))
16635+
if (I == Src2Idx)
1663816636
break;
1663916637
MachineOperand &Op = MI.getOperand(I);
1664016638
if (!Op.isReg() || !Op.getReg().isVirtual())
@@ -16668,22 +16666,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1666816666
TII->legalizeOpWithMove(MI, Src1Idx);
1666916667
}
1667016668
}
16671-
16672-
if (!HasAGPRs)
16673-
return;
16674-
16675-
// Resolve the rest of AV operands to AGPRs.
16676-
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
16677-
if (Src2->isReg() && Src2->getReg().isVirtual()) {
16678-
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
16679-
if (TRI->isVectorSuperClass(RC)) {
16680-
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
16681-
MRI.setRegClass(Src2->getReg(), NewRC);
16682-
if (Src2->isTied())
16683-
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
16684-
}
16685-
}
16686-
}
1668716669
}
1668816670

1668916671
return;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -81,16 +81,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
8181
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
8282
}
8383

84-
MayNeedAGPRs = ST.hasMAIInsts();
85-
if (ST.hasGFX90AInsts()) {
86-
// FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection
87-
// should be separated from availability of AGPRs
88-
if (MFMAVGPRForm ||
89-
(ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
90-
!mayUseAGPRs(F)))
91-
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
92-
}
93-
9484
if (AMDGPU::isChainCC(CC)) {
9585
// Chain functions don't receive an SP from their caller, but are free to
9686
// set one up. For now, we can use s32 to match what amdgpu_gfx functions

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -499,8 +499,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
499499
// user arguments. This is an offset from the KernargSegmentPtr.
500500
bool ImplicitArgPtr : 1;
501501

502-
bool MayNeedAGPRs : 1;
503-
504502
// The hard-wired high half of the address of the global information table
505503
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
506504
// current hardware only allows a 16 bit value.
@@ -1178,10 +1176,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
11781176

11791177
unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }
11801178

1181-
bool mayNeedAGPRs() const {
1182-
return MayNeedAGPRs;
1183-
}
1184-
11851179
// \returns true if a function has a use of AGPRs via inline asm or
11861180
// has a call which may use it.
11871181
bool mayUseAGPRs(const Function &F) const;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 30 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -932,17 +932,11 @@ defvar MayNotNeedAGPRs_gisel = [{
932932
return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
933933
}];
934934

935-
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
936-
bit Scaled = false> :
937-
MAIFrag<Op, MayNeedAGPRs, HasAbid, Scaled> {
938-
let GISelPredicateCode = MayNeedAGPRs_gisel;
939-
}
935+
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
936+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
940937

941-
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
942-
bit Scaled = false> :
943-
MAIFrag<Op, MayNotNeedAGPRs, HasAbid, Scaled> {
944-
let GISelPredicateCode = MayNotNeedAGPRs_gisel;
945-
}
938+
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
939+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
946940

947941
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
948942
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
@@ -993,10 +987,14 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
993987
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
994988
MFMATable<0, "AGPR", NAME # "_e64">;
995989

996-
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in
997-
def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
998-
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
999-
MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">;
990+
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName,
991+
AddedComplexity = 10 in def _vgprcd_e64
992+
: MAIInst<OpName#"_vgprcd",
993+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
994+
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag,
995+
VgprMAIFrag<node, HasAbid, Scaled>),
996+
Scaled>,
997+
MFMATable<0, "VGPR", NAME#"_vgprcd_e64", NAME#"_e64">;
1000998
}
1001999

10021000
if NoDstOverlap then {
@@ -1007,16 +1005,22 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
10071005
!if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
10081006
MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">;
10091007

1010-
let OtherPredicates = [isGFX90APlus] in
1011-
def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
1012-
!if(!eq(node, null_frag), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
1013-
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
1008+
let OtherPredicates = [isGFX90APlus],
1009+
AddedComplexity = 10 in def _mac_vgprcd_e64
1010+
: MAIInst<OpName#"_mac_vgprcd",
1011+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
1012+
!if(!eq(node, null_frag), null_frag,
1013+
VgprMAIFrag<node, HasAbid, Scaled>),
1014+
Scaled>,
1015+
MFMATable<1, "VGPR", NAME#"_vgprcd_e64", NAME#"_mac_e64">;
10141016
}
10151017
}
10161018
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
10171019
}
10181020

1019-
// Provide a wrapper around MAIInst that provides the appended operands from V_MFMA_LD_SCALE_B32
1021+
// Provide a wrapper around MAIInst that provides the appended operands from
1022+
// V_MFMA_LD_SCALE_B32. AGPR variants are never selected; VGPR is selected and
1023+
// may later be rewritten to AGPR.
10201024
multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOperator node> {
10211025
defvar VariantSuffix = !subst(!toupper(OpName), "", NAME); // Drop the main opcode name prefix to get the "_fN_fM" suffix.
10221026
defvar UnscaledOpName = UnscaledOpName_#VariantSuffix;
@@ -1025,9 +1029,9 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
10251029

10261030
defvar NoDstOverlap = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl).NoDstOverlap;
10271031

1028-
def _e64 : ScaledMAIInst<OpName,
1029-
!cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, HasAbid, true>)>,
1030-
MFMATable<0, "AGPR", NAME # "_e64">;
1032+
def _e64
1033+
: ScaledMAIInst<OpName, !cast<MAIInst>(UnscaledOpName#"_e64"), null_frag>,
1034+
MFMATable<0, "AGPR", NAME#"_e64">;
10311035

10321036
def _vgprcd_e64 : ScaledMAIInst<OpName # "_vgprcd",
10331037
!cast<MAIInst>(UnscaledOpName#"_vgprcd_e64"), !if(NoDstOverlap, null_frag, VgprMAIFrag<node, HasAbid, true>)>,
@@ -1037,9 +1041,10 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
10371041
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
10381042
isConvertibleToThreeAddress = NoDstOverlap,
10391043
Mnemonic = UnscaledOpName_ in {
1040-
def _mac_e64 : ScaledMAIInst<OpName # "_mac",
1041-
!cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, HasAbid, true>>,
1042-
MFMATable<1, "AGPR", NAME # "_e64">;
1044+
def _mac_e64
1045+
: ScaledMAIInst<OpName#"_mac",
1046+
!cast<MAIInst>(UnscaledOpName#"_mac_e64"), null_frag>,
1047+
MFMATable<1, "AGPR", NAME#"_e64">;
10431048

10441049
def _mac_vgprcd_e64 : ScaledMAIInst<OpName # " _mac_vgprcd",
10451050
!cast<MAIInst>(UnscaledOpName # "_mac_vgprcd_e64"), VgprMAIFrag<node, HasAbid, true>>,

0 commit comments

Comments
 (0)