@@ -29,6 +29,24 @@ enum { MAX_LANES = 64 };
2929
3030using namespace llvm ;
3131
namespace {
/// Register-class choices selectable through the amdgpu-mfma-form option.
enum MFMARegClass {
  Unspecified, ///< Initial value of the option when no override is given.
  VGPR,        ///< Chosen by the "vgpr" option value.
  AGPR,        ///< Chosen by the "agpr" option value.
};
} // end anonymous namespace
39+
40+ cl::opt<MFMARegClass>
41+ MFMAForm (" amdgpu-mfma-form" , cl::Hidden,
42+ cl::desc (" Register class to use for Opc and Dest of MFMA. If "
43+ " unspecified, default to compiler heuristics" ),
44+ cl::init(MFMARegClass::Unspecified),
45+ cl::values(clEnumValN(MFMARegClass::VGPR, " vgpr" ,
46+ " Use the VGPR MFMA form." ),
47+ clEnumValN(MFMARegClass::AGPR, " agpr" ,
48+ " Use the VGPR MFMA form." )));
49+
3250const GCNTargetMachine &getTM (const GCNSubtarget *STI) {
3351 const SITargetLowering *TLI = STI->getTargetLowering ();
3452 return static_cast <const GCNTargetMachine &>(TLI->getTargetMachine ());
@@ -70,11 +88,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
7088 }
7189
7290 MayNeedAGPRs = ST.hasMAIInsts ();
73- if (ST.hasGFX90AInsts () &&
91+ if (MFMAForm == MFMARegClass::Unspecified && ST.hasGFX90AInsts () &&
7492 ST.getMaxNumVGPRs (F) <= AMDGPU::VGPR_32RegClass.getNumRegs () &&
7593 !mayUseAGPRs (F))
7694 MayNeedAGPRs = false ; // We will select all MAI with VGPR operands.
7795
96+ else if (MFMAForm != MFMARegClass::Unspecified)
97+ MayNeedAGPRs = MFMAForm == MFMARegClass::AGPR;
98+
7899 if (AMDGPU::isChainCC (CC)) {
79100 // Chain functions don't receive an SP from their caller, but are free to
80101 // set one up. For now, we can use s32 to match what amdgpu_gfx functions
0 commit comments