Skip to content

Commit aec8ea7

Browse files
committed
Use binary flag
Change-Id: Ic76c5f834352c2d0fc893332733cbb6f2382f2f7
1 parent 554f486 commit aec8ea7

File tree

2 files changed

+1960
-51
lines changed

2 files changed

+1960
-51
lines changed

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,11 @@ enum { MAX_LANES = 64 };
2929

3030
using namespace llvm;
3131

32-
namespace {
33-
enum MFMARegClass {
34-
Unspecified,
35-
VGPR,
36-
AGPR,
37-
};
38-
}
39-
40-
cl::opt<MFMARegClass>
41-
MFMAForm("amdgpu-mfma-form", cl::Hidden,
42-
cl::desc("Register class to use for Opc and Dest of MFMA. If "
32+
cl::opt<bool>
33+
MFMAVGPRForm("amdgpu-mfma-vgpr-form", cl::Hidden,
34+
cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
4335
"unspecified, default to compiler heuristics"),
44-
cl::init(MFMARegClass::Unspecified),
45-
cl::values(clEnumValN(MFMARegClass::VGPR, "vgpr",
46-
"Use the VGPR MFMA form."),
47-
clEnumValN(MFMARegClass::AGPR, "agpr",
48-
"Use the VGPR MFMA form.")));
36+
cl::init(false));
4937

5038
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
5139
const SITargetLowering *TLI = STI->getTargetLowering();
@@ -87,15 +75,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
8775
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
8876
}
8977

90-
MayNeedAGPRs = ST.hasMAIInsts();
91-
if (MFMAForm == MFMARegClass::Unspecified && ST.hasGFX90AInsts() &&
78+
MayNeedAGPRs = ST.hasMAIInsts() & !MFMAVGPRForm;
79+
if (!MFMAVGPRForm && ST.hasGFX90AInsts() &&
9280
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
9381
!mayUseAGPRs(F))
9482
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
9583

96-
else if (MFMAForm != MFMARegClass::Unspecified)
97-
MayNeedAGPRs = MFMAForm == MFMARegClass::AGPR;
98-
9984
if (AMDGPU::isChainCC(CC)) {
10085
// Chain functions don't receive an SP from their caller, but are free to
10186
// set one up. For now, we can use s32 to match what amdgpu_gfx functions

0 commit comments

Comments
 (0)