Skip to content

Commit 69def3b

Browse files
committed
WIP: AMDGPU: Handle multiple AGPR MFMA rewrites
I have this firing on one of the real examples; I still need to produce the tests and check a few edge cases.
1 parent 7bbb65c commit 69def3b

File tree

1 file changed

+31
-11
lines changed

1 file changed

+31
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,27 +57,33 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
5757
TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
5858
LIS(LIS) {}
5959

60+
/// Return true if \p MI is an MAI instruction (per TII.isMAI) whose opcode
/// has an entry in AMDGPU::getMFMASrcCVDstAGPROp, i.e. the lookup does not
/// return -1.
/// NOTE(review): -1 is presumably the "no AGPR-form opcode exists" sentinel
/// of that lookup -- confirm against its definition.
bool isRewriteCandidate(const MachineInstr &MI) const {
61+
if (!TII.isMAI(MI))
62+
return false;
63+
return AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
64+
}
65+
6066
/// Compute the register class constraints based on the uses of \p Reg,
6167
/// excluding uses in instructions accepted by isRewriteCandidate. This should be nearly identical to
6268
/// MachineRegisterInfo::recomputeRegClass.
6369
const TargetRegisterClass *
64-
recomputeRegClassExcept(Register Reg, const TargetRegisterClass *OldRC,
65-
const TargetRegisterClass *NewRC,
66-
const MachineInstr *ExceptMI) const;
70+
recomputeRegClassExceptRewritable(Register Reg,
71+
const TargetRegisterClass *OldRC,
72+
const TargetRegisterClass *NewRC) const;
6773

6874
bool run(MachineFunction &MF) const;
6975
};
7076

7177
const TargetRegisterClass *
72-
AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExcept(
78+
AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
7379
Register Reg, const TargetRegisterClass *OldRC,
74-
const TargetRegisterClass *NewRC, const MachineInstr *ExceptMI) const {
80+
const TargetRegisterClass *NewRC) const {
7581

7682
// Accumulate constraints from all uses.
7783
for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
7884
// Apply the effect of the given operand to NewRC.
7985
MachineInstr *MI = MO.getParent();
80-
if (MI == ExceptMI)
86+
if (isRewriteCandidate(*MI))
8187
continue;
8288

8389
unsigned OpNo = &MO - &MI->getOperand(0);
@@ -182,10 +188,13 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
182188
// first place, as well as need to assign another register, and need to
183189
// figure out where to put them. The live range splitting is smarter than
184190
// anything we're doing here, so trust it did something reasonable.
185-
const TargetRegisterClass *Src2ExceptRC = recomputeRegClassExcept(
186-
Src2->getReg(), Src2VirtRegRC, VirtRegRC, CopySrcMI);
187-
if (!Src2ExceptRC)
191+
const TargetRegisterClass *Src2ExceptRC =
192+
recomputeRegClassExceptRewritable(Src2->getReg(), Src2VirtRegRC,
193+
VirtRegRC);
194+
if (!Src2ExceptRC) {
195+
LLVM_DEBUG(dbgs() << "Could not recompute the regclass\n");
188196
continue;
197+
}
189198

190199
const TargetRegisterClass *NewSrc2ConstraintRC =
191200
TII.getRegClass(TII.get(AGPROp), Src2->getOperandNo(), &TRI, MF);
@@ -207,8 +216,19 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
207216

208217
CopySrcMI->setDesc(TII.get(AGPROp));
209218

210-
// TODO: Is replacing too aggressive, fixup these instructions only?
211-
MRI.replaceRegWith(CopySrcReg, VReg);
219+
// Perform replacement of the register, rewriting the rewritable uses.
220+
for (MachineInstr &UseMI :
221+
make_early_inc_range(MRI.reg_instructions(CopySrcReg))) {
222+
if (TII.isMAI(UseMI)) {
223+
// Note the register we need to rewrite may still appear in src0/src1,
224+
// but that's fine since those can use A or V anyway.
225+
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(UseMI.getOpcode());
226+
if (ReplacementOp != -1)
227+
UseMI.setDesc(TII.get(ReplacementOp));
228+
}
229+
230+
UseMI.substituteRegister(CopySrcReg, VReg, AMDGPU::NoSubRegister, TRI);
231+
}
212232

213233
LLVM_DEBUG(dbgs() << "Replaced VGPR MFMA with AGPR: " << *CopySrcMI);
214234

0 commit comments

Comments
 (0)