@@ -57,27 +57,33 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
57
57
TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
58
58
LIS(LIS) {}
59
59
60
+ bool isRewriteCandidate (const MachineInstr &MI) const {
61
+ if (!TII.isMAI (MI))
62
+ return false ;
63
+ return AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ()) != -1 ;
64
+ }
65
+
60
66
// / Compute the register class constraints based on the uses of \p Reg,
61
67
// / excluding uses by rewrite candidates (see isRewriteCandidate). This should be nearly identical to
62
68
// / MachineRegisterInfo::recomputeRegClass.
63
69
const TargetRegisterClass *
64
- recomputeRegClassExcept (Register Reg, const TargetRegisterClass *OldRC ,
65
- const TargetRegisterClass *NewRC ,
66
- const MachineInstr *ExceptMI ) const ;
70
+ recomputeRegClassExceptRewritable (Register Reg,
71
+ const TargetRegisterClass *OldRC ,
72
+ const TargetRegisterClass *NewRC ) const ;
67
73
68
74
bool run (MachineFunction &MF) const ;
69
75
};
70
76
71
77
const TargetRegisterClass *
72
- AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExcept (
78
+ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
73
79
Register Reg, const TargetRegisterClass *OldRC,
74
- const TargetRegisterClass *NewRC, const MachineInstr *ExceptMI ) const {
80
+ const TargetRegisterClass *NewRC) const {
75
81
76
82
// Accumulate constraints from all uses.
77
83
for (MachineOperand &MO : MRI.reg_nodbg_operands (Reg)) {
78
84
// Apply the effect of the given operand to NewRC.
79
85
MachineInstr *MI = MO.getParent ();
80
- if (MI == ExceptMI )
86
+ if (isRewriteCandidate (*MI) )
81
87
continue ;
82
88
83
89
unsigned OpNo = &MO - &MI->getOperand (0 );
@@ -182,10 +188,13 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
182
188
// first place, as well as need to assign another register, and need to
183
189
// figure out where to put them. The live range splitting is smarter than
184
190
// anything we're doing here, so trust it did something reasonable.
185
- const TargetRegisterClass *Src2ExceptRC = recomputeRegClassExcept (
186
- Src2->getReg (), Src2VirtRegRC, VirtRegRC, CopySrcMI);
187
- if (!Src2ExceptRC)
191
+ const TargetRegisterClass *Src2ExceptRC =
192
+ recomputeRegClassExceptRewritable (Src2->getReg (), Src2VirtRegRC,
193
+ VirtRegRC);
194
+ if (!Src2ExceptRC) {
195
+ LLVM_DEBUG (dbgs () << " Could not recompute the regclass\n " );
188
196
continue ;
197
+ }
189
198
190
199
const TargetRegisterClass *NewSrc2ConstraintRC =
191
200
TII.getRegClass (TII.get (AGPROp), Src2->getOperandNo (), &TRI, MF);
@@ -207,8 +216,19 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
207
216
208
217
CopySrcMI->setDesc (TII.get (AGPROp));
209
218
210
- // TODO: Is replacing too aggressive, fixup these instructions only?
211
- MRI.replaceRegWith (CopySrcReg, VReg);
219
+ // Perform replacement of the register, rewriting the rewritable uses.
220
+ for (MachineInstr &UseMI :
221
+ make_early_inc_range (MRI.reg_instructions (CopySrcReg))) {
222
+ if (TII.isMAI (UseMI)) {
223
+ // Note the register we need to rewrite may still appear in src0/src1,
224
+ // but that's fine since those can use A or V anyway.
225
+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (UseMI.getOpcode ());
226
+ if (ReplacementOp != -1 )
227
+ UseMI.setDesc (TII.get (ReplacementOp));
228
+ }
229
+
230
+ UseMI.substituteRegister (CopySrcReg, VReg, AMDGPU::NoSubRegister, TRI);
231
+ }
212
232
213
233
LLVM_DEBUG (dbgs () << " Replaced VGPR MFMA with AGPR: " << *CopySrcMI);
214
234
0 commit comments