@@ -96,8 +96,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
9696
9797 // / Compute the register class constraints based on the uses of \p Reg,
9898 // / excluding MFMA uses which can be rewritten to change the register
99- // / class constraint. This should be nearly identical to
100- // / MachineRegisterInfo::recomputeRegClass.
99+ // / class constraint. MFMA scale operands still need their constraints checked.
100+ // / This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
101101
102102 // / \p RewriteCandidates will collect the set of MFMA instructions that need
103103 // / to have the opcode mutated to perform the replacement.
@@ -151,9 +151,26 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
151151
152152 // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
153153 // effects of rewrite candidates. It just so happens that we can use
154- // either AGPR or VGPR in src0/src1, so don't bother checking the
155- // constraint effects of the individual operands .
154+ // either AGPR or VGPR in src0/src1. We still need to check the constraint
155+ // effects for the scale variants, whose scale operands do not allow AGPRs.
156156 if (isRewriteCandidate (*MI)) {
157+
158+ int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp (MI->getOpcode ());
159+ MachineInstrBuilder TmpMIB =
160+ BuildMI (*MI->getParent (), MI->getIterator (), MI->getDebugLoc (),
161+ TII.get (AGPROp));
162+ for (const MachineOperand &TmpMO : MI->operands ())
163+ TmpMIB.add (TmpMO);
164+ MachineInstr *TmpMI = TmpMIB.getInstr ();
165+ unsigned OpNo = &MO - &MI->getOperand (0 );
166+ const TargetRegisterClass *EquivalentAGPRRegClass =
167+ TRI.getEquivalentAGPRClass (MRI.getRegClass (Reg));
168+ const TargetRegisterClass *Allowed = TmpMI->getRegClassConstraintEffect (
169+ OpNo, EquivalentAGPRRegClass, &TII, &TRI);
170+ TmpMI->eraseFromParent ();
171+ if (!Allowed || Allowed != EquivalentAGPRRegClass)
172+ return false ;
173+
157174 const MachineOperand *VDst =
158175 TII.getNamedOperand (*MI, AMDGPU::OpName::vdst);
159176 const MachineOperand *Src2 =
@@ -302,10 +319,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
302319 const TargetRegisterClass *EquivalentAGPRRegClass =
303320 TRI.getEquivalentAGPRClass (MRI.getRegClass (InterferingReg));
304321
305- // Do not reassign scale operands
306- if (EquivalentAGPRRegClass == &AMDGPU::AGPR_32RegClass)
307- return false ;
308-
309322 MCPhysReg Assignable = AMDGPU::NoRegister;
310323 if (EquivalentAGPRRegClass->contains (PrefPhysReg) &&
311324 LRM.checkInterference (ReassignLI, PrefPhysReg) ==
0 commit comments