1414// / MFMA opcode.
1515// /
1616// / TODO:
17- // / - Handle SplitKit partial copy bundles, and not just full copy instructions
18- // /
1917// / - Update LiveIntervals incrementally instead of recomputing from scratch
2018// /
2119// ===----------------------------------------------------------------------===//
@@ -37,6 +35,7 @@ using namespace llvm;
3735namespace {
3836
3937class AMDGPURewriteAGPRCopyMFMAImpl {
38+ MachineFunction &MF;
4039 const GCNSubtarget &ST;
4140 const SIInstrInfo &TII;
4241 const SIRegisterInfo &TRI;
@@ -53,7 +52,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
5352 AMDGPURewriteAGPRCopyMFMAImpl (MachineFunction &MF, VirtRegMap &VRM,
5453 LiveRegMatrix &LRM, LiveIntervals &LIS,
5554 const RegisterClassInfo &RegClassInfo)
56- : ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
55+ : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
5756 TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
5857 LIS(LIS), RegClassInfo(RegClassInfo) {}
5958
@@ -71,26 +70,28 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
7170 // /
7271 // / \p RewriteRegs will accumulate the set of registers used by those MFMAs
7372 // / that need to have the register classes adjusted.
74- const TargetRegisterClass *recomputeRegClassExceptRewritable (
75- Register Reg, const TargetRegisterClass *OldRC,
76- const TargetRegisterClass *NewRC,
77- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
73+ bool recomputeRegClassExceptRewritable (
74+ Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
7875 SmallSetVector<Register, 4 > &RewriteRegs) const ;
7976
8077 bool run (MachineFunction &MF) const ;
8178};
8279
83- const TargetRegisterClass *
84- AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
85- Register StartReg, const TargetRegisterClass *OldRC,
86- const TargetRegisterClass *NewRC,
87- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
80+ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
81+ Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
8882 SmallSetVector<Register, 4 > &RewriteRegs) const {
8983 SmallVector<Register, 8 > Worklist = {StartReg};
9084
9185 // Recursively visit all transitive MFMA users
9286 while (!Worklist.empty ()) {
9387 Register Reg = Worklist.pop_back_val ();
88+ const TargetRegisterClass *OldRC = MRI.getRegClass (Reg);
89+
90+ // Inflate to the equivalent AV_* class.
91+ const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass (OldRC, MF);
92+ if (OldRC == NewRC)
93+ return false ;
94+
9495 // Accumulate constraints from all uses.
9596 for (MachineOperand &MO : MRI.reg_nodbg_operands (Reg)) {
9697 // Apply the effect of the given operand to NewRC.
@@ -101,23 +102,40 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
101102 // either AGPR or VGPR in src0/src1, so don't bother checking the
102103 // constraint effects of the individual operands.
103104 if (isRewriteCandidate (*MI)) {
104- for (AMDGPU::OpName OpName :
105- {AMDGPU::OpName::vdst, AMDGPU::OpName::src2}) {
106- const MachineOperand *Op = TII.getNamedOperand (*MI, OpName);
105+ const MachineOperand *VDst =
106+ TII.getNamedOperand (*MI, AMDGPU::OpName::vdst);
107+ const MachineOperand *Src2 =
108+ TII.getNamedOperand (*MI, AMDGPU::OpName::src2);
109+ for (const MachineOperand *Op : {VDst, Src2}) {
107110 if (!Op->isReg ())
108111 continue ;
109112
110113 Register OtherReg = Op->getReg ();
111- if (OtherReg != Reg) {
112- if (RewriteRegs.insert (OtherReg))
113- Worklist.push_back (OtherReg);
114- }
114+ if (OtherReg.isPhysical ())
115+ return false ;
116+
117+ if (OtherReg != Reg && RewriteRegs.insert (OtherReg))
118+ Worklist.push_back (OtherReg);
115119 }
116120
117- LLVM_DEBUG (dbgs () << " Ignoring effects of " << *MI);
121+ if (!is_contained (RewriteCandidates, MI)) {
122+ LLVM_DEBUG ({
123+ Register VDstPhysReg = VRM.getPhys (VDst->getReg ());
124+ dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
125+ << " Dst=[" << printReg (VDst->getReg ()) << " => "
126+ << printReg (VDstPhysReg, &TRI);
127+
128+ if (Src2->isReg ()) {
129+ Register Src2PhysReg = VRM.getPhys (Src2->getReg ());
130+ dbgs () << " ], Src2=[" << printReg (Src2->getReg (), &TRI) << " => "
131+ << printReg (Src2PhysReg, &TRI);
132+ }
133+
134+ dbgs () << " ]: " << MI;
135+ });
118136
119- if (!is_contained (RewriteCandidates, MI))
120137 RewriteCandidates.push_back (MI);
138+ }
121139
122140 continue ;
123141 }
@@ -126,13 +144,14 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
126144 NewRC = MI->getRegClassConstraintEffect (OpNo, NewRC, &TII, &TRI);
127145 if (!NewRC || NewRC == OldRC) {
128146 LLVM_DEBUG (dbgs () << " User of " << printReg (Reg, &TRI)
129- << " cannot be reassigned to AGPR: " << *MI);
130- return nullptr ;
147+ << " cannot be reassigned to "
148+ << TRI.getRegClassName (NewRC) << " : " << *MI);
149+ return false ;
131150 }
132151 }
133152 }
134153
135- return NewRC ;
154+ return true ;
136155}
137156
138157// / Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
@@ -228,10 +247,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
228247 continue ;
229248
230249 MachineInstr *DefMI = LIS.getInstructionFromIndex (VNI->def );
231-
232- // TODO: Handle SplitKit produced copy bundles for partially defined
233- // registers.
234- if (!DefMI || !DefMI->isFullCopy ())
250+ if (!DefMI || !DefMI->isCopy ())
235251 continue ;
236252
237253 Register MFMADstReg = DefMI->getOperand (1 ).getReg ();
@@ -244,34 +260,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
244260 if (!MFMA || !isRewriteCandidate (*MFMA))
245261 continue ;
246262
247- MachineOperand *Src2 = TII.getNamedOperand (*MFMA, AMDGPU::OpName::src2);
248- Register Src2Reg;
249- if (Src2->isReg ()) {
250- Src2Reg = Src2->getReg ();
251- if (!Src2Reg.isVirtual ())
252- continue ;
253- }
254-
255- // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
256- // of an AGPR or VGPR subclass, so we can't simply use the result on the
257- // assignment.
258-
259- LLVM_DEBUG ({
260- dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
261- << " Dst=[" << printReg (VReg) << " => "
262- << printReg (PhysReg, &TRI);
263-
264- if (Src2Reg) {
265- Register Src2PhysReg = VRM.getPhys (Src2Reg);
266- dbgs () << " ], Src2=[" << printReg (Src2Reg, &TRI) << " => "
267- << printReg (Src2PhysReg, &TRI);
268- }
269-
270- dbgs () << " ]: " << *MFMA;
271- });
272-
273- const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass (MFMADstReg);
274-
275263 // src2 and dst have the same physical class constraint; try to preserve
276264 // the original src2 subclass if one exists.
277265 SmallVector<MachineInstr *, 4 > RewriteCandidates = {MFMA};
@@ -290,11 +278,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
290278 //
291279 // Note recomputeRegClassExceptRewritable will consider the constraints of
292280 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
293- const TargetRegisterClass *DstExceptRC =
294- recomputeRegClassExceptRewritable (MFMADstReg, DstVirtRegRC, VirtRegRC,
295- RewriteCandidates, RewriteRegs);
296- if (!DstExceptRC) {
297- LLVM_DEBUG (dbgs () << " Could not recompute the regclass of "
281+ if (!recomputeRegClassExceptRewritable (MFMADstReg, RewriteCandidates,
282+ RewriteRegs)) {
283+ LLVM_DEBUG (dbgs () << " Could not recompute the regclass of dst reg "
298284 << printReg (MFMADstReg, &TRI) << ' \n ' );
299285 continue ;
300286 }
0 commit comments