1414// / MFMA opcode.
1515// /
1616// / TODO:
17- // / - Handle SplitKit partial copy bundles, and not just full copy instructions
18- // /
1917// / - Update LiveIntervals incrementally instead of recomputing from scratch
2018// /
2119// ===----------------------------------------------------------------------===//
@@ -37,6 +35,7 @@ using namespace llvm;
3735namespace {
3836
3937class AMDGPURewriteAGPRCopyMFMAImpl {
38+ MachineFunction &MF;
4039 const GCNSubtarget &ST;
4140 const SIInstrInfo &TII;
4241 const SIRegisterInfo &TRI;
@@ -53,7 +52,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
5352 AMDGPURewriteAGPRCopyMFMAImpl (MachineFunction &MF, VirtRegMap &VRM,
5453 LiveRegMatrix &LRM, LiveIntervals &LIS,
5554 const RegisterClassInfo &RegClassInfo)
56- : ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
55+ : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
5756 TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
5857 LIS(LIS), RegClassInfo(RegClassInfo) {}
5958
@@ -71,26 +70,26 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
7170 // /
7271 // / \p RewriteRegs will accumulate the set of registers used by those MFMAs
7372 // / that need to have the register classes adjusted.
74- const TargetRegisterClass *recomputeRegClassExceptRewritable (
75- Register Reg, const TargetRegisterClass *OldRC,
76- const TargetRegisterClass *NewRC,
77- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
73+ bool recomputeRegClassExceptRewritable (
74+ Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
7875 SmallSetVector<Register, 4 > &RewriteRegs) const ;
7976
8077 bool run (MachineFunction &MF) const ;
8178};
8279
83- const TargetRegisterClass *
84- AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
85- Register StartReg, const TargetRegisterClass *OldRC,
86- const TargetRegisterClass *NewRC,
87- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
80+ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
81+ Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
8882 SmallSetVector<Register, 4 > &RewriteRegs) const {
8983 SmallVector<Register, 8 > Worklist = {StartReg};
9084
9185 // Recursively visit all transitive MFMA users
9286 while (!Worklist.empty ()) {
9387 Register Reg = Worklist.pop_back_val ();
88+ const TargetRegisterClass *OldRC = MRI.getRegClass (Reg);
89+
90+ // Inflate to the equivalent AV_* class.
91+ const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass (OldRC, MF);
92+
9493 // Accumulate constraints from all uses.
9594 for (MachineOperand &MO : MRI.reg_nodbg_operands (Reg)) {
9695 // Apply the effect of the given operand to NewRC.
@@ -101,23 +100,40 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
101100 // either AGPR or VGPR in src0/src1, so don't bother checking the
102101 // constraint effects of the individual operands.
103102 if (isRewriteCandidate (*MI)) {
104- for (AMDGPU::OpName OpName :
105- {AMDGPU::OpName::vdst, AMDGPU::OpName::src2}) {
106- const MachineOperand *Op = TII.getNamedOperand (*MI, OpName);
103+ const MachineOperand *VDst =
104+ TII.getNamedOperand (*MI, AMDGPU::OpName::vdst);
105+ const MachineOperand *Src2 =
106+ TII.getNamedOperand (*MI, AMDGPU::OpName::src2);
107+ for (const MachineOperand *Op : {VDst, Src2}) {
107108 if (!Op->isReg ())
108109 continue ;
109110
110111 Register OtherReg = Op->getReg ();
111- if (OtherReg != Reg) {
112- if (RewriteRegs.insert (OtherReg))
113- Worklist.push_back (OtherReg);
114- }
112+ if (OtherReg.isPhysical ())
113+ return false ;
114+
115+ if (OtherReg != Reg && RewriteRegs.insert (OtherReg))
116+ Worklist.push_back (OtherReg);
115117 }
116118
117- LLVM_DEBUG (dbgs () << " Ignoring effects of " << *MI);
119+ if (!is_contained (RewriteCandidates, MI)) {
120+ LLVM_DEBUG ({ // Debug-only dump of the candidate MFMA and the current dst/src2 assignments.
121+ Register VDstPhysReg = VRM.getPhys (VDst->getReg ());
122+ dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
123+ << " Dst=[" << printReg (VDst->getReg ()) << " => "
124+ << printReg (VDstPhysReg, &TRI);
125+
126+ if (Src2->isReg ()) {
127+ Register Src2PhysReg = VRM.getPhys (Src2->getReg ());
128+ dbgs () << " , Src2=[" << printReg (Src2->getReg (), &TRI) << " => "
129+ << printReg (Src2PhysReg, &TRI);
130+ }
131+
132+ dbgs () << " ]: " << *MI; // Stream the instruction itself; streaming MI would only print the pointer value.
133+ });
118134
119- if (!is_contained (RewriteCandidates, MI))
120135 RewriteCandidates.push_back (MI);
136+ }
121137
122138 continue ;
123139 }
@@ -126,13 +142,14 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
126142 NewRC = MI->getRegClassConstraintEffect (OpNo, NewRC, &TII, &TRI);
127143 if (!NewRC || NewRC == OldRC) {
128144 LLVM_DEBUG (dbgs () << " User of " << printReg (Reg, &TRI)
129- << " cannot be reassigned to AGPR: " << *MI);
130- return nullptr ;
145+ << " cannot be reassigned to "
146+ << (NewRC ? TRI.getRegClassName (NewRC) : "<none>") << " : " << *MI); // NewRC is null when no class satisfies this user; never pass null to getRegClassName.
147+ return false ;
131148 }
132149 }
133150 }
134151
135- return NewRC ;
152+ return true ;
136153}
137154
138155// / Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
@@ -225,10 +242,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
225242
226243 for (VNInfo *VNI : LI.vnis ()) {
227244 MachineInstr *DefMI = LIS.getInstructionFromIndex (VNI->def );
228-
229- // TODO: Handle SplitKit produced copy bundles for partially defined
230- // registers.
231- if (!DefMI || !DefMI->isFullCopy ())
245+ if (!DefMI || !DefMI->isCopy ())
232246 continue ;
233247
234248 Register MFMADstReg = DefMI->getOperand (1 ).getReg ();
@@ -241,34 +255,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
241255 if (!MFMA || !isRewriteCandidate (*MFMA))
242256 continue ;
243257
244- MachineOperand *Src2 = TII.getNamedOperand (*MFMA, AMDGPU::OpName::src2);
245- Register Src2Reg;
246- if (Src2->isReg ()) {
247- Src2Reg = Src2->getReg ();
248- if (!Src2Reg.isVirtual ())
249- continue ;
250- }
251-
252- // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
253- // of an AGPR or VGPR subclass, so we can't simply use the result on the
254- // assignment.
255-
256- LLVM_DEBUG ({
257- dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
258- << " Dst=[" << printReg (VReg) << " => "
259- << printReg (PhysReg, &TRI);
260-
261- if (Src2Reg) {
262- Register Src2PhysReg = VRM.getPhys (Src2Reg);
263- dbgs () << " , Src2=[" << printReg (Src2Reg, &TRI) << " => "
264- << printReg (Src2PhysReg, &TRI);
265- }
266-
267- dbgs () << " ]: " << *MFMA;
268- });
269-
270- const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass (MFMADstReg);
271-
272258 // src2 and dst have the same physical class constraint; try to preserve
273259 // the original src2 subclass if one were to exist.
274260 SmallVector<MachineInstr *, 4 > RewriteCandidates = {MFMA};
@@ -287,11 +273,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
287273 //
288274 // Note recomputeRegClassExceptRewritable will consider the constraints of
289275 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
290- const TargetRegisterClass *DstExceptRC =
291- recomputeRegClassExceptRewritable (MFMADstReg, DstVirtRegRC, VirtRegRC,
292- RewriteCandidates, RewriteRegs);
293- if (!DstExceptRC) {
294- LLVM_DEBUG (dbgs () << " Could not recompute the regclass of "
276+ if (!recomputeRegClassExceptRewritable (MFMADstReg, RewriteCandidates,
277+ RewriteRegs)) {
278+ LLVM_DEBUG (dbgs () << " Could not recompute the regclass of dst reg "
295279 << printReg (MFMADstReg, &TRI) << ' \n ' );
296280 continue ;
297281 }
0 commit comments