14
14
// / MFMA opcode.
15
15
// /
16
16
// / TODO:
17
- // / - Handle SplitKit partial copy bundles, and not just full copy instructions
18
- // /
19
17
// / - Update LiveIntervals incrementally instead of recomputing from scratch
20
18
// /
21
19
// ===----------------------------------------------------------------------===//
@@ -37,6 +35,7 @@ using namespace llvm;
37
35
namespace {
38
36
39
37
class AMDGPURewriteAGPRCopyMFMAImpl {
38
+ MachineFunction &MF;
40
39
const GCNSubtarget &ST;
41
40
const SIInstrInfo &TII;
42
41
const SIRegisterInfo &TRI;
@@ -53,7 +52,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
53
52
AMDGPURewriteAGPRCopyMFMAImpl (MachineFunction &MF, VirtRegMap &VRM,
54
53
LiveRegMatrix &LRM, LiveIntervals &LIS,
55
54
const RegisterClassInfo &RegClassInfo)
56
- : ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
55
+ : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
57
56
TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
58
57
LIS(LIS), RegClassInfo(RegClassInfo) {}
59
58
@@ -71,26 +70,28 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
71
70
// /
72
71
// / \p RewriteRegs will accumulate the set of registers used by those MFMAs
73
72
// / that need to have the register classes adjusted.
74
- const TargetRegisterClass *recomputeRegClassExceptRewritable (
75
- Register Reg, const TargetRegisterClass *OldRC,
76
- const TargetRegisterClass *NewRC,
77
- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
73
+ bool recomputeRegClassExceptRewritable (
74
+ Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
78
75
SmallSetVector<Register, 4 > &RewriteRegs) const ;
79
76
80
77
bool run (MachineFunction &MF) const ;
81
78
};
82
79
83
- const TargetRegisterClass *
84
- AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
85
- Register StartReg, const TargetRegisterClass *OldRC,
86
- const TargetRegisterClass *NewRC,
87
- SmallVectorImpl<MachineInstr *> &RewriteCandidates,
80
+ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
81
+ Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
88
82
SmallSetVector<Register, 4 > &RewriteRegs) const {
89
83
SmallVector<Register, 8 > Worklist = {StartReg};
90
84
91
85
// Recursively visit all transitive MFMA users
92
86
while (!Worklist.empty ()) {
93
87
Register Reg = Worklist.pop_back_val ();
88
+ const TargetRegisterClass *OldRC = MRI.getRegClass (Reg);
89
+
90
+ // Inflate to the equivalent AV_* class.
91
+ const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass (OldRC, MF);
92
+ if (OldRC == NewRC)
93
+ return false ;
94
+
94
95
// Accumulate constraints from all uses.
95
96
for (MachineOperand &MO : MRI.reg_nodbg_operands (Reg)) {
96
97
// Apply the effect of the given operand to NewRC.
@@ -101,23 +102,40 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
101
102
// either AGPR or VGPR in src0/src1, so don't bother checking the
102
103
// constraint effects of the individual operands.
103
104
if (isRewriteCandidate (*MI)) {
104
- for (AMDGPU::OpName OpName :
105
- {AMDGPU::OpName::vdst, AMDGPU::OpName::src2}) {
106
- const MachineOperand *Op = TII.getNamedOperand (*MI, OpName);
105
+ const MachineOperand *VDst =
106
+ TII.getNamedOperand (*MI, AMDGPU::OpName::vdst);
107
+ const MachineOperand *Src2 =
108
+ TII.getNamedOperand (*MI, AMDGPU::OpName::src2);
109
+ for (const MachineOperand *Op : {VDst, Src2}) {
107
110
if (!Op->isReg ())
108
111
continue ;
109
112
110
113
Register OtherReg = Op->getReg ();
111
- if (OtherReg != Reg) {
112
- if (RewriteRegs.insert (OtherReg))
113
- Worklist.push_back (OtherReg);
114
- }
114
+ if (OtherReg.isPhysical ())
115
+ return false ;
116
+
117
+ if (OtherReg != Reg && RewriteRegs.insert (OtherReg))
118
+ Worklist.push_back (OtherReg);
115
119
}
116
120
117
- LLVM_DEBUG (dbgs () << " Ignoring effects of " << *MI);
121
+ if (!is_contained (RewriteCandidates, MI)) {
122
+ LLVM_DEBUG ({
123
+ Register VDstPhysReg = VRM.getPhys (VDst->getReg ());
124
+ dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
125
+ << " Dst=[" << printReg (VDst->getReg ()) << " => "
126
+ << printReg (VDstPhysReg, &TRI);
127
+
128
+ if (Src2->isReg ()) {
129
+ Register Src2PhysReg = VRM.getPhys (Src2->getReg ());
130
+ dbgs () << " ], Src2=[" << printReg (Src2->getReg (), &TRI) << " => "
131
+ << printReg (Src2PhysReg, &TRI);
132
+ }
133
+
134
+ dbgs () << " ]: " << MI;
135
+ });
118
136
119
- if (!is_contained (RewriteCandidates, MI))
120
137
RewriteCandidates.push_back (MI);
138
+ }
121
139
122
140
continue ;
123
141
}
@@ -126,13 +144,14 @@ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
126
144
NewRC = MI->getRegClassConstraintEffect (OpNo, NewRC, &TII, &TRI);
127
145
if (!NewRC || NewRC == OldRC) {
128
146
LLVM_DEBUG (dbgs () << " User of " << printReg (Reg, &TRI)
129
- << " cannot be reassigned to AGPR: " << *MI);
130
- return nullptr ;
147
+ << " cannot be reassigned to "
148
+ << TRI.getRegClassName (NewRC) << " : " << *MI);
149
+ return false ;
131
150
}
132
151
}
133
152
}
134
153
135
- return NewRC ;
154
+ return true ;
136
155
}
137
156
138
157
// / Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
@@ -228,10 +247,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
228
247
continue ;
229
248
230
249
MachineInstr *DefMI = LIS.getInstructionFromIndex (VNI->def );
231
-
232
- // TODO: Handle SplitKit produced copy bundles for partially defined
233
- // registers.
234
- if (!DefMI || !DefMI->isFullCopy ())
250
+ if (!DefMI || !DefMI->isCopy ())
235
251
continue ;
236
252
237
253
Register MFMADstReg = DefMI->getOperand (1 ).getReg ();
@@ -244,34 +260,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
244
260
if (!MFMA || !isRewriteCandidate (*MFMA))
245
261
continue ;
246
262
247
- MachineOperand *Src2 = TII.getNamedOperand (*MFMA, AMDGPU::OpName::src2);
248
- Register Src2Reg;
249
- if (Src2->isReg ()) {
250
- Src2Reg = Src2->getReg ();
251
- if (!Src2Reg.isVirtual ())
252
- continue ;
253
- }
254
-
255
- // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
256
- // of an AGPR or VGPR subclass, so we can't simply use the result on the
257
- // assignment.
258
-
259
- LLVM_DEBUG ({
260
- dbgs () << " Attempting to replace VGPR MFMA with AGPR version:"
261
- << " Dst=[" << printReg (VReg) << " => "
262
- << printReg (PhysReg, &TRI);
263
-
264
- if (Src2Reg) {
265
- Register Src2PhysReg = VRM.getPhys (Src2Reg);
266
- dbgs () << " ], Src2=[" << printReg (Src2Reg, &TRI) << " => "
267
- << printReg (Src2PhysReg, &TRI);
268
- }
269
-
270
- dbgs () << " ]: " << *MFMA;
271
- });
272
-
273
- const TargetRegisterClass *DstVirtRegRC = MRI.getRegClass (MFMADstReg);
274
-
275
263
// src2 and dst have the same physical class constraint; try to preserve
276
264
// the original src2 subclass if one were to exist.
277
265
SmallVector<MachineInstr *, 4 > RewriteCandidates = {MFMA};
@@ -290,11 +278,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
290
278
//
291
279
// Note recomputeRegClassExceptRewritable will consider the constraints of
292
280
// this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
293
- const TargetRegisterClass *DstExceptRC =
294
- recomputeRegClassExceptRewritable (MFMADstReg, DstVirtRegRC, VirtRegRC,
295
- RewriteCandidates, RewriteRegs);
296
- if (!DstExceptRC) {
297
- LLVM_DEBUG (dbgs () << " Could not recompute the regclass of "
281
+ if (!recomputeRegClassExceptRewritable (MFMADstReg, RewriteCandidates,
282
+ RewriteRegs)) {
283
+ LLVM_DEBUG (dbgs () << " Could not recompute the regclass of dst reg "
298
284
<< printReg (MFMADstReg, &TRI) << ' \n ' );
299
285
continue ;
300
286
}
0 commit comments