1616// ===----------------------------------------------------------------------===//
1717
1818#include " AMDGPU.h"
19- #include " llvm/CodeGen/MachineFunctionPass.h"
19+ #include " AMDGPUGlobalISelUtils.h"
20+ #include " GCNSubtarget.h"
21+ #include " llvm/CodeGen/GlobalISel/CSEInfo.h"
22+ #include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
24+ #include " llvm/CodeGen/TargetPassConfig.h"
2025#include " llvm/InitializePasses.h"
2126
2227#define DEBUG_TYPE " amdgpu-regbankselect"
2328
2429using namespace llvm ;
30+ using namespace AMDGPU ;
2531
2632namespace {
2733
@@ -40,6 +46,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
4046 }
4147
4248 void getAnalysisUsage (AnalysisUsage &AU) const override {
49+ AU.addRequired <TargetPassConfig>();
50+ AU.addRequired <GISelCSEAnalysisWrapperPass>();
51+ AU.addRequired <MachineUniformityAnalysisPass>();
4352 MachineFunctionPass::getAnalysisUsage (AU);
4453 }
4554
@@ -55,6 +64,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
5564
5665INITIALIZE_PASS_BEGIN (AMDGPURegBankSelect, DEBUG_TYPE,
5766 " AMDGPU Register Bank Select" , false , false )
67+ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
68+ INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
69+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
5870INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
5971 " AMDGPU Register Bank Select" , false , false )
6072
@@ -66,9 +78,201 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
6678 return new AMDGPURegBankSelect ();
6779}
6880
81+ class RegBankSelectHelper {
82+ MachineIRBuilder &B;
83+ MachineRegisterInfo &MRI;
84+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
85+ const MachineUniformityInfo &MUI;
86+ const RegisterBank *SgprRB;
87+ const RegisterBank *VgprRB;
88+ const RegisterBank *VccRB;
89+
90+ public:
91+ RegBankSelectHelper (MachineIRBuilder &B,
92+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
93+ const MachineUniformityInfo &MUI,
94+ const RegisterBankInfo &RBI)
95+ : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI),
96+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
97+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
98+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
99+
100+ const RegisterBank *getRegBankToAssign (Register Reg) {
101+ if (MUI.isUniform (Reg) || ILMA.isS32S64LaneMask (Reg))
102+ return SgprRB;
103+ if (MRI.getType (Reg) == LLT::scalar (1 ))
104+ return VccRB;
105+ return VgprRB;
106+ }
107+
108+ // %rc:RegClass(s32) = G_ ...
109+ // ...
110+ // %a = G_ ..., %rc
111+ // ->
112+ // %rb:RegBank(s32) = G_ ...
113+ // %rc:RegClass(s32) = COPY %rb
114+ // ...
115+ // %a = G_ ..., %rb
116+ void reAssignRegBankOnDef (MachineInstr &MI, MachineOperand &DefOP,
117+ const RegisterBank *RB) {
118+ // Register that already has Register class got it during pre-inst selection
119+ // of another instruction. Maybe cross bank copy was required so we insert a
120+ // copy that can be removed later. This simplifies post regbanklegalize
121+ // combiner and avoids need to special case some patterns.
122+ Register Reg = DefOP.getReg ();
123+ LLT Ty = MRI.getType (Reg);
124+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
125+ DefOP.setReg (NewReg);
126+
127+ auto &MBB = *MI.getParent ();
128+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
129+ B.buildCopy (Reg, NewReg);
130+
131+ // The problem was discovered for uniform S1 that was used as both
132+ // lane mask(vcc) and regular sgpr S1.
133+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
134+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
135+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
136+ // - the regular sgpr S1(uniform) instruction is now broken since
137+ // it uses sreg_64_xexec(S1) which is divergent.
138+
139+ // Replace virtual registers with register class on generic instructions
140+ // uses with virtual registers with register bank.
141+ for (auto &UseMI : make_early_inc_range (MRI.use_instructions (Reg))) {
142+ if (UseMI.isPreISelOpcode ()) {
143+ for (MachineOperand &Op : UseMI.operands ()) {
144+ if (Op.isReg () && Op.getReg () == Reg)
145+ Op.setReg (NewReg);
146+ }
147+ }
148+ }
149+ }
150+
151+ // %a = G_ ..., %rc
152+ // ->
153+ // %rb:RegBank(s32) = COPY %rc
154+ // %a = G_ ..., %rb
155+ void constrainRegBankUse (MachineInstr &MI, MachineOperand &UseOP,
156+ const RegisterBank *RB) {
157+ Register Reg = UseOP.getReg ();
158+
159+ LLT Ty = MRI.getType (Reg);
160+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
161+ UseOP.setReg (NewReg);
162+
163+ if (MI.isPHI ()) {
164+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
165+ MachineBasicBlock *DefMBB = DefMI->getParent ();
166+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
167+ } else {
168+ B.setInstr (MI);
169+ }
170+
171+ B.buildCopy (NewReg, Reg);
172+ }
173+ };
174+
175+ static Register getVReg (MachineOperand &Op) {
176+ if (!Op.isReg ())
177+ return {};
178+
179+ // Operands of COPY and G_SI_CALL can be physical registers.
180+ Register Reg = Op.getReg ();
181+ if (!Reg.isVirtual ())
182+ return {};
183+
184+ return Reg;
185+ }
186+
69187bool AMDGPURegBankSelect::runOnMachineFunction (MachineFunction &MF) {
70188 if (MF.getProperties ().hasProperty (
71189 MachineFunctionProperties::Property::FailedISel))
72190 return false ;
191+
192+ // Setup the instruction builder with CSE.
193+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
194+ GISelCSEAnalysisWrapper &Wrapper =
195+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper ();
196+ GISelCSEInfo &CSEInfo = Wrapper.get (TPC.getCSEConfig ());
197+ GISelObserverWrapper Observer;
198+ Observer.addObserver (&CSEInfo);
199+
200+ CSEMIRBuilder B (MF);
201+ B.setCSEInfo (&CSEInfo);
202+ B.setChangeObserver (Observer);
203+
204+ RAIIDelegateInstaller DelegateInstaller (MF, &Observer);
205+ RAIIMFObserverInstaller MFObserverInstaller (MF, Observer);
206+
207+ IntrinsicLaneMaskAnalyzer ILMA (MF);
208+ MachineUniformityInfo &MUI =
209+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
210+ MachineRegisterInfo &MRI = *B.getMRI ();
211+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
212+ RegBankSelectHelper RBSHelper (B, ILMA, MUI, *ST.getRegBankInfo ());
213+ // Virtual registers at this point don't have register banks.
214+ // Virtual registers in def and use operands of already inst-selected
215+ // instruction have register class.
216+
217+ for (MachineBasicBlock &MBB : MF) {
218+ for (MachineInstr &MI : MBB) {
219+ // Vregs in def and use operands of COPY can have either register class
220+ // or bank. If there is neither on vreg in def operand, assign bank.
221+ if (MI.isCopy ()) {
222+ Register DefReg = getVReg (MI.getOperand (0 ));
223+ if (!DefReg.isValid () || MRI.getRegClassOrNull (DefReg))
224+ continue ;
225+
226+ assert (!MRI.getRegBankOrNull (DefReg));
227+ MRI.setRegBank (DefReg, *RBSHelper.getRegBankToAssign (DefReg));
228+ continue ;
229+ }
230+
231+ if (!MI.isPreISelOpcode ())
232+ continue ;
233+
234+ // Vregs in def and use operands of G_ instructions need to have register
235+ // banks assigned. Before this loop possible case are
236+ // - (1) vreg without register class or bank in def or use operand
237+ // - (2) vreg with register class in def operand
238+ // - (3) vreg, defined by G_ instruction, in use operand
239+ // - (4) vreg, defined by pre-inst-selected instruction, in use operand
240+
241+ // First three cases are handled in loop through all def operands of G_
242+ // instructions. For case (1) simply setRegBank. Cases (2) and (3) are
243+ // handled by reAssignRegBankOnDef.
244+ for (MachineOperand &DefOP : MI.defs ()) {
245+ Register DefReg = getVReg (DefOP);
246+ if (!DefReg.isValid ())
247+ continue ;
248+
249+ const RegisterBank *RB = RBSHelper.getRegBankToAssign (DefReg);
250+ if (MRI.getRegClassOrNull (DefReg))
251+ RBSHelper.reAssignRegBankOnDef (MI, DefOP, RB);
252+ else {
253+ assert (!MRI.getRegBankOrNull (DefReg));
254+ MRI.setRegBank (DefReg, *RB);
255+ }
256+ }
257+
258+ // Register bank select doesn't modify pre-inst-selected instructions.
259+ // For case (4) need to insert a copy, handled by constrainRegBankUse.
260+ for (MachineOperand &UseOP : MI.uses ()) {
261+ Register UseReg = getVReg (UseOP);
262+ if (!UseReg.isValid ())
263+ continue ;
264+
265+ // Skip case (3).
266+ if (!MRI.getRegClassOrNull (UseReg) ||
267+ MRI.getVRegDef (UseReg)->isPreISelOpcode ())
268+ continue ;
269+
270+ // Use with register class defined by pre-inst-selected instruction.
271+ const RegisterBank *RB = RBSHelper.getRegBankToAssign (UseReg);
272+ RBSHelper.constrainRegBankUse (MI, UseOP, RB);
273+ }
274+ }
275+ }
276+
73277 return true ;
74278}
0 commit comments