1111#include < llvm/InitializePasses.h>
1212
1313#include " AMDGPU.h"
14+ #include " GCNSubtarget.h"
15+ #include " SIInstrInfo.h"
16+ #include " SIMachineFunctionInfo.h"
1417
1518using namespace llvm ;
1619
@@ -19,10 +22,8 @@ namespace {
1922const char PassName[] = " AMDGPU if conversion" ;
2023
2124class AMDGPUIfConverter : public MachineFunctionPass {
22- const TargetInstrInfo *TII = nullptr ;
23- const TargetRegisterInfo *TRI = nullptr ;
25+ const SIInstrInfo *TII = nullptr ;
2426 TargetSchedModel SchedModel;
25- MachineRegisterInfo *MRI = nullptr ;
2627 MachineDominatorTree *DomTree = nullptr ;
2728 MachineBranchProbabilityInfo *MBPI = nullptr ;
2829 MachineLoopInfo *Loops = nullptr ;
@@ -41,6 +42,7 @@ class AMDGPUIfConverter : public MachineFunctionPass {
4142 void getAnalysisUsage (AnalysisUsage &AU) const override ;
4243
4344 bool tryConvertIf (MachineBasicBlock *);
45+ bool shouldConvertIf ();
4446
4547 StringRef getPassName () const override { return PassName; }
4648};
@@ -60,10 +62,11 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
6062 if (skipFunction (MF.getFunction ()))
6163 return false ;
6264
63- const TargetSubtargetInfo &STI = MF.getSubtarget ();
65+ const auto &STI = MF.getSubtarget <GCNSubtarget>();
66+ if (!STI.hasGFX10_3Insts ())
67+ return false ;
68+
6469 TII = STI.getInstrInfo ();
65- TRI = STI.getRegisterInfo ();
66- MRI = &MF.getRegInfo ();
6770 SchedModel.init (&STI);
6871 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree ();
6972 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
@@ -79,7 +82,193 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
7982 return Changed;
8083}
8184
82- bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) { return false ; }
85+ unsigned getReversedVCMPXOpcode (unsigned Opcode) {
86+ // TODO: this is a placeholder for the real function
87+ switch (Opcode) {
88+ case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
89+ return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
90+ default :
91+ errs () << " unhandled: " << Opcode << " \n " ;
92+ llvm_unreachable (" unhandled vcmp opcode" );
93+ }
94+ }
95+
96+ bool needsPredication (const SIInstrInfo *TII, const MachineInstr &I) {
97+ return TII->isVALU (I) || TII->isVMEM (I);
98+ }
99+
100+ struct ExecPredicate : ifcvt::PredicationStrategy {
101+ const SIInstrInfo *TII;
102+ const SIRegisterInfo *RegInfo;
103+
104+ MachineInstr *Cmp = nullptr ;
105+
106+ ExecPredicate (const SIInstrInfo *TII)
107+ : TII(TII), RegInfo(&TII->getRegisterInfo ()) {}
108+
109+ bool canConvertIf (MachineBasicBlock *Head, MachineBasicBlock *TBB,
110+ MachineBasicBlock *FBB, MachineBasicBlock *Tail,
111+ ArrayRef<MachineOperand> Cond) override {
112+
113+ // check that the cmp is just before the branch and that it is promotable to
114+ // v_cmpx
115+ const unsigned SupportedBranchOpc[]{
116+ AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
117+ AMDGPU::S_CBRANCH_VCCZ};
118+
119+ MachineInstr &CBranch = *Head->getFirstInstrTerminator ();
120+ if (!llvm::is_contained (SupportedBranchOpc, CBranch.getOpcode ()))
121+ return false ;
122+
123+ auto CmpInstr = std::next (CBranch.getReverseIterator ());
124+ if (CmpInstr == Head->instr_rend ())
125+ return false ;
126+
127+ Register SCCorVCC = Cond[1 ].getReg ();
128+ bool ModifiesConditionReg = CmpInstr->modifiesRegister (SCCorVCC, RegInfo);
129+ if (!ModifiesConditionReg)
130+ return false ;
131+
132+ Cmp = &*CmpInstr;
133+
134+ unsigned CmpOpc = Cmp->getOpcode ();
135+ if (TII->isSALU (*Cmp))
136+ CmpOpc = TII->getVALUOp (*Cmp);
137+ if (AMDGPU::getVCMPXOpFromVCMP (CmpOpc) == -1 ) {
138+ errs () << *Cmp << " \n " ;
139+ return false ;
140+ }
141+
142+ auto NeedsPredication = [&](const MachineInstr &I) {
143+ return needsPredication (TII, I);
144+ };
145+ auto BlockNeedsPredication = [&](const MachineBasicBlock *MBB) {
146+ if (MBB == Tail)
147+ return false ;
148+ auto Insts = llvm::make_range (MBB->begin (), MBB->getFirstTerminator ());
149+ return llvm::any_of (Insts, NeedsPredication);
150+ };
151+
152+ MachineBasicBlock *Blocks[] = {TBB, FBB};
153+
154+ if (llvm::none_of (Blocks, BlockNeedsPredication))
155+ return false ;
156+
157+ return true ;
158+ }
159+
160+ bool canPredicate (const MachineInstr &I) override {
161+
162+ // TODO: relax this condition, if exec is masked, check that it goes back to
163+ // normal
164+ // TODO: what about scc or vcc ? Are they taken into acount in the MBB
165+ // live-ins ?
166+ MCRegister Exec = RegInfo->getExec ();
167+ bool ModifiesExec = I.modifiesRegister (Exec, RegInfo);
168+ if (ModifiesExec)
169+ return false ;
170+
171+ if (needsPredication (TII, I))
172+ return true ;
173+
174+ bool DontMoveAcrossStore = true ;
175+ bool IsSpeculatable = I.isDereferenceableInvariantLoad () ||
176+ I.isSafeToMove (DontMoveAcrossStore);
177+ if (IsSpeculatable)
178+ return true ;
179+
180+ return false ;
181+ }
182+
183+ bool predicateBlock (MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
184+ bool Reverse) override {
185+ // save exec
186+ MachineFunction &MF = *MBB->getParent ();
187+ SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
188+
189+ Register ExecBackup = MFI->getSGPRForEXECCopy ();
190+
191+ const DebugLoc &CmpLoc = Cmp->getDebugLoc ();
192+
193+ auto FirstInstruction = MBB->begin ();
194+ const bool IsSCCLive =
195+ false ; // asume not since the live-ins are supposed to be empty
196+ TII->insertScratchExecCopy (MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
197+ IsSCCLive);
198+
199+ // mask exec
200+ unsigned CmpOpc = Cmp->getOpcode ();
201+ if (TII->isSALU (*Cmp))
202+ CmpOpc = TII->getVALUOp (*Cmp);
203+
204+ CmpOpc = AMDGPU::getVCMPXOpFromVCMP (CmpOpc);
205+ if (Reverse)
206+ CmpOpc = getReversedVCMPXOpcode (CmpOpc);
207+
208+ // TODO: handle this properly. The second block may kill those registers.
209+ Cmp->getOperand (0 ).setIsKill (false );
210+ Cmp->getOperand (1 ).setIsKill (false );
211+
212+ auto VCmpX = BuildMI (*MBB, FirstInstruction, CmpLoc, TII->get (CmpOpc));
213+ VCmpX->addOperand (Cmp->getOperand (0 ));
214+ VCmpX->addOperand (Cmp->getOperand (1 ));
215+
216+ // restore exec
217+ TII->restoreExec (MF, *MBB, MBB->end (), DebugLoc (), ExecBackup);
218+
219+ return true ;
220+ }
221+
222+ ~ExecPredicate () override = default ;
223+ };
224+
225+ // / Update the dominator tree after if-conversion erased some blocks.
226+ void updateDomTree (MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
227+ ArrayRef<MachineBasicBlock *> Removed) {
228+ // convertIf can remove TBB, FBB, and Tail can be merged into Head.
229+ // TBB and FBB should not dominate any blocks.
230+ // Tail children should be transferred to Head.
231+ MachineDomTreeNode *HeadNode = DomTree->getNode (IfConv.Head );
232+ for (auto *B : Removed) {
233+ MachineDomTreeNode *Node = DomTree->getNode (B);
234+ assert (Node != HeadNode && " Cannot erase the head node" );
235+ while (Node->getNumChildren ()) {
236+ assert (Node->getBlock () == IfConv.Tail && " Unexpected children" );
237+ DomTree->changeImmediateDominator (Node->back (), HeadNode);
238+ }
239+ DomTree->eraseNode (B);
240+ }
241+ }
242+
243+ // / Update LoopInfo after if-conversion.
244+ void updateLoops (MachineLoopInfo *Loops,
245+ ArrayRef<MachineBasicBlock *> Removed) {
246+ // If-conversion doesn't change loop structure, and it doesn't mess with back
247+ // edges, so updating LoopInfo is simply removing the dead blocks.
248+ for (auto *B : Removed)
249+ Loops->removeBlock (B);
250+ }
251+
252+ bool AMDGPUIfConverter::shouldConvertIf () {
253+ // TODO: cost model
254+ return true ;
255+ }
256+
257+ bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) {
258+ ExecPredicate Predicate{TII};
259+ bool Changed = false ;
260+ while (IfConv.canConvertIf (MBB, Predicate) && shouldConvertIf ()) {
261+ // If-convert MBB and update analyses.
262+ SmallVector<MachineBasicBlock *, 4 > RemoveBlocks;
263+ IfConv.convertIf (RemoveBlocks, Predicate);
264+ Changed = true ;
265+ updateDomTree (DomTree, IfConv, RemoveBlocks);
266+ for (MachineBasicBlock *MBB : RemoveBlocks)
267+ MBB->eraseFromParent ();
268+ updateLoops (Loops, RemoveBlocks);
269+ }
270+ return Changed;
271+ }
83272
84273} // namespace
85274
0 commit comments