1313
1414#include " AMDGPU.h"
1515#include " GCNSubtarget.h"
16- #include " llvm/Analysis/DomTreeUpdater.h"
1716#include " llvm/Analysis/LoopInfo.h"
1817#include " llvm/Analysis/UniformityAnalysis.h"
1918#include " llvm/CodeGen/TargetPassConfig.h"
@@ -54,7 +53,7 @@ class SIAnnotateControlFlow : public FunctionPass {
5453 Function *Else;
5554 Function *IfBreak;
5655 Function *Loop;
57- Function *WaveReconverge ;
56+ Function *EndCf ;
5857
5958 DominatorTree *DT;
6059 StackVector Stack;
@@ -87,7 +86,7 @@ class SIAnnotateControlFlow : public FunctionPass {
8786
8887 bool handleLoop (BranchInst *Term);
8988
90- bool tryWaveReconverge (BasicBlock *BB);
89+ bool closeControlFlow (BasicBlock *BB);
9190
9291public:
9392 static char ID;
@@ -142,8 +141,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
142141 IfBreak = Intrinsic::getDeclaration (&M, Intrinsic::amdgcn_if_break,
143142 { IntMask });
144143 Loop = Intrinsic::getDeclaration (&M, Intrinsic::amdgcn_loop, { IntMask });
145- WaveReconverge = Intrinsic::getDeclaration (
146- &M, Intrinsic::amdgcn_wave_reconverge, {IntMask});
144+ EndCf = Intrinsic::getDeclaration (&M, Intrinsic::amdgcn_end_cf, { IntMask });
147145}
148146
149147// / Is the branch condition uniform or did the StructurizeCFG pass
@@ -206,6 +204,8 @@ bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
206204
207205// / Open a new "If" block
208206bool SIAnnotateControlFlow::openIf (BranchInst *Term) {
207+ if (isUniform (Term))
208+ return false ;
209209
210210 IRBuilder<> IRB (Term);
211211 Value *IfCall = IRB.CreateCall (If, {Term->getCondition ()});
@@ -306,43 +306,41 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
306306}
307307
308308// / Close the last opened control flow
309- bool SIAnnotateControlFlow::tryWaveReconverge (BasicBlock *BB) {
309+ bool SIAnnotateControlFlow::closeControlFlow (BasicBlock *BB) {
310+ llvm::Loop *L = LI->getLoopFor (BB);
310311
311- if (succ_empty (BB))
312- return false ;
312+ assert (Stack.back ().first == BB);
313313
314- BranchInst *Term = dyn_cast<BranchInst>(BB-> getTerminator ());
315- if (Term-> getNumSuccessors () == 1 ) {
316- // The current BBs single successor is a top of the stack. We need to
317- // reconverge over thaqt path .
318- BasicBlock *SingleSucc = * succ_begin (BB) ;
319- BasicBlock::iterator InsPt = Term ? BasicBlock::iterator (Term) : BB-> end ( );
314+ if (L && L-> getHeader () == BB) {
315+ // We can't insert an EndCF call into a loop header, because it will
316+ // get executed on every iteration of the loop, when it should be
317+ // executed only once before the loop .
318+ SmallVector < BasicBlock *, 8 > Latches ;
319+ L-> getLoopLatches (Latches );
320320
321- if (isTopOfStack (SingleSucc)) {
322- Value *Exec = Stack.back ().second ;
323- IRBuilder<>(BB, InsPt).CreateCall (WaveReconverge, {Exec});
321+ SmallVector<BasicBlock *, 2 > Preds;
322+ for (BasicBlock *Pred : predecessors (BB)) {
323+ if (!is_contained (Latches, Pred))
324+ Preds.push_back (Pred);
324325 }
325- } else {
326- // We have a uniform conditional branch terminating the block.
327- // THis block may be the last in the Then path of the enclosing divergent
328- // IF.
329- if (!isUniform (Term))
330- // Divergent loop is going to be further processed in another place
331- return false ;
332-
333- for (auto Succ : Term->successors ()) {
334- if (isTopOfStack (Succ)) {
335- // Just split to make a room for further WAVE_RECONVERGE insertion
336- SmallVector<BasicBlock *, 2 > Preds;
337- for (auto P : predecessors (Succ)) {
338- if (DT->dominates (BB, P))
339- Preds.push_back (P);
340- }
341- DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Eager);
342- SplitBlockPredecessors (Succ, Preds, " .reconverge" , &DTU, LI, nullptr ,
343- false );
344- }
326+
327+ BB = SplitBlockPredecessors (BB, Preds, " endcf.split" , DT, LI, nullptr ,
328+ false );
329+ }
330+
331+ Value *Exec = popSaved ();
332+ Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt ();
333+ if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt)) {
334+ Instruction *ExecDef = cast<Instruction>(Exec);
335+ BasicBlock *DefBB = ExecDef->getParent ();
336+ if (!DT->dominates (DefBB, BB)) {
337+ // Split edge to make Def dominate Use
338+ FirstInsertionPt = &*SplitEdge (DefBB, BB, DT, LI)->getFirstInsertionPt ();
345339 }
340+ IRBuilder<> IRB (FirstInsertionPt);
341+ // TODO: Clear dbg location for now as it causes regression in GDB tests.
342+ IRB.SetCurrentDebugLocation (DebugLoc ());
343+ IRB.CreateCall (EndCf, {Exec});
346344 }
347345
348346 return true ;
@@ -366,20 +364,14 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
366364
367365 if (!Term || Term->isUnconditional ()) {
368366 if (isTopOfStack (BB))
369- Stack.pop_back ();
370-
371- Changed |= tryWaveReconverge (BB);
367+ Changed |= closeControlFlow (BB);
372368
373369 continue ;
374370 }
375371
376372 if (I.nodeVisited (Term->getSuccessor (1 ))) {
377373 if (isTopOfStack (BB))
378- Stack.pop_back ();
379-
380- // Let's take care of uniform loop latch that may be closing the Then
381- // path of the enclosing divergent branch.
382- Changed |= tryWaveReconverge (BB);
374+ Changed |= closeControlFlow (BB);
383375
384376 if (DT->dominates (Term->getSuccessor (1 ), BB))
385377 Changed |= handleLoop (Term);
@@ -394,15 +386,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
394386 continue ;
395387 }
396388
397- Stack. pop_back ( );
389+ Changed |= closeControlFlow (BB );
398390 }
399391
400- if (isUniform (Term))
401- // Uniform conditional branch may be in the block that closes the Then
402- // path of the divergent conditional branch.
403- Changed |= tryWaveReconverge (BB);
404- else
405- Changed |= openIf (Term);
392+ Changed |= openIf (Term);
406393 }
407394
408395 if (!Stack.empty ()) {
0 commit comments