Skip to content

Commit af68efc

Browse files
authored
Revert "[AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add support for callbr instruction with inline-asm" (llvm#166186)
Reverts llvm#152161 Need to revert to fix changed logic for the expensive checks.
1 parent ce92582 commit af68efc

File tree

9 files changed

+79
-926
lines changed

9 files changed

+79
-926
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 35 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -181,52 +181,14 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
181181
return NewRetBlock;
182182
}
183183

184-
static BasicBlock *
185-
createDummyReturnBlock(Function &F,
186-
SmallVector<BasicBlock *, 4> &ReturningBlocks) {
187-
BasicBlock *DummyReturnBB =
188-
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
189-
Type *RetTy = F.getReturnType();
190-
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
191-
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
192-
ReturningBlocks.push_back(DummyReturnBB);
193-
return DummyReturnBB;
194-
}
195-
196-
/// Handle conditional branch instructions (-> 2 targets) and callbr
197-
/// instructions with N targets.
198-
static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
199-
BasicBlock *DummyReturnBB,
200-
std::vector<DominatorTree::UpdateType> &Updates) {
201-
SmallVector<BasicBlock *, 2> Successors(successors(BB));
202-
203-
// Create a new transition block to hold the conditional branch.
204-
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
205-
206-
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
207-
208-
// 'Successors' become successors of TransitionBB instead of BB,
209-
// and TransitionBB becomes a single successor of BB.
210-
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
211-
for (BasicBlock *Successor : Successors) {
212-
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
213-
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
214-
}
215-
216-
// Create a branch that will always branch to the transition block and
217-
// references DummyReturnBB.
218-
BB->getTerminator()->eraseFromParent();
219-
BranchInst::Create(TransitionBB, DummyReturnBB,
220-
ConstantInt::getTrue(F.getContext()), BB);
221-
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
222-
}
223-
224184
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
225185
const PostDominatorTree &PDT,
226186
const UniformityInfo &UA) {
187+
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
188+
227189
if (PDT.root_size() == 0 ||
228190
(PDT.root_size() == 1 &&
229-
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
191+
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
230192
return false;
231193

232194
// Loop over all of the blocks in a function, tracking all of the blocks that
@@ -260,27 +222,46 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
260222
if (HasDivergentExitBlock)
261223
UnreachableBlocks.push_back(BB);
262224
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
263-
if (!DummyReturnBB)
264-
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
225+
226+
ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
227+
if (DummyReturnBB == nullptr) {
228+
DummyReturnBB =
229+
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
230+
Type *RetTy = F.getReturnType();
231+
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
232+
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
233+
ReturningBlocks.push_back(DummyReturnBB);
234+
}
265235

266236
if (BI->isUnconditional()) {
267237
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
268238
BI->eraseFromParent(); // Delete the unconditional branch.
269239
// Add a new conditional branch with a dummy edge to the return block.
270-
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
271-
ConstantInt::getTrue(F.getContext()), BB);
240+
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
241+
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
242+
} else { // Conditional branch.
243+
SmallVector<BasicBlock *, 2> Successors(successors(BB));
244+
245+
// Create a new transition block to hold the conditional branch.
246+
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
247+
248+
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
249+
250+
// 'Successors' become successors of TransitionBB instead of BB,
251+
// and TransitionBB becomes a single successor of BB.
252+
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
253+
for (BasicBlock *Successor : Successors) {
254+
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
255+
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
256+
}
257+
258+
// Create a branch that will always branch to the transition block and
259+
// references DummyReturnBB.
260+
BB->getTerminator()->eraseFromParent();
261+
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
272262
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
273-
} else {
274-
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
275263
}
276264
Changed = true;
277-
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
278-
if (!DummyReturnBB)
279-
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
280-
281-
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
282-
} else {
283-
llvm_unreachable("unsupported block terminator");
284265
}
285266
}
286267

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,11 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
558558
} else {
559559
// Test for successors as back edge
560560
BasicBlock *BB = N->getNodeAs<BasicBlock>();
561-
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
562-
for (BasicBlock *Succ : Term->successors())
563-
if (Visited.count(Succ))
564-
Loops[Succ] = BB;
561+
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
562+
563+
for (BasicBlock *Succ : Term->successors())
564+
if (Visited.count(Succ))
565+
Loops[Succ] = BB;
565566
}
566567
}
567568

@@ -593,7 +594,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
593594

594595
for (BasicBlock *P : predecessors(BB)) {
595596
// Ignore it if it's a branch from outside into our region entry
596-
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
597+
if (!ParentRegion->contains(P))
597598
continue;
598599

599600
Region *R = RI->getRegionFor(P);
@@ -1401,17 +1402,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
14011402
/// Run the transformation for each region found
14021403
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
14031404
const TargetTransformInfo *TTI) {
1404-
// CallBr and its corresponding direct target blocks are for now ignored by
1405-
// this pass. This is not a limitation for the currently intended uses cases
1406-
// of callbr in the AMDGPU backend.
1407-
// Parent and child regions are not affected by this (current) restriction.
1408-
// See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1409-
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
1405+
if (R->isTopLevelRegion())
14101406
return false;
14111407

14121408
this->DT = DT;
14131409
this->TTI = TTI;
14141410
Func = R->getEntry()->getParent();
1411+
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
14151412

14161413
ParentRegion = R;
14171414

llvm/test/CodeGen/AMDGPU/callbr.ll

Lines changed: 0 additions & 54 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
declare void @foo(ptr)
55
declare i1 @bar(ptr)
6-
declare i32 @bar32(ptr)
76

87
define void @musttail_call_without_return_value(ptr %p) {
98
; CHECK-LABEL: define void @musttail_call_without_return_value(
@@ -29,31 +28,6 @@ bb.1:
2928
ret void
3029
}
3130

32-
define void @musttail_call_without_return_value_callbr(ptr %p) {
33-
; CHECK-LABEL: define void @musttail_call_without_return_value_callbr(
34-
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
35-
; CHECK-NEXT: [[ENTRY:.*:]]
36-
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
37-
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
38-
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
39-
; CHECK: [[BB_0]]:
40-
; CHECK-NEXT: musttail call void @foo(ptr [[P]])
41-
; CHECK-NEXT: ret void
42-
; CHECK: [[BB_1:.*:]]
43-
; CHECK-NEXT: ret void
44-
;
45-
entry:
46-
%load = load i32, ptr %p, align 1
47-
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
48-
49-
bb.0:
50-
musttail call void @foo(ptr %p)
51-
ret void
52-
53-
bb.1:
54-
ret void
55-
}
56-
5731
define i1 @musttail_call_with_return_value(ptr %p) {
5832
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
5933
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
@@ -77,28 +51,3 @@ bb.0:
7751
bb.1:
7852
ret i1 %load
7953
}
80-
81-
define i32 @musttail_call_with_return_value_callbr(ptr %p) {
82-
; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr(
83-
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
84-
; CHECK-NEXT: [[ENTRY:.*:]]
85-
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
86-
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
87-
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
88-
; CHECK: [[BB_0]]:
89-
; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]])
90-
; CHECK-NEXT: ret i32 [[RET]]
91-
; CHECK: [[BB_1:.*:]]
92-
; CHECK-NEXT: ret i32 [[LOAD]]
93-
;
94-
entry:
95-
%load = load i32, ptr %p, align 1
96-
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
97-
98-
bb.0:
99-
%ret = musttail call i32 @bar32(ptr %p)
100-
ret i32 %ret
101-
102-
bb.1:
103-
ret i32 %load
104-
}

0 commit comments

Comments
 (0)