Skip to content

Commit 0e517e1

Browse files
committed
[AMDGPU][GlobalISel] Fix / workaround amdgcn.kill/.unreachable lowering
cf. #133907 (comment)
1 parent a25e367 commit 0e517e1

File tree

2 files changed: +37 additions, -10 deletions (lines changed)

2 files changed: +37 additions, -10 deletions (lines changed)

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3088,18 +3088,31 @@ bool IRTranslator::translateCallBr(const User &U,
30883088
return false;
30893089

30903090
// Retrieve successors.
3091-
SmallPtrSet<BasicBlock *, 8> Dests = {I.getDefaultDest()};
3091+
SmallPtrSet<BasicBlock *, 8> Dests;
3092+
Dests.insert(I.getDefaultDest());
30923093
MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
30933094

30943095
// Update successor info.
30953096
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3096-
// TODO: For most of the cases where there is an intrinsic callbr, we're
3097-
// having exactly one indirect target, which will be unreachable. As soon as
3098-
// this changes, we might need to enhance
3099-
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3100-
// intrinsic indirect branches.
3097+
3098+
// Add indirect targets as successors. For intrinsic callbr, these represent
3099+
// implicit control flow (e.g., the "kill" path for amdgcn.kill). We mark them
3100+
// with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as
3101+
// valid successors, even though they're not from inline asm.
3102+
for (BasicBlock *Dest : I.getIndirectDests()) {
3103+
MachineBasicBlock *Target = &getMBB(*Dest);
3104+
Target->setIsInlineAsmBrIndirectTarget();
3105+
Target->setLabelMustBeEmitted();
3106+
// Don't add duplicate machine successors.
3107+
if (Dests.insert(Dest).second)
3108+
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3109+
}
3110+
31013111
CallBrMBB->normalizeSuccProbs();
31023112

3113+
// Drop into default successor.
3114+
MIRBuilder.buildBr(*Return);
3115+
31033116
return true;
31043117
}
31053118

llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,21 @@ define void @test_kill(ptr %src, ptr %dst, i1 %c) {
3232
; GISEL-NEXT: s_mov_b64 s[4:5], exec
3333
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
3434
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
35-
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
35+
; GISEL-NEXT: s_cbranch_scc0 .LBB0_4
3636
; GISEL-NEXT: ; %bb.1:
3737
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
38+
; GISEL-NEXT: ; %bb.2: ; %cont
3839
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3940
; GISEL-NEXT: flat_store_dword v[2:3], v0
4041
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
4142
; GISEL-NEXT: s_setpc_b64 s[30:31]
42-
; GISEL-NEXT: .LBB0_2:
43+
; GISEL-NEXT: .LBB0_3: ; Inline asm indirect target
44+
; GISEL-NEXT: ; %kill
45+
; GISEL-NEXT: ; Label of block must be emitted
46+
; GISEL-NEXT: ; divergent unreachable
47+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
48+
; GISEL-NEXT: s_setpc_b64 s[30:31]
49+
; GISEL-NEXT: .LBB0_4:
4350
; GISEL-NEXT: s_mov_b64 exec, 0
4451
; GISEL-NEXT: s_endpgm
4552
%a = load i32, ptr %src, align 4
@@ -81,14 +88,21 @@ define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
8188
; GISEL-NEXT: s_mov_b64 s[4:5], exec
8289
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
8390
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
84-
; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
91+
; GISEL-NEXT: s_cbranch_scc0 .LBB1_4
8592
; GISEL-NEXT: ; %bb.1:
8693
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
94+
; GISEL-NEXT: ; %bb.2: ; %cont
8795
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8896
; GISEL-NEXT: flat_store_dword v[2:3], v0
8997
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9098
; GISEL-NEXT: s_setpc_b64 s[30:31]
91-
; GISEL-NEXT: .LBB1_2:
99+
; GISEL-NEXT: .LBB1_3: ; Inline asm indirect target
100+
; GISEL-NEXT: ; %kill
101+
; GISEL-NEXT: ; Label of block must be emitted
102+
; GISEL-NEXT: ; divergent unreachable
103+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
104+
; GISEL-NEXT: s_setpc_b64 s[30:31]
105+
; GISEL-NEXT: .LBB1_4:
92106
; GISEL-NEXT: s_mov_b64 exec, 0
93107
; GISEL-NEXT: s_endpgm
94108
%a = load i32, ptr %src, align 4

0 commit comments

Comments
 (0)