Skip to content

Commit da042e2

Browse files
ro-i authored and kcloudy0717 committed
[AMDGPU][GlobalISel] Fix / workaround amdgcn.kill/.unreachable lowering (llvm#170639)
cf. llvm#133907 (comment)
1 parent 93b5e63 commit da042e2

File tree

2 files changed

+35
-9
lines changed

2 files changed

+35
-9
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3093,13 +3093,25 @@ bool IRTranslator::translateCallBr(const User &U,
30933093

30943094
// Update successor info.
30953095
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3096-
// TODO: For most of the cases where there is an intrinsic callbr, we're
3097-
// having exactly one indirect target, which will be unreachable. As soon as
3098-
// this changes, we might need to enhance
3099-
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3100-
// intrinsic indirect branches.
3096+
3097+
// Add indirect targets as successors. For intrinsic callbr, these represent
3098+
// implicit control flow (e.g., the "kill" path for amdgcn.kill). We mark them
3099+
// with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as
3100+
// valid successors, even though they're not from inline asm.
3101+
for (BasicBlock *Dest : I.getIndirectDests()) {
3102+
MachineBasicBlock &Target = getMBB(*Dest);
3103+
Target.setIsInlineAsmBrIndirectTarget();
3104+
Target.setLabelMustBeEmitted();
3105+
// Don't add duplicate machine successors.
3106+
if (Dests.insert(Dest).second)
3107+
addSuccessorWithProb(CallBrMBB, &Target, BranchProbability::getZero());
3108+
}
3109+
31013110
CallBrMBB->normalizeSuccProbs();
31023111

3112+
// Drop into default successor.
3113+
MIRBuilder.buildBr(*Return);
3114+
31033115
return true;
31043116
}
31053117

llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,21 @@ define void @test_kill(ptr %src, ptr %dst, i1 %c) {
3232
; GISEL-NEXT: s_mov_b64 s[4:5], exec
3333
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
3434
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
35-
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
35+
; GISEL-NEXT: s_cbranch_scc0 .LBB0_4
3636
; GISEL-NEXT: ; %bb.1:
3737
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
38+
; GISEL-NEXT: ; %bb.2: ; %cont
3839
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3940
; GISEL-NEXT: flat_store_dword v[2:3], v0
4041
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
4142
; GISEL-NEXT: s_setpc_b64 s[30:31]
42-
; GISEL-NEXT: .LBB0_2:
43+
; GISEL-NEXT: .LBB0_3: ; Inline asm indirect target
44+
; GISEL-NEXT: ; %kill
45+
; GISEL-NEXT: ; Label of block must be emitted
46+
; GISEL-NEXT: ; divergent unreachable
47+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
48+
; GISEL-NEXT: s_setpc_b64 s[30:31]
49+
; GISEL-NEXT: .LBB0_4:
4350
; GISEL-NEXT: s_mov_b64 exec, 0
4451
; GISEL-NEXT: s_endpgm
4552
%a = load i32, ptr %src, align 4
@@ -81,14 +88,21 @@ define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
8188
; GISEL-NEXT: s_mov_b64 s[4:5], exec
8289
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
8390
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
84-
; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
91+
; GISEL-NEXT: s_cbranch_scc0 .LBB1_4
8592
; GISEL-NEXT: ; %bb.1:
8693
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
94+
; GISEL-NEXT: ; %bb.2: ; %cont
8795
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
8896
; GISEL-NEXT: flat_store_dword v[2:3], v0
8997
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
9098
; GISEL-NEXT: s_setpc_b64 s[30:31]
91-
; GISEL-NEXT: .LBB1_2:
99+
; GISEL-NEXT: .LBB1_3: ; Inline asm indirect target
100+
; GISEL-NEXT: ; %kill
101+
; GISEL-NEXT: ; Label of block must be emitted
102+
; GISEL-NEXT: ; divergent unreachable
103+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
104+
; GISEL-NEXT: s_setpc_b64 s[30:31]
105+
; GISEL-NEXT: .LBB1_4:
92106
; GISEL-NEXT: s_mov_b64 exec, 0
93107
; GISEL-NEXT: s_endpgm
94108
%a = load i32, ptr %src, align 4

0 commit comments

Comments
 (0)