Skip to content

Commit e84fdbe

Browse files
authored
[IR] Add CallBr intrinsics support (#133907)
This commit adds support for using intrinsics with callbr. Most uses will look like this example:

```llvm
  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
  unreachable
cont:
  ...
```
1 parent 8993c93 commit e84fdbe

File tree

9 files changed

+321
-34
lines changed

9 files changed

+321
-34
lines changed

llvm/docs/LangRef.rst

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9861,8 +9861,12 @@ The '``callbr``' instruction causes control to transfer to a specified
98619861
function, with the possibility of control flow transfer to either the
98629862
'``fallthrough``' label or one of the '``indirect``' labels.
98639863

9864-
This instruction should only be used to implement the "goto" feature of gcc
9865-
style inline assembly. Any other usage is an error in the IR verifier.
9864+
This instruction can currently only be used
9865+
9866+
#. to implement the "goto" feature of gcc style inline assembly or
9867+
#. to call selected intrinsics.
9868+
9869+
Any other usage is an error in the IR verifier.
98669870

98679871
Note that in order to support outputs along indirect edges, LLVM may need to
98689872
split critical edges, which may require synthesizing a replacement block for
@@ -9911,7 +9915,7 @@ This instruction requires several arguments:
99119915
indicates the function accepts a variable number of arguments, the
99129916
extra arguments can be specified.
99139917
#. '``fallthrough label``': the label reached when the inline assembly's
9914-
execution exits the bottom.
9918+
execution exits the bottom, or when the intrinsic call returns.
99159919
#. '``indirect labels``': the labels reached when a callee transfers control
99169920
to a location other than the '``fallthrough label``'. Label constraints
99179921
refer to these destinations.
@@ -9929,9 +9933,12 @@ flow goes after the call.
99299933
The output values of a '``callbr``' instruction are available both in the
99309934
'``fallthrough``' block, and any '``indirect``' block(s).
99319935

9932-
The only use of this today is to implement the "goto" feature of gcc inline
9933-
assembly where additional labels can be provided as locations for the inline
9934-
assembly to jump to.
9936+
The only current uses of this are:
9937+
9938+
#. implementing the "goto" feature of gcc inline assembly where additional
9939+
labels can be provided as locations for the inline assembly to jump to.
9940+
#. supporting selected intrinsics which manipulate control flow and should
9941+
be chained to specific terminators, such as '``unreachable``'.
99359942

99369943
Example:
99379944
""""""""
@@ -9946,6 +9953,14 @@ Example:
99469953
<result> = callbr i32 asm "", "=r,r,!i"(i32 %x)
99479954
to label %fallthrough [label %indirect]
99489955

9956+
; intrinsic which should be followed by unreachable (the order of the
9957+
; blocks after the callbr instruction doesn't matter)
9958+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
9959+
cont:
9960+
...
9961+
kill:
9962+
unreachable
9963+
99499964
.. _i_resume:
99509965

99519966
'``resume``' Instruction

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ class IRTranslator : public MachineFunctionPass {
317317
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
318318

319319
bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
320+
bool translateCallBrIntrinsic(const CallBrInst &I,
321+
MachineIRBuilder &MIRBuilder);
320322

321323
bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
322324

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2836,7 +2836,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
28362836
IsTgtMemIntrinsic ? &Info : nullptr);
28372837
}
28382838

2839-
/// Translate a call to an intrinsic.
2839+
/// Translate a call or callbr to an intrinsic.
28402840
/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo
28412841
/// is a pointer to the correspondingly populated IntrinsicInfo object.
28422842
/// Otherwise, this pointer is null.
@@ -3067,10 +3067,40 @@ bool IRTranslator::translateInvoke(const User &U,
30673067
return true;
30683068
}
30693069

3070+
/// The intrinsics currently supported by callbr are implicit control flow
3071+
/// intrinsics such as amdgcn.kill.
30703072
bool IRTranslator::translateCallBr(const User &U,
30713073
MachineIRBuilder &MIRBuilder) {
3072-
// FIXME: Implement this.
3073-
return false;
3074+
if (containsBF16Type(U))
3075+
return false; // see translateCall
3076+
3077+
const CallBrInst &I = cast<CallBrInst>(U);
3078+
MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
3079+
3080+
Intrinsic::ID IID = I.getIntrinsicID();
3081+
if (I.isInlineAsm()) {
3082+
// FIXME: inline asm is not yet supported for callbr in GlobalISel. As soon
3083+
// as we add support, we need to handle the indirect asm targets, see
3084+
// SelectionDAGBuilder::visitCallBr().
3085+
return false;
3086+
}
3087+
if (!translateIntrinsic(I, IID, MIRBuilder))
3088+
return false;
3089+
3090+
// Retrieve successors.
3091+
SmallPtrSet<BasicBlock *, 8> Dests = {I.getDefaultDest()};
3092+
MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
3093+
3094+
// Update successor info.
3095+
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3096+
// TODO: For most of the cases where there is an intrinsic callbr, we're
3097+
// having exactly one indirect target, which will be unreachable. As soon as
3098+
// this changes, we might need to enhance
3099+
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3100+
// intrinsic indirect branches.
3101+
CallBrMBB->normalizeSuccProbs();
3102+
3103+
return true;
30743104
}
30753105

30763106
bool IRTranslator::translateLandingPad(const User &U,

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 56 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,16 +3505,46 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
35053505
DAG.getBasicBlock(Return)));
35063506
}
35073507

3508+
/// The intrinsics currently supported by callbr are implicit control flow
3509+
/// intrinsics such as amdgcn.kill.
3510+
/// - they should be called (no "dontcall-" attributes)
3511+
/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
3512+
/// - they do not need custom argument handling (no
3513+
/// TLI.CollectTargetIntrinsicOperands())
3514+
void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
3515+
TargetLowering::IntrinsicInfo Info;
3516+
assert(!DAG.getTargetLoweringInfo().getTgtMemIntrinsic(
3517+
Info, I, DAG.getMachineFunction(), I.getIntrinsicID()) &&
3518+
"Intrinsic touches memory");
3519+
3520+
auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
3521+
3522+
SmallVector<SDValue, 8> Ops =
3523+
getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
3524+
SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
3525+
3526+
// Create the node.
3527+
SDValue Result =
3528+
getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
3529+
Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
3530+
3531+
setValue(&I, Result);
3532+
}
3533+
35083534
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
35093535
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
35103536

3511-
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3512-
// have to do anything here to lower funclet bundles.
3513-
failForInvalidBundles(I, "callbrs",
3514-
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});
3515-
3516-
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
3517-
visitInlineAsm(I);
3537+
if (I.isInlineAsm()) {
3538+
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
3539+
// have to do anything here to lower funclet bundles.
3540+
failForInvalidBundles(I, "callbrs",
3541+
{LLVMContext::OB_deopt, LLVMContext::OB_funclet});
3542+
visitInlineAsm(I);
3543+
} else {
3544+
assert(!I.hasOperandBundles() &&
3545+
"Can't have operand bundles for intrinsics");
3546+
visitCallBrIntrinsic(I);
3547+
}
35183548
CopyToExportRegsIfNeeded(&I);
35193549

35203550
// Retrieve successors.
@@ -3524,18 +3554,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
35243554

35253555
// Update successor info.
35263556
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
3527-
for (BasicBlock *Dest : I.getIndirectDests()) {
3528-
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
3529-
Target->setIsInlineAsmBrIndirectTarget();
3530-
// If we introduce a type of asm goto statement that is permitted to use an
3531-
// indirect call instruction to jump to its labels, then we should add a
3532-
// call to Target->setMachineBlockAddressTaken() here, to mark the target
3533-
// block as requiring a BTI.
3534-
3535-
Target->setLabelMustBeEmitted();
3536-
// Don't add duplicate machine successors.
3537-
if (Dests.insert(Dest).second)
3538-
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3557+
// TODO: For most of the cases where there is an intrinsic callbr, we're
3558+
// having exactly one indirect target, which will be unreachable. As soon as
3559+
// this changes, we might need to enhance
3560+
// Target->setIsInlineAsmBrIndirectTarget or add something similar for
3561+
// intrinsic indirect branches.
3562+
if (I.isInlineAsm()) {
3563+
for (BasicBlock *Dest : I.getIndirectDests()) {
3564+
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
3565+
Target->setIsInlineAsmBrIndirectTarget();
3566+
// If we introduce a type of asm goto statement that is permitted to use
3567+
// an indirect call instruction to jump to its labels, then we should add
3568+
// a call to Target->setMachineBlockAddressTaken() here, to mark the
3569+
// target block as requiring a BTI.
3570+
3571+
Target->setLabelMustBeEmitted();
3572+
// Don't add duplicate machine successors.
3573+
if (Dests.insert(Dest).second)
3574+
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3575+
}
35393576
}
35403577
CallBrMBB->normalizeSuccProbs();
35413578

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -556,10 +556,12 @@ class SelectionDAGBuilder {
556556
private:
557557
// These all get lowered before this pass.
558558
void visitInvoke(const InvokeInst &I);
559-
void visitCallBr(const CallBrInst &I);
560559
void visitCallBrLandingPad(const CallInst &I);
561560
void visitResume(const ResumeInst &I);
562561

562+
void visitCallBr(const CallBrInst &I);
563+
void visitCallBrIntrinsic(const CallBrInst &I);
564+
563565
void visitUnary(const User &I, unsigned Opcode);
564566
void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
565567

llvm/lib/IR/Verifier.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3446,11 +3446,34 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
34463446
}
34473447

34483448
void Verifier::visitCallBrInst(CallBrInst &CBI) {
3449-
Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
3450-
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
3451-
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
3449+
if (!CBI.isInlineAsm()) {
3450+
Check(CBI.getCalledFunction(),
3451+
"Callbr: indirect function / invalid signature");
3452+
Check(!CBI.hasOperandBundles(),
3453+
"Callbr for intrinsics currently doesn't support operand bundles");
3454+
3455+
switch (CBI.getIntrinsicID()) {
3456+
case Intrinsic::amdgcn_kill: {
3457+
Check(CBI.getNumIndirectDests() == 1,
3458+
"Callbr amdgcn_kill only supports one indirect dest");
3459+
bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
3460+
CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
3461+
Check(Unreachable || (Call && Call->getIntrinsicID() ==
3462+
Intrinsic::amdgcn_unreachable),
3463+
"Callbr amdgcn_kill indirect dest needs to be unreachable");
3464+
break;
3465+
}
3466+
default:
3467+
CheckFailed(
3468+
"Callbr currently only supports asm-goto and selected intrinsics");
3469+
}
3470+
visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
3471+
} else {
3472+
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
3473+
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
34523474

3453-
verifyInlineAsmCall(CBI);
3475+
verifyInlineAsmCall(CBI);
3476+
}
34543477
visitTerminator(CBI);
34553478
}
34563479

@@ -5546,7 +5569,7 @@ void Verifier::visitInstruction(Instruction &I) {
55465569
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
55475570
IsAttachedCallOperand(F, CBI, i)),
55485571
"Cannot take the address of an intrinsic!", &I);
5549-
Check(!F->isIntrinsic() || isa<CallInst>(I) ||
5572+
Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
55505573
F->getIntrinsicID() == Intrinsic::donothing ||
55515574
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
55525575
F->getIntrinsicID() == Intrinsic::seh_try_end ||

llvm/test/Assembler/callbr.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
declare void @llvm.amdgcn.kill(i1)
4+
5+
define void @test_kill(i1 %c) {
6+
; CHECK-LABEL: define void @test_kill(
7+
; CHECK-SAME: i1 [[C:%.*]]) {
8+
; CHECK-NEXT: callbr void @llvm.amdgcn.kill(i1 [[C]])
9+
; CHECK-NEXT: to label %[[CONT:.*]] [label %kill]
10+
; CHECK: [[KILL:.*:]]
11+
; CHECK-NEXT: unreachable
12+
; CHECK: [[CONT]]:
13+
; CHECK-NEXT: ret void
14+
;
15+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
16+
kill:
17+
unreachable
18+
cont:
19+
ret void
20+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefix=GISEL %s
4+
5+
define void @test_kill(ptr %src, ptr %dst, i1 %c) {
6+
; CHECK-LABEL: test_kill:
7+
; CHECK: ; %bb.0:
8+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
10+
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
11+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
12+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
13+
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
14+
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
15+
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
16+
; CHECK-NEXT: ; %bb.1:
17+
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
18+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
19+
; CHECK-NEXT: flat_store_dword v[2:3], v0
20+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
21+
; CHECK-NEXT: s_setpc_b64 s[30:31]
22+
; CHECK-NEXT: .LBB0_2:
23+
; CHECK-NEXT: s_mov_b64 exec, 0
24+
; CHECK-NEXT: s_endpgm
25+
;
26+
; GISEL-LABEL: test_kill:
27+
; GISEL: ; %bb.0:
28+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GISEL-NEXT: flat_load_dword v0, v[0:1]
30+
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
31+
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
32+
; GISEL-NEXT: s_mov_b64 s[4:5], exec
33+
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
34+
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
35+
; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
36+
; GISEL-NEXT: ; %bb.1:
37+
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
38+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
39+
; GISEL-NEXT: flat_store_dword v[2:3], v0
40+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
41+
; GISEL-NEXT: s_setpc_b64 s[30:31]
42+
; GISEL-NEXT: .LBB0_2:
43+
; GISEL-NEXT: s_mov_b64 exec, 0
44+
; GISEL-NEXT: s_endpgm
45+
%a = load i32, ptr %src, align 4
46+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
47+
kill:
48+
unreachable
49+
cont:
50+
store i32 %a, ptr %dst, align 4
51+
ret void
52+
}
53+
54+
define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
55+
; CHECK-LABEL: test_kill_block_order:
56+
; CHECK: ; %bb.0:
57+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
59+
; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
60+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
61+
; CHECK-NEXT: s_mov_b64 s[4:5], exec
62+
; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
63+
; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
64+
; CHECK-NEXT: s_cbranch_scc0 .LBB1_2
65+
; CHECK-NEXT: ; %bb.1:
66+
; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
67+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
68+
; CHECK-NEXT: flat_store_dword v[2:3], v0
69+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
70+
; CHECK-NEXT: s_setpc_b64 s[30:31]
71+
; CHECK-NEXT: .LBB1_2:
72+
; CHECK-NEXT: s_mov_b64 exec, 0
73+
; CHECK-NEXT: s_endpgm
74+
;
75+
; GISEL-LABEL: test_kill_block_order:
76+
; GISEL: ; %bb.0:
77+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GISEL-NEXT: flat_load_dword v0, v[0:1]
79+
; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
80+
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
81+
; GISEL-NEXT: s_mov_b64 s[4:5], exec
82+
; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
83+
; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
84+
; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
85+
; GISEL-NEXT: ; %bb.1:
86+
; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
87+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88+
; GISEL-NEXT: flat_store_dword v[2:3], v0
89+
; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
90+
; GISEL-NEXT: s_setpc_b64 s[30:31]
91+
; GISEL-NEXT: .LBB1_2:
92+
; GISEL-NEXT: s_mov_b64 exec, 0
93+
; GISEL-NEXT: s_endpgm
94+
%a = load i32, ptr %src, align 4
95+
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
96+
cont:
97+
store i32 %a, ptr %dst, align 4
98+
ret void
99+
kill:
100+
unreachable
101+
}

0 commit comments

Comments
 (0)