Skip to content

Commit c349e96

Browse files
committed
[Inliner] Add support for preserving nocapture param attr
Currently if we have: ``` define @foo(ptr nocapture %p) { entry: ... bar(ptr %p) ... } ``` When inlining `foo`, we will lose the `nocapture` on `%p` which might not be recoverable. The goal of this patch is to preserve the `nocapture` if some conservative analysis indicates we can. 1) Return value of `bar` is either unused or only used as return of `foo` (this rules of capture via return). 2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a path from `entry` to `bar` that goes an `alloca`. This helps rule out `bar` capturing `%p` in memory in a way that wouldn't be capturing outside of the scope of `foo`. 3) No paths in `foo` that go through `bar` have any instructions with side-effects other than `bar`. This rules out `bar` capturing `%p` in memory, but then some later instructions clearing the memory capture s.t `nocapture` in `foo` still holds. It also rules out some function (i.e `malloc`) creating scratch memory that `bar` could capture `%p` in but still only visible in the scope of `foo`. Ultimately these three checks are highly conservative, but should allow some preservation.
1 parent 3038f55 commit c349e96

File tree

2 files changed

+117
-6
lines changed

2 files changed

+117
-6
lines changed

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
13511351
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
13521352
}
13531353

1354+
template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
1355+
// Any instruction that may clear local scratch space CB stored
1356+
// into.
1357+
return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
1358+
}
1359+
1360+
template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
1361+
return any_of(Range, [](Instruction &I) {
1362+
// Any instruction that may create local scratch space CB can store
1363+
// into.
1364+
return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
1365+
});
1366+
}
1367+
1368+
template <typename NextFn, typename CheckFn>
1369+
static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
1370+
bool First, BasicBlock *BB, NextFn Next,
1371+
CheckFn Check) {
1372+
if (!First) {
1373+
// Initialize to true (okay to propagate) `nocapture`. This means that loops
1374+
// will be okay.
1375+
auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
1376+
// If we already have a result, return it.
1377+
if (!Inserted)
1378+
return Iter->second;
1379+
1380+
if (!Check(BB->instructionsWithoutDebug())) {
1381+
Iter->second = false;
1382+
return false;
1383+
}
1384+
}
1385+
auto NextBBs = Next(BB);
1386+
// Check all Succs/Preds
1387+
for (BasicBlock *NextBB : NextBBs) {
1388+
if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
1389+
Check)) {
1390+
CachedRes[BB] = false;
1391+
return false;
1392+
}
1393+
}
1394+
1395+
return true;
1396+
}
1397+
1398+
// Assuming we have:
1399+
// define @foo(ptr nocapture %p) {
1400+
// entry:
1401+
// ...
1402+
// bar (ptr %p)
1403+
// ...
1404+
// }
1405+
//
1406+
// Determine if we can propagate `nocapture` to the `%p` at the
1407+
// `bar`.
1408+
static bool
1409+
CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
1410+
DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
1411+
BasicBlock *BB, CallBase *CB) {
1412+
// If CB returns and its used by anything other than `ret`, assume it may be
1413+
// capturing.
1414+
// Potential TODO: We could allow many operations.
1415+
if (!CB->getType()->isVoidTy())
1416+
for (auto Use : CB->users())
1417+
if (!isa<ReturnInst>(Use))
1418+
return false;
1419+
1420+
// Can't capture via return, so if no side-effects we are set.
1421+
if (!CB->mayHaveSideEffects())
1422+
return true;
1423+
1424+
auto It = CB->getIterator();
1425+
++It;
1426+
1427+
// Check that CB instruction with side-effects on all paths from
1428+
// `entry` that go through the CB and there are no `alloca`
1429+
// instructions. This accomplishes two things. 1) It ensures that
1430+
// after CB, there is no way a store/other could "clean up" any
1431+
// captures from CB. 2) There is no local state (i.e `alloca` or a
1432+
// local `malloc`) that could CB could have stored in params in.
1433+
if (ContainsSideEffects(make_range(It, BB->end())) ||
1434+
ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
1435+
return false;
1436+
1437+
if (!CheckPathFromBBRecurse(
1438+
PureFromBB, /*First=*/true, BB,
1439+
[](BasicBlock *CheckedBB) { return successors(CheckedBB); },
1440+
[](const auto &Region) { return !ContainsSideEffects(Region); }))
1441+
return false;
1442+
1443+
if (!CheckPathFromBBRecurse(
1444+
PureFromBB, /*First=*/true, BB,
1445+
[](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
1446+
[](const auto &Region) { return !ContainsScratchSpace(Region); }))
1447+
return false;
1448+
1449+
return true;
1450+
}
1451+
13541452
// Add attributes from CB params and Fn attributes that can always be propagated
13551453
// to the corresponding argument / inner callbases.
13561454
static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1363,6 +1461,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13631461
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
13641462
bool HasAttrToPropagate = false;
13651463

1464+
DenseMap<BasicBlock *, bool> PureFromBB{};
1465+
DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
1466+
13661467
// Attributes we can only propagate if the exact parameter is forwarded.
13671468
// We can propagate both poison generating and UB generating attributes
13681469
// without any extra checks. The only attribute that is tricky to propagate
@@ -1381,6 +1482,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13811482
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
13821483
if (CB.paramHasAttr(I, Attribute::ReadOnly))
13831484
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
1485+
if (CB.paramHasAttr(I, Attribute::NoCapture))
1486+
ValidObjParamAttrs.back().addAttribute(Attribute::NoCapture);
13841487

13851488
for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
13861489
Attribute Attr = CB.getParamAttr(I, AK);
@@ -1463,8 +1566,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
14631566
ArgNo = Arg->getArgNo();
14641567
}
14651568

1569+
AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
1570+
// Check if we can propagate `nocapture`.
1571+
if (AS.hasAttribute(Attribute::NoCapture) &&
1572+
(NewInnerCB->paramHasAttr(I, Attribute::NoCapture) ||
1573+
!CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
1574+
cast<CallBase>(&Ins))))
1575+
AS = AS.removeAttribute(Context, Attribute::NoCapture);
1576+
14661577
// If so, propagate its access attributes.
1467-
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
1578+
AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
14681579
// We can have conflicting attributes from the inner callsite and
14691580
// to-be-inlined callsite. In that case, choose the most
14701581
// restrictive.

llvm/test/Transforms/Inline/prop-nocapture.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr nocapture %p) {
2020
define void @simple_nocapture_prop_caller(ptr %p) {
2121
; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
2222
; CHECK-SAME: (ptr [[P:%.*]]) {
23-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
23+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
2424
; CHECK-NEXT: ret void
2525
;
2626
call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr nocapture %p) {
4040
define i32 @nocapture_with_return_prop_caller(ptr %p) {
4141
; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
4242
; CHECK-SAME: (ptr [[P:%.*]]) {
43-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
43+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]])
4444
; CHECK-NEXT: ret i32 [[R_I]]
4545
;
4646
%r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
193193
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
194194
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
195195
; CHECK: F.i:
196-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
196+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
197197
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
198198
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
199199
; CHECK: nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
286286
define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
287287
; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
288288
; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
289-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
289+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
290290
; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
291291
; CHECK: T.i:
292-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
292+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]]) #[[ATTR3]]
293293
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
294294
; CHECK: F.i:
295295
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]

0 commit comments

Comments
 (0)