Skip to content

Commit e7c7a29

Browse files
committed
[Inliner] Add support for preserving nocapture param attr
Currently if we have: ``` define @foo(ptr nocapture %p) { entry: ... bar(ptr %p) ... } ``` When inlining `foo`, we will lose the `nocapture` on `%p` which might not be recoverable. The goal of this patch is to preserve the `nocapture` if some conservative analysis indicates we can. 1) Return value of `bar` is either unused or only used as return of `foo` (this rules of capture via return). 2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a path from `entry` to `bar` that goes an `alloca`. This helps rule out `bar` capturing `%p` in memory in a way that wouldn't be capturing outside of the scope of `foo`. 3) No paths in `foo` that go through `bar` have any instructions with side-effects other than `bar`. This rules out `bar` capturing `%p` in memory, but then some later instructions clearing the memory capture s.t `nocapture` in `foo` still holds. It also rules out some function (i.e `malloc`) creating scratch memory that `bar` could capture `%p` in but still only visible in the scope of `foo`. Ultimately these three checks are highly conservative, but should allow some preservation.
1 parent 8d91f62 commit e7c7a29

File tree

2 files changed

+117
-7
lines changed

2 files changed

+117
-7
lines changed

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
13631363
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
13641364
}
13651365

1366+
template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
1367+
// Any instruction that may clear local scratch space CB stored
1368+
// into.
1369+
return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
1370+
}
1371+
1372+
template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
1373+
return any_of(Range, [](Instruction &I) {
1374+
// Any instruction that may create local scratch space CB can store
1375+
// into.
1376+
return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
1377+
});
1378+
}
1379+
1380+
template <typename NextFn, typename CheckFn>
1381+
static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
1382+
bool First, BasicBlock *BB, NextFn Next,
1383+
CheckFn Check) {
1384+
if (!First) {
1385+
// Initialize to true (okay to propagate) `nocapture`. This means that loops
1386+
// will be okay.
1387+
auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
1388+
// If we already have a result, return it.
1389+
if (!Inserted)
1390+
return Iter->second;
1391+
1392+
if (!Check(BB->instructionsWithoutDebug())) {
1393+
Iter->second = false;
1394+
return false;
1395+
}
1396+
}
1397+
auto NextBBs = Next(BB);
1398+
// Check all Succs/Preds
1399+
for (BasicBlock *NextBB : NextBBs) {
1400+
if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
1401+
Check)) {
1402+
CachedRes[BB] = false;
1403+
return false;
1404+
}
1405+
}
1406+
1407+
return true;
1408+
}
1409+
1410+
// Assuming we have:
1411+
// define @foo(ptr nocapture %p) {
1412+
// entry:
1413+
// ...
1414+
// bar (ptr %p)
1415+
// ...
1416+
// }
1417+
//
1418+
// Determine if we can propagate `nocapture` to the `%p` at the
1419+
// `bar`.
1420+
static bool
1421+
CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
1422+
DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
1423+
BasicBlock *BB, CallBase *CB) {
1424+
// If CB returns and its used by anything other than `ret`, assume it may be
1425+
// capturing.
1426+
// Potential TODO: We could allow many operations.
1427+
if (!CB->getType()->isVoidTy())
1428+
for (auto Use : CB->users())
1429+
if (!isa<ReturnInst>(Use))
1430+
return false;
1431+
1432+
// Can't capture via return, so if no side-effects we are set.
1433+
if (!CB->mayHaveSideEffects())
1434+
return true;
1435+
1436+
auto It = CB->getIterator();
1437+
++It;
1438+
1439+
// Check that CB instruction with side-effects on all paths from
1440+
// `entry` that go through the CB and there are no `alloca`
1441+
// instructions. This accomplishes two things. 1) It ensures that
1442+
// after CB, there is no way a store/other could "clean up" any
1443+
// captures from CB. 2) There is no local state (i.e `alloca` or a
1444+
// local `malloc`) that could CB could have stored in params in.
1445+
if (ContainsSideEffects(make_range(It, BB->end())) ||
1446+
ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
1447+
return false;
1448+
1449+
if (!CheckPathFromBBRecurse(
1450+
PureFromBB, /*First=*/true, BB,
1451+
[](BasicBlock *CheckedBB) { return successors(CheckedBB); },
1452+
[](const auto &Region) { return !ContainsSideEffects(Region); }))
1453+
return false;
1454+
1455+
if (!CheckPathFromBBRecurse(
1456+
PureFromBB, /*First=*/true, BB,
1457+
[](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
1458+
[](const auto &Region) { return !ContainsScratchSpace(Region); }))
1459+
return false;
1460+
1461+
return true;
1462+
}
1463+
13661464
// Add attributes from CB params and Fn attributes that can always be propagated
13671465
// to the corresponding argument / inner callbases.
13681466
static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1375,6 +1473,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13751473
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
13761474
bool HasAttrToPropagate = false;
13771475

1476+
DenseMap<BasicBlock *, bool> PureFromBB{};
1477+
DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
1478+
13781479
// Attributes we can only propagate if the exact parameter is forwarded.
13791480
// We can propagate both poison generating and UB generating attributes
13801481
// without any extra checks. The only attribute that is tricky to propagate
@@ -1393,6 +1494,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13931494
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
13941495
if (CB.paramHasAttr(I, Attribute::ReadOnly))
13951496
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
1497+
if (CB.paramHasAttr(I, Attribute::NoCapture))
1498+
ValidObjParamAttrs.back().addAttribute(Attribute::NoCapture);
13961499

13971500
for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
13981501
Attribute Attr = CB.getParamAttr(I, AK);
@@ -1477,9 +1580,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
14771580
continue;
14781581
}
14791582

1480-
// If so, propagate its access attributes.
1481-
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
1583+
AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
1584+
// Check if we can propagate `nocapture`.
1585+
if (AS.hasAttribute(Attribute::NoCapture) &&
1586+
(NewInnerCB->paramHasAttr(I, Attribute::NoCapture) ||
1587+
!CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
1588+
cast<CallBase>(&Ins))))
1589+
AS = AS.removeAttribute(Context, Attribute::NoCapture);
14821590

1591+
// If so, propagate its access attributes.
1592+
AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
14831593
// We can have conflicting attributes from the inner callsite and
14841594
// to-be-inlined callsite. In that case, choose the most
14851595
// restrictive.

llvm/test/Transforms/Inline/prop-nocapture.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr nocapture %p) {
2020
define void @simple_nocapture_prop_caller(ptr %p) {
2121
; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
2222
; CHECK-SAME: (ptr [[P:%.*]]) {
23-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
23+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
2424
; CHECK-NEXT: ret void
2525
;
2626
call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr nocapture %p) {
4040
define i32 @nocapture_with_return_prop_caller(ptr %p) {
4141
; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
4242
; CHECK-SAME: (ptr [[P:%.*]]) {
43-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
43+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]])
4444
; CHECK-NEXT: ret i32 [[R_I]]
4545
;
4646
%r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
193193
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
194194
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
195195
; CHECK: F.i:
196-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
196+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
197197
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
198198
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
199199
; CHECK: nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
286286
define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
287287
; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
288288
; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
289-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
289+
; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
290290
; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
291291
; CHECK: T.i:
292-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
292+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]]) #[[ATTR3]]
293293
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
294294
; CHECK: F.i:
295295
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]

0 commit comments

Comments
 (0)