Skip to content

Commit afd608c

Browse files
committed
[Inliner] Add support for preserving nocapture param attr
Currently if we have: ``` define @foo(ptr nocapture %p) { entry: ... bar(ptr %p) ... } ``` When inlining `foo`, we will lose the `nocapture` on `%p` which might not be recoverable. The goal of this patch is to preserve the `nocapture` if some conservative analysis indicates we can. 1) Return value of `bar` is either unused or only used as return of `foo` (this rules of capture via return). 2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a path from `entry` to `bar` that goes an `alloca`. This helps rule out `bar` capturing `%p` in memory in a way that wouldn't be capturing outside of the scope of `foo`. 3) No paths in `foo` that go through `bar` have any instructions with side-effects other than `bar`. This rules out `bar` capturing `%p` in memory, but then some later instructions clearing the memory capture s.t `nocapture` in `foo` still holds. It also rules out some function (i.e `malloc`) creating scratch memory that `bar` could capture `%p` in but still only visible in the scope of `foo`. Ultimately these three checks are highly conservative, but should allow some preservation.
1 parent a08347a commit afd608c

File tree

2 files changed

+118
-7
lines changed

2 files changed

+118
-7
lines changed

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#include "llvm/Support/Casting.h"
6969
#include "llvm/Support/CommandLine.h"
7070
#include "llvm/Support/ErrorHandling.h"
71+
#include "llvm/Support/ModRef.h"
7172
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
7273
#include "llvm/Transforms/Utils/Cloning.h"
7374
#include "llvm/Transforms/Utils/Local.h"
@@ -1364,6 +1365,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
13641365
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
13651366
}
13661367

1368+
template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
1369+
// Any instruction that may clear local scratch space CB stored
1370+
// into.
1371+
return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
1372+
}
1373+
1374+
template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
1375+
return any_of(Range, [](Instruction &I) {
1376+
// Any instruction that may create local scratch space CB can store
1377+
// into.
1378+
return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
1379+
});
1380+
}
1381+
1382+
template <typename NextFn, typename CheckFn>
1383+
static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
1384+
bool First, BasicBlock *BB, NextFn Next,
1385+
CheckFn Check) {
1386+
if (!First) {
1387+
// Initialize to true (okay to propagate) `nocapture`. This means that loops
1388+
// will be okay.
1389+
auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
1390+
// If we already have a result, return it.
1391+
if (!Inserted)
1392+
return Iter->second;
1393+
1394+
if (!Check(BB->instructionsWithoutDebug())) {
1395+
Iter->second = false;
1396+
return false;
1397+
}
1398+
}
1399+
auto NextBBs = Next(BB);
1400+
// Check all Succs/Preds
1401+
for (BasicBlock *NextBB : NextBBs) {
1402+
if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
1403+
Check)) {
1404+
CachedRes[BB] = false;
1405+
return false;
1406+
}
1407+
}
1408+
1409+
return true;
1410+
}
1411+
1412+
// Assuming we have:
1413+
// define @foo(ptr nocapture %p) {
1414+
// entry:
1415+
// ...
1416+
// bar (ptr %p)
1417+
// ...
1418+
// }
1419+
//
1420+
// Determine if we can propagate `nocapture` to the `%p` at the
1421+
// `bar`.
1422+
static bool
1423+
CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
1424+
DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
1425+
BasicBlock *BB, CallBase *CB) {
1426+
// If CB returns and its used by anything other than `ret`, assume it may be
1427+
// capturing.
1428+
// Potential TODO: We could allow many operations.
1429+
if (!CB->getType()->isVoidTy())
1430+
for (auto Use : CB->users())
1431+
if (!isa<ReturnInst>(Use))
1432+
return false;
1433+
1434+
// Can't capture via return, so if no side-effects we are set.
1435+
if (!CB->mayHaveSideEffects())
1436+
return true;
1437+
1438+
auto It = CB->getIterator();
1439+
++It;
1440+
1441+
// Check that CB instruction with side-effects on all paths from
1442+
// `entry` that go through the CB and there are no `alloca`
1443+
// instructions. This accomplishes two things. 1) It ensures that
1444+
// after CB, there is no way a store/other could "clean up" any
1445+
// captures from CB. 2) There is no local state (i.e `alloca` or a
1446+
// local `malloc`) that could CB could have stored in params in.
1447+
if (ContainsSideEffects(make_range(It, BB->end())) ||
1448+
ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
1449+
return false;
1450+
1451+
if (!CheckPathFromBBRecurse(
1452+
PureFromBB, /*First=*/true, BB,
1453+
[](BasicBlock *CheckedBB) { return successors(CheckedBB); },
1454+
[](const auto &Region) { return !ContainsSideEffects(Region); }))
1455+
return false;
1456+
1457+
if (!CheckPathFromBBRecurse(
1458+
PureFromBB, /*First=*/true, BB,
1459+
[](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
1460+
[](const auto &Region) { return !ContainsScratchSpace(Region); }))
1461+
return false;
1462+
1463+
return true;
1464+
}
1465+
13671466
// Add attributes from CB params and Fn attributes that can always be propagated
13681467
// to the corresponding argument / inner callbases.
13691468
static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1376,6 +1475,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13761475
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
13771476
bool HasAttrToPropagate = false;
13781477

1478+
DenseMap<BasicBlock *, bool> PureFromBB{};
1479+
DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
1480+
13791481
// Attributes we can only propagate if the exact parameter is forwarded.
13801482
// We can propagate both poison generating and UB generating attributes
13811483
// without any extra checks. The only attribute that is tricky to propagate
@@ -1394,6 +1496,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13941496
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
13951497
if (CB.paramHasAttr(I, Attribute::ReadOnly))
13961498
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
1499+
if (CB.doesNotCapture(I))
1500+
ValidObjParamAttrs.back().addCapturesAttr(CaptureInfo::none());
13971501

13981502
for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
13991503
Attribute Attr = CB.getParamAttr(I, AK);
@@ -1478,9 +1582,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
14781582
continue;
14791583
}
14801584

1481-
// If so, propagate its access attributes.
1482-
AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
1585+
AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
1586+
// Check if we can propagate `captures(none)`.
1587+
if (capturesNothing(AS.getCaptureInfo()) &&
1588+
(NewInnerCB->doesNotCapture(I) ||
1589+
!CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
1590+
cast<CallBase>(&Ins))))
1591+
AS = AS.removeAttribute(Context, Attribute::Captures);
14831592

1593+
// If so, propagate its access attributes.
1594+
AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
14841595
// We can have conflicting attributes from the inner callsite and
14851596
// to-be-inlined callsite. In that case, choose the most
14861597
// restrictive.

llvm/test/Transforms/Inline/prop-nocapture.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr captures(none) %p) {
2020
define void @simple_nocapture_prop_caller(ptr %p) {
2121
; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
2222
; CHECK-SAME: (ptr [[P:%.*]]) {
23-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
23+
; CHECK-NEXT: call void @void.call.p0(ptr captures(none) [[P]])
2424
; CHECK-NEXT: ret void
2525
;
2626
call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr captures(none) %p) {
4040
define i32 @nocapture_with_return_prop_caller(ptr %p) {
4141
; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
4242
; CHECK-SAME: (ptr [[P:%.*]]) {
43-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
43+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]])
4444
; CHECK-NEXT: ret i32 [[R_I]]
4545
;
4646
%r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
193193
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
194194
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
195195
; CHECK: F.i:
196-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
196+
; CHECK-NEXT: call void @void.call.p0(ptr captures(none) [[P]])
197197
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
198198
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
199199
; CHECK: nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
286286
define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
287287
; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
288288
; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
289-
; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
289+
; CHECK-NEXT: call void @void.call.p0(ptr captures(none) [[P]])
290290
; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
291291
; CHECK: T.i:
292-
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
292+
; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]]) #[[ATTR3]]
293293
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
294294
; CHECK: F.i:
295295
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]

0 commit comments

Comments
 (0)