-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Inliner] Add support for preserving nocapture param attr
#113418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Inliner] Add support for preserving nocapture param attr
#113418
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) ChangesCurrently if we have: When inlining The goal of this patch is to preserve the
Ultimately these three checks are highly conservative, but should Full diff: https://github.com/llvm/llvm-project/pull/113418.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 4ad426285ce2f0..a6ff3c085b8fbb 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1351,6 +1351,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}
+template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
+ // Any instruction that may clear local scratch space CB stored
+ // into.
+ return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
+}
+
+template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
+ return any_of(Range, [](Instruction &I) {
+ // Any instruction that may create local scratch space CB can store
+ // into.
+ return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
+ });
+}
+
+template <typename NextFn, typename CheckFn>
+static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
+ bool First, BasicBlock *BB, NextFn Next,
+ CheckFn Check) {
+ if (!First) {
+ // Initialize to true (okay to propagate) `nocapture`. This means that loops
+ // will be okay.
+ auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
+ // If we already have a result, return it.
+ if (!Inserted)
+ return Iter->second;
+
+ if (!Check(BB->instructionsWithoutDebug())) {
+ Iter->second = false;
+ return false;
+ }
+ }
+ auto NextBBs = Next(BB);
+ // Check all Succs/Preds
+ for (BasicBlock *NextBB : NextBBs) {
+ if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
+ Check)) {
+ CachedRes[BB] = false;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Assuming we have:
+// define @foo(ptr nocapture %p) {
+// entry:
+// ...
+// bar (ptr %p)
+// ...
+// }
+//
+// Determine if we can propagate `nocapture` to the `%p` at the
+// `bar`.
+static bool
+CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
+ DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
+ BasicBlock *BB, CallBase *CB) {
+ // If CB returns and its used by anything other than `ret`, assume it may be
+ // capturing.
+ // Potential TODO: We could allow many operations.
+ if (!CB->getType()->isVoidTy())
+ for (auto Use : CB->users())
+ if (!isa<ReturnInst>(Use))
+ return false;
+
+ // Can't capture via return, so if no side-effects we are set.
+ if (!CB->mayHaveSideEffects())
+ return true;
+
+ auto It = CB->getIterator();
+ ++It;
+
+ // Check that CB instruction with side-effects on all paths from
+ // `entry` that go through the CB and there are no `alloca`
+ // instructions. This accomplishes two things. 1) It ensures that
+ // after CB, there is no way a store/other could "clean up" any
+ // captures from CB. 2) There is no local state (i.e `alloca` or a
+ // local `malloc`) that could CB could have stored in params in.
+ if (ContainsSideEffects(make_range(It, BB->end())) ||
+ ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
+ return false;
+
+ if (!CheckPathFromBBRecurse(
+ PureFromBB, /*First=*/true, BB,
+ [](BasicBlock *CheckedBB) { return successors(CheckedBB); },
+ [](const auto &Region) { return !ContainsSideEffects(Region); }))
+ return false;
+
+ if (!CheckPathFromBBRecurse(
+ PureFromBB, /*First=*/true, BB,
+ [](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
+ [](const auto &Region) { return !ContainsScratchSpace(Region); }))
+ return false;
+
+ return true;
+}
+
// Add attributes from CB params and Fn attributes that can always be propagated
// to the corresponding argument / inner callbases.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1363,6 +1461,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
bool HasAttrToPropagate = false;
+ DenseMap<BasicBlock *, bool> PureFromBB{};
+ DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
+
// Attributes we can only propagate if the exact parameter is forwarded.
// We can propagate both poison generating and UB generating attributes
// without any extra checks. The only attribute that is tricky to propagate
@@ -1381,6 +1482,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
if (CB.paramHasAttr(I, Attribute::ReadOnly))
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
+ if (CB.paramHasAttr(I, Attribute::NoCapture))
+ ValidObjParamAttrs.back().addAttribute(Attribute::NoCapture);
for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
Attribute Attr = CB.getParamAttr(I, AK);
@@ -1463,8 +1566,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
ArgNo = Arg->getArgNo();
}
+ AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
+ // Check if we can propagate `nocapture`.
+ if (AS.hasAttribute(Attribute::NoCapture) &&
+ (NewInnerCB->paramHasAttr(I, Attribute::NoCapture) ||
+ !CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
+ cast<CallBase>(&Ins))))
+ AS = AS.removeAttribute(Context, Attribute::NoCapture);
+
// If so, propagate its access attributes.
- AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
+ AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
// We can have conflicting attributes from the inner callsite and
// to-be-inlined callsite. In that case, choose the most
// restrictive.
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
new file mode 100644
index 00000000000000..a9db946db4f28d
--- /dev/null
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=inline -S < %s | FileCheck --check-prefixes=CHECK,NO_ASSUME %s
+; RUN: opt -passes=inline -S --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME
+
+declare void @void.call.p0(ptr)
+declare void @void.call.p0.p1(ptr, ptr)
+declare i32 @ret.call.p0(ptr)
+declare ptr @retp.call.p0(ptr)
+
+define void @simple_nocapture_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @simple_nocapture_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @simple_nocapture_prop(ptr %p)
+ ret void
+}
+
+define i32 @nocapture_with_return_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @ret.call.p0(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]])
+; CHECK-NEXT: ret i32 [[R_I]]
+;
+ %r = call i32 @nocapture_with_return_prop(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR:%.*]] = xor i32 [[R]], -1
+; CHECK-NEXT: ret i32 [[RR]]
+;
+ %r = call i32 @ret.call.p0(ptr %p)
+ %rr = xor i32 %r, -1
+ ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR_I:%.*]] = xor i32 [[R_I]], -1
+; CHECK-NEXT: ret i32 [[RR_I]]
+;
+ %r = call i32 @nocapture_with_return_prop_todo_indirect(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR:%.*]] = load i32, ptr [[R]], align 4
+; CHECK-NEXT: ret i32 [[RR]]
+;
+ %r = call ptr @void.call.p0(ptr %p)
+ %rr = load i32, ptr %r
+ ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR_I:%.*]] = load i32, ptr [[R_I]], align 4
+; CHECK-NEXT: ret i32 [[RR_I]]
+;
+ %r = call i32 @nocapture_with_return_prop_fail_maybe_captures(ptr %p)
+ ret i32 %r
+}
+
+define void @nocapture_prop_fail_preceding_alloca(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ call void @void.call.p0.p1(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @nocapture_prop_fail_preceding_alloca(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ call void @nocapture_prop_fail_preceding_alloca(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ br i1 %c, label %T, label %F
+T:
+ call void @void.call.p0(ptr %p2)
+ ret void
+F:
+ call void @void.call.p0.p1(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT]]
+; CHECK: nocapture_prop_fail_preceding_alloca2.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_preceding_alloca2(ptr %p, i1 %c)
+ ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca(ptr nocapture %p, i1 %c) alwaysinline {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %T, label %F
+T:
+ %p2 = alloca i32
+ call void @void.call.p0(ptr %p2)
+ ret void
+F:
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
+; CHECK: nocapture_prop_okay_seperate_alloca.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_okay_seperate_alloca(ptr %p, i1 %c)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_ensuing_side_effects(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ br i1 %c, label %T, label %F
+T:
+ call void @void.call.p0(ptr %p)
+ ret void
+F:
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT]]
+; CHECK: nocapture_prop_fail_ensuing_side_effects2.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_ensuing_side_effects2(ptr %p, i1 %c)
+ ret void
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: ret i32 [[R]]
+; CHECK: F:
+; CHECK-NEXT: ret i32 0
+;
+ call void @void.call.p0(ptr %p)
+ br i1 %c, label %T, label %F
+T:
+ %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+ ret i32 %r
+F:
+ ret i32 0
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]]) #[[ATTR3]]
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
+; CHECK: nocapture_prop_okay_no_sideeffects.exit:
+; CHECK-NEXT: [[R1:%.*]] = phi i32 [ [[R_I]], [[T_I]] ], [ 0, [[F_I]] ]
+; CHECK-NEXT: ret i32 [[R1]]
+;
+ %r = call i32 @nocapture_prop_okay_no_sideeffects(ptr %p, i1 %c)
+ ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ call void @void.call.p0(ptr %p)
+ %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+ ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @nocapture_prop_okay_no_sideeffects2(ptr [[P]], i1 [[C]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @nocapture_prop_okay_no_sideeffects2(ptr %p, i1 %c)
+ ret i32 %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_ASSUME: {{.*}}
+; USE_ASSUME: {{.*}}
|
|
ping |
|
It seems to me this type of attribute propagation should have been done in FunctionAttrs/Attributor, and not the responsibility of the inliner |
That doesn't quite work because at inlining we can lose information that can be irrecoverable. i.e |
Yes, this would be input value nocapture propagating to the outgoing callsite attribute |
Oh I see what you are saying. I tried adding this to FunctionAttrs (way back), but the feedback was to just move it to inlining. |
I don't have any inherent opposition to just propagating all these things before inlining. It would certainly simplify things to remove all of the propagation code from The only downside I guess is that during inlining we are sometimes able to simplify expressions that otherwise make the propagation impossible. |
|
Can you remind me what the original motivating case was? Something like a nocapture wrapper around inline assembly? |
Yes, currently there is no way to apply For example: When Edit: Also generally, I think it would be convenient if we could rely on attributes from inline-helpers being forwarded appropriately. |
c349e96 to
e7c7a29
Compare
|
rebased |
|
@nikic Can you give an indication of if you support this approach? If so I'm going to update to handle the new |
ping |
Currently if we have: ``` define @foo(ptr nocapture %p) { entry: ... bar(ptr %p) ... } ``` When inlining `foo`, we will lose the `nocapture` on `%p` which might not be recoverable. The goal of this patch is to preserve the `nocapture` if some conservative analysis indicates we can. 1) Return value of `bar` is either unused or only used as return of `foo` (this rules of capture via return). 2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a path from `entry` to `bar` that goes an `alloca`. This helps rule out `bar` capturing `%p` in memory in a way that wouldn't be capturing outside of the scope of `foo`. 3) No paths in `foo` that go through `bar` have any instructions with side-effects other than `bar`. This rules out `bar` capturing `%p` in memory, but then some later instructions clearing the memory capture s.t `nocapture` in `foo` still holds. It also rules out some function (i.e `malloc`) creating scratch memory that `bar` could capture `%p` in but still only visible in the scope of `foo`. Ultimately these three checks are highly conservative, but should allow some preservation.
e7c7a29 to
afd608c
Compare
|
Rebased with new |
Currently if we have:
When inlining
foo, we will lose thenocaptureon%pwhich mightnot be recoverable.
The goal of this patch is to preserve the
nocaptureif someconservative analysis indicates we can.
Return value of
baris either unused or only used as return offoo(this rules of capture via return).No
alloca(or scratch memory of any sort) infoos.t there is apath from
entrytobarthat goes analloca. This helps ruleout
barcapturing%pin memory in a way that wouldn't becapturing outside of the scope of
foo.No paths in
foothat go throughbarhave any instructions withside-effects other than
bar. This rules outbarcapturing%pin memory, but then some later instructions clearing the memory
capture s.t
nocaptureinfoostill holds. It also rules outsome function (i.e
malloc) creating scratch memory thatbarcould capture
%pin but still only visible in the scope offoo.Ultimately these three checks are highly conservative, but should
allow some preservation.