diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index b92d8b16daad2..d5750152502cd 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -68,6 +68,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ModRef.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -1364,6 +1365,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1); } +template static bool ContainsSideEffects(RangeT Range) { + // Any instruction that may clear local scratch space CB stored + // into. + return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); }); +} + +template static bool ContainsScratchSpace(RangeT Range) { + return any_of(Range, [](Instruction &I) { + // Any instruction that may create local scratch space CB can store + // into. + return I.mayHaveSideEffects() || isa(&I); + }); +} + +template +static bool CheckPathFromBBRecurse(DenseMap &CachedRes, + bool First, BasicBlock *BB, NextFn Next, + CheckFn Check) { + if (!First) { + // Initialize to true (okay to propagate) `nocapture`. This means that loops + // will be okay. + auto [Iter, Inserted] = CachedRes.try_emplace(BB, true); + // If we already have a result, return it. + if (!Inserted) + return Iter->second; + + if (!Check(BB->instructionsWithoutDebug())) { + Iter->second = false; + return false; + } + } + auto NextBBs = Next(BB); + // Check all Succs/Preds + for (BasicBlock *NextBB : NextBBs) { + if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next, + Check)) { + CachedRes[BB] = false; + return false; + } + } + + return true; +} + +// Assuming we have: +// define @foo(ptr nocapture %p) { +// entry: +// ... +// bar (ptr %p) +// ... 
+// } +// +// Determine if we can propagate `nocapture` to the `%p` at the +// `bar`. +static bool +CanPropagateNoCaptureAtCB(DenseMap &PureFromBB, + DenseMap &NoLocalStateToBB, + BasicBlock *BB, CallBase *CB) { + // If CB returns and its used by anything other than `ret`, assume it may be + // capturing. + // Potential TODO: We could allow many operations. + if (!CB->getType()->isVoidTy()) + for (auto Use : CB->users()) + if (!isa(Use)) + return false; + + // Can't capture via return, so if no side-effects we are set. + if (!CB->mayHaveSideEffects()) + return true; + + auto It = CB->getIterator(); + ++It; + + // Check that CB instruction with side-effects on all paths from + // `entry` that go through the CB and there are no `alloca` + // instructions. This accomplishes two things. 1) It ensures that + // after CB, there is no way a store/other could "clean up" any + // captures from CB. 2) There is no local state (i.e `alloca` or a + // local `malloc`) that could CB could have stored in params in. + if (ContainsSideEffects(make_range(It, BB->end())) || + ContainsScratchSpace(make_range(BB->begin(), CB->getIterator()))) + return false; + + if (!CheckPathFromBBRecurse( + PureFromBB, /*First=*/true, BB, + [](BasicBlock *CheckedBB) { return successors(CheckedBB); }, + [](const auto &Region) { return !ContainsSideEffects(Region); })) + return false; + + if (!CheckPathFromBBRecurse( + PureFromBB, /*First=*/true, BB, + [](BasicBlock *CheckedBB) { return predecessors(CheckedBB); }, + [](const auto &Region) { return !ContainsScratchSpace(Region); })) + return false; + + return true; +} + // Add attributes from CB params and Fn attributes that can always be propagated // to the corresponding argument / inner callbases. 
static void AddParamAndFnBasicAttributes(const CallBase &CB, @@ -1376,6 +1475,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, SmallVector ValidObjParamAttrs, ValidExactParamAttrs; bool HasAttrToPropagate = false; + DenseMap PureFromBB{}; + DenseMap NoLocalStateToBB{}; + // Attributes we can only propagate if the exact parameter is forwarded. // We can propagate both poison generating and UB generating attributes // without any extra checks. The only attribute that is tricky to propagate @@ -1394,6 +1496,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone); if (CB.paramHasAttr(I, Attribute::ReadOnly)) ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly); + if (CB.doesNotCapture(I)) + ValidObjParamAttrs.back().addCapturesAttr(CaptureInfo::none()); for (Attribute::AttrKind AK : ExactAttrsToPropagate) { Attribute Attr = CB.getParamAttr(I, AK); @@ -1478,9 +1582,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, continue; } - // If so, propagate its access attributes. - AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]); + AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]); + // Check if we can propagate `captures(none)`. + if (capturesNothing(AS.getCaptureInfo()) && + (NewInnerCB->doesNotCapture(I) || + !CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB, + cast(&Ins)))) + AS = AS.removeAttribute(Context, Attribute::Captures); + // If so, propagate its access attributes. + AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS}); // We can have conflicting attributes from the inner callsite and // to-be-inlined callsite. In that case, choose the most // restrictive. 
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
new file mode 100644
index 0000000000000..5517b489f64fd
--- /dev/null
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=inline -S < %s | FileCheck --check-prefixes=CHECK,NO_ASSUME %s
+; RUN: opt -passes=inline -S --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME
+; Tests propagation of `captures(none)` from a callee's parameters to the calls
+; inside it when the callee is inlined.
+; NOTE(review): some assertions below were updated by hand; regenerate them
+; with utils/update_test_checks.py before committing.
+
+declare void @void.call.p0(ptr)
+declare void @void.call.p0.p1(ptr, ptr)
+declare i32 @ret.call.p0(ptr)
+declare ptr @retp.call.p0(ptr)
+
+define void @simple_nocapture_prop(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @simple_nocapture_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @simple_nocapture_prop(ptr %p)
+  ret void
+}
+
+define i32 @nocapture_with_return_prop(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]])
+; CHECK-NEXT:    ret i32 [[R_I]]
+;
+  %r = call i32 @nocapture_with_return_prop(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = xor i32 [[R]], -1
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  %rr = xor i32 %r, -1
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = xor i32 [[R_I]], -1
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_todo_indirect(ptr %p)
+  ret i32 %r
+}
+
+; The call result feeds a load rather than a `ret`, so it may carry a captured
+; pointer and `captures(none)` must not be propagated.
+define i32 @nocapture_with_return_prop_fail_maybe_captures(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @retp.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = load i32, ptr [[R]], align 4
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call ptr @retp.call.p0(ptr %p)
+  %rr = load i32, ptr %r
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call ptr @retp.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = load i32, ptr [[R_I]], align 4
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_fail_maybe_captures(ptr %p)
+  ret i32 %r
+}
+
+define void @nocapture_prop_fail_preceding_alloca(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+; The callee creates an alloca before the call, so `captures(none)` must not be
+; propagated. The caller passes exactly one argument so the call is inlined.
+define void @nocapture_prop_fail_preceding_alloca_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_preceding_alloca(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT]]
+; CHECK:       nocapture_prop_fail_preceding_alloca2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_preceding_alloca2(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca(ptr captures(none) %p, i1 %c) alwaysinline {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %T, label %F
+T:
+  %p2 = alloca i32
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
+; CHECK:       nocapture_prop_okay_seperate_alloca.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_okay_seperate_alloca(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p)
+  ret void
+F:
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT]]
+; CHECK:       nocapture_prop_fail_ensuing_side_effects2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects2(ptr %p, i1 %c)
+  ret void
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret i32 [[R]]
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 0
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+F:
+  ret i32 0
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
+; CHECK:       nocapture_prop_okay_no_sideeffects.exit:
+; CHECK-NEXT:    [[R1:%.*]] = phi i32 [ [[R_I]], [[T_I]] ], [ 0, [[F_I]] ]
+; CHECK-NEXT:    ret i32 [[R1]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects(ptr %p, i1 %c)
+  ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  call void @void.call.p0(ptr %p)
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+}
+
+; Nothing with side effects follows the first call, so both inlined calls get
+; `captures(none)`. The caller passes exactly one argument so it is inlined.
+define i32 @nocapture_prop_okay_no_sideeffects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    ret i32 [[R_I]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects2(ptr %p)
+  ret i32 %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_ASSUME: {{.*}}
+; USE_ASSUME: {{.*}}