6868#include " llvm/Support/Casting.h"
6969#include " llvm/Support/CommandLine.h"
7070#include " llvm/Support/ErrorHandling.h"
71+ #include " llvm/Support/ModRef.h"
7172#include " llvm/Transforms/Utils/AssumeBundleBuilder.h"
7273#include " llvm/Transforms/Utils/Cloning.h"
7374#include " llvm/Transforms/Utils/Local.h"
@@ -1364,6 +1365,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
13641365 ++BeginIt, End->getIterator (), InlinerAttributeWindow + 1 );
13651366}
13661367
1368+ template <typename RangeT> static bool ContainsSideEffects (RangeT Range) {
1369+ // Any instruction that may clear local scratch space CB stored
1370+ // into.
1371+ return any_of (Range, [](Instruction &I) { return I.mayHaveSideEffects (); });
1372+ }
1373+
1374+ template <typename RangeT> static bool ContainsScratchSpace (RangeT Range) {
1375+ return any_of (Range, [](Instruction &I) {
1376+ // Any instruction that may create local scratch space CB can store
1377+ // into.
1378+ return I.mayHaveSideEffects () || isa<AllocaInst>(&I);
1379+ });
1380+ }
1381+
1382+ template <typename NextFn, typename CheckFn>
1383+ static bool CheckPathFromBBRecurse (DenseMap<BasicBlock *, bool > &CachedRes,
1384+ bool First, BasicBlock *BB, NextFn Next,
1385+ CheckFn Check) {
1386+ if (!First) {
1387+ // Initialize to true (okay to propagate) `nocapture`. This means that loops
1388+ // will be okay.
1389+ auto [Iter, Inserted] = CachedRes.try_emplace (BB, true );
1390+ // If we already have a result, return it.
1391+ if (!Inserted)
1392+ return Iter->second ;
1393+
1394+ if (!Check (BB->instructionsWithoutDebug ())) {
1395+ Iter->second = false ;
1396+ return false ;
1397+ }
1398+ }
1399+ auto NextBBs = Next (BB);
1400+ // Check all Succs/Preds
1401+ for (BasicBlock *NextBB : NextBBs) {
1402+ if (!CheckPathFromBBRecurse (CachedRes, /* First=*/ false , NextBB, Next,
1403+ Check)) {
1404+ CachedRes[BB] = false ;
1405+ return false ;
1406+ }
1407+ }
1408+
1409+ return true ;
1410+ }
1411+
1412+ // Assuming we have:
1413+ // define @foo(ptr nocapture %p) {
1414+ // entry:
1415+ // ...
1416+ // bar (ptr %p)
1417+ // ...
1418+ // }
1419+ //
1420+ // Determine if we can propagate `nocapture` to the `%p` at the
1421+ // `bar`.
1422+ static bool
1423+ CanPropagateNoCaptureAtCB (DenseMap<BasicBlock *, bool > &PureFromBB,
1424+ DenseMap<BasicBlock *, bool > &NoLocalStateToBB,
1425+ BasicBlock *BB, CallBase *CB) {
1426+ // If CB returns and its used by anything other than `ret`, assume it may be
1427+ // capturing.
1428+ // Potential TODO: We could allow many operations.
1429+ if (!CB->getType ()->isVoidTy ())
1430+ for (auto Use : CB->users ())
1431+ if (!isa<ReturnInst>(Use))
1432+ return false ;
1433+
1434+ // Can't capture via return, so if no side-effects we are set.
1435+ if (!CB->mayHaveSideEffects ())
1436+ return true ;
1437+
1438+ auto It = CB->getIterator ();
1439+ ++It;
1440+
1441+ // Check that CB instruction with side-effects on all paths from
1442+ // `entry` that go through the CB and there are no `alloca`
1443+ // instructions. This accomplishes two things. 1) It ensures that
1444+ // after CB, there is no way a store/other could "clean up" any
1445+ // captures from CB. 2) There is no local state (i.e `alloca` or a
1446+ // local `malloc`) that could CB could have stored in params in.
1447+ if (ContainsSideEffects (make_range (It, BB->end ())) ||
1448+ ContainsScratchSpace (make_range (BB->begin (), CB->getIterator ())))
1449+ return false ;
1450+
1451+ if (!CheckPathFromBBRecurse (
1452+ PureFromBB, /* First=*/ true , BB,
1453+ [](BasicBlock *CheckedBB) { return successors (CheckedBB); },
1454+ [](const auto &Region) { return !ContainsSideEffects (Region); }))
1455+ return false ;
1456+
1457+ if (!CheckPathFromBBRecurse (
1458+ PureFromBB, /* First=*/ true , BB,
1459+ [](BasicBlock *CheckedBB) { return predecessors (CheckedBB); },
1460+ [](const auto &Region) { return !ContainsScratchSpace (Region); }))
1461+ return false ;
1462+
1463+ return true ;
1464+ }
1465+
13671466// Add attributes from CB params and Fn attributes that can always be propagated
13681467// to the corresponding argument / inner callbases.
13691468static void AddParamAndFnBasicAttributes (const CallBase &CB,
@@ -1376,6 +1475,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13761475 SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
13771476 bool HasAttrToPropagate = false ;
13781477
1478+ DenseMap<BasicBlock *, bool > PureFromBB{};
1479+ DenseMap<BasicBlock *, bool > NoLocalStateToBB{};
1480+
13791481 // Attributes we can only propagate if the exact parameter is forwarded.
13801482 // We can propagate both poison generating and UB generating attributes
13811483 // without any extra checks. The only attribute that is tricky to propagate
@@ -1394,6 +1496,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
13941496 ValidObjParamAttrs.back ().addAttribute (Attribute::ReadNone);
13951497 if (CB.paramHasAttr (I, Attribute::ReadOnly))
13961498 ValidObjParamAttrs.back ().addAttribute (Attribute::ReadOnly);
1499+ if (CB.doesNotCapture (I))
1500+ ValidObjParamAttrs.back ().addCapturesAttr (CaptureInfo::none ());
13971501
13981502 for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
13991503 Attribute Attr = CB.getParamAttr (I, AK);
@@ -1478,9 +1582,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
14781582 continue ;
14791583 }
14801584
1481- // If so, propagate its access attributes.
1482- AL = AL.addParamAttributes (Context, I, ValidObjParamAttrs[ArgNo]);
1585+ AttributeSet AS = AttributeSet::get (Context, ValidObjParamAttrs[ArgNo]);
1586+ // Check if we can propagate `captures(none)`.
1587+ if (capturesNothing (AS.getCaptureInfo ()) &&
1588+ (NewInnerCB->doesNotCapture (I) ||
1589+ !CanPropagateNoCaptureAtCB (PureFromBB, NoLocalStateToBB, &BB,
1590+ cast<CallBase>(&Ins))))
1591+ AS = AS.removeAttribute (Context, Attribute::Captures);
14831592
1593+ // If so, propagate its access attributes.
1594+ AL = AL.addParamAttributes (Context, I, AttrBuilder{Context, AS});
14841595 // We can have conflicting attributes from the inner callsite and
14851596 // to-be-inlined callsite. In that case, choose the most
14861597 // restrictive.
0 commit comments