@@ -124,7 +124,7 @@ static cl::opt<unsigned>
124124
125125// Optionally enable cloning of callsites involved with recursive cycles
126126static cl::opt<bool > AllowRecursiveCallsites (
127- " memprof-allow-recursive-callsites" , cl::init(false ), cl::Hidden,
127+ " memprof-allow-recursive-callsites" , cl::init(true ), cl::Hidden,
128128 cl::desc(" Allow cloning of callsites involved in recursive cycles" ));
129129
130130// When disabled, try to detect and prevent cloning of recursive contexts.
@@ -301,12 +301,14 @@ class CallsiteContextGraph {
301301 // callers (i.e. if this is the leaf allocation node).
302302 if (!CalleeEdges.empty ())
303303 return &CalleeEdges;
304- if (!CallerEdges.empty ()) {
305- // A node with caller edges but no callee edges must be the allocation
306- // node.
307- assert (IsAllocation);
304+ // Typically if the callee edges are empty either the caller edges are
305+ // also empty, or this is an allocation (leaf node). However, if we are
306+ // allowing recursive callsites and contexts this will be violated for
307+ // incompletely cloned recursive cycles.
308+ assert (CallerEdges.empty () || IsAllocation ||
309+ (AllowRecursiveCallsites && AllowRecursiveContexts));
310+ if (!CallerEdges.empty () && IsAllocation)
308311 return &CallerEdges;
309- }
310312 return nullptr ;
311313 }
312314
@@ -403,8 +405,13 @@ class CallsiteContextGraph {
403405 // True if this node was effectively removed from the graph, in which case
404406 // it should have an allocation type of None and empty context ids.
405407 bool isRemoved () const {
406- assert ((AllocTypes == (uint8_t )AllocationType::None) ==
407- emptyContextIds ());
408+ // Typically if the callee edges are empty either the caller edges are
409+ // also empty, or this is an allocation (leaf node). However, if we are
410+ // allowing recursive callsites and contexts this will be violated for
411+ // incompletely cloned recursive cycles.
412+ assert ((AllowRecursiveCallsites && AllowRecursiveContexts) ||
413+ (AllocTypes == (uint8_t )AllocationType::None) ==
414+ emptyContextIds ());
408415 return AllocTypes == (uint8_t )AllocationType::None;
409416 }
410417
@@ -1344,16 +1351,48 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
13441351 DenseSet<uint32_t > RemainingContextIds) {
13451352 auto &OrigEdges =
13461353 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges ;
1354+ DenseSet<uint32_t > RecursiveContextIds;
1355+ DenseSet<uint32_t > AllCallerContextIds;
1356+ if (AllowRecursiveCallsites) {
1357+ // Identify which context ids are recursive which is needed to properly
1358+ // update the RemainingContextIds set. The relevant recursive context ids
1359+ // are those that are in multiple edges.
1360+ for (auto &CE : OrigEdges) {
1361+ AllCallerContextIds.reserve (CE->getContextIds ().size ());
1362+ for (auto Id : CE->getContextIds ())
1363+ if (!AllCallerContextIds.insert (Id).second )
1364+ RecursiveContextIds.insert (Id);
1365+ }
1366+ }
13471367 // Increment iterator in loop so that we can remove edges as needed.
13481368 for (auto EI = OrigEdges.begin (); EI != OrigEdges.end ();) {
13491369 auto Edge = *EI;
1370+ DenseSet<uint32_t > NewEdgeContextIds;
1371+ DenseSet<uint32_t > NotFoundContextIds;
13501372 // Remove any matching context ids from Edge, return set that were found and
13511373 // removed, these are the new edge's context ids. Also update the remaining
13521374 // (not found ids).
1353- DenseSet<uint32_t > NewEdgeContextIds, NotFoundContextIds;
13541375 set_subtract (Edge->getContextIds (), RemainingContextIds, NewEdgeContextIds,
13551376 NotFoundContextIds);
1356- RemainingContextIds.swap (NotFoundContextIds);
1377+ // Update the remaining context ids set for the later edges. This is a
1378+ // compile time optimization.
1379+ if (RecursiveContextIds.empty ()) {
1380+ // No recursive ids, so all of the previously remaining context ids that
1381+ // were not seen on this edge are the new remaining set.
1382+ RemainingContextIds.swap (NotFoundContextIds);
1383+ } else {
1384+ // Keep the recursive ids in the remaining set as we expect to see those
1385+ // on another edge. We can remove the non-recursive remaining ids that
1386+ // were seen on this edge, however. We already have the set of remaining
1387+ // ids that were on this edge (in NewEdgeContextIds). Figure out which are
1388+ // non-recursive and only remove those. Note that despite the higher
1389+ // overhead of updating the remaining context ids set when recursion
1390+ // handling is enabled, it was found to be at worst performance neutral
1391+ // and in one case a clear win.
1392+ DenseSet<uint32_t > NonRecursiveRemainingCurEdgeIds =
1393+ set_difference (NewEdgeContextIds, RecursiveContextIds);
1394+ set_subtract (RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1395+ }
13571396 // If no matching context ids for this edge, skip it.
13581397 if (NewEdgeContextIds.empty ()) {
13591398 ++EI;
@@ -1410,9 +1449,9 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
14101449 set_union (CallerEdgeContextIds, Edge->ContextIds );
14111450 }
14121451 // Node can have more context ids than callers if some contexts terminate at
1413- // node and some are longer. If we are allowing recursive callsites but
1414- // haven't disabled recursive contexts, this will be violated for
1415- // incompletely cloned recursive cycles, so skip the checking in that case.
1452+ // node and some are longer. If we are allowing recursive callsites and
1453+ // contexts this will be violated for incompletely cloned recursive cycles,
1454+ // so skip the checking in that case.
14161455 assert ((AllowRecursiveCallsites && AllowRecursiveContexts) ||
14171456 NodeContextIds == CallerEdgeContextIds ||
14181457 set_is_subset (CallerEdgeContextIds, NodeContextIds));
@@ -1425,7 +1464,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
14251464 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
14261465 set_union (CalleeEdgeContextIds, Edge->getContextIds ());
14271466 }
1428- assert (NodeContextIds == CalleeEdgeContextIds);
1467+ // If we are allowing recursive callsites and contexts this will be violated
1468+ // for incompletely cloned recursive cycles, so skip the checking in that
1469+ // case.
1470+ assert ((AllowRecursiveCallsites && AllowRecursiveContexts) ||
1471+ NodeContextIds == CalleeEdgeContextIds);
14291472 }
14301473 // FIXME: Since this checking is only invoked under an option, we should
14311474 // change the error checking from using assert to something that will trigger
@@ -3134,6 +3177,12 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31343177 // over to the corresponding edge into the clone (which is created here if
31353178 // this is a newly created clone).
31363179 for (auto &OldCalleeEdge : OldCallee->CalleeEdges ) {
3180+ ContextNode *CalleeToUse = OldCalleeEdge->Callee ;
3181+ // If this is a direct recursion edge, use NewCallee (the clone) as the
3182+ // callee as well, so that any edge updated/created here is also direct
3183+ // recursive.
3184+ if (CalleeToUse == OldCallee)
3185+ CalleeToUse = NewCallee;
31373186 // The context ids moving to the new callee are the subset of this edge's
31383187 // context ids and the context ids on the caller edge being moved.
31393188 DenseSet<uint32_t > EdgeContextIdsToMove =
@@ -3147,17 +3196,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31473196 // clone, specifically during function assignment, where we would have
31483197 // removed none type edges after creating the clone. If we can't find
31493198 // a corresponding edge there, fall through to the cloning below.
3150- if (auto *NewCalleeEdge =
3151- NewCallee->findEdgeFromCallee (OldCalleeEdge->Callee )) {
3199+ if (auto *NewCalleeEdge = NewCallee->findEdgeFromCallee (CalleeToUse)) {
31523200 NewCalleeEdge->getContextIds ().insert (EdgeContextIdsToMove.begin (),
31533201 EdgeContextIdsToMove.end ());
31543202 NewCalleeEdge->AllocTypes |= computeAllocType (EdgeContextIdsToMove);
31553203 continue ;
31563204 }
31573205 }
31583206 auto NewEdge = std::make_shared<ContextEdge>(
3159- OldCalleeEdge-> Callee , NewCallee,
3160- computeAllocType (EdgeContextIdsToMove), EdgeContextIdsToMove);
3207+ CalleeToUse , NewCallee, computeAllocType (EdgeContextIdsToMove) ,
3208+ EdgeContextIdsToMove);
31613209 NewCallee->CalleeEdges .push_back (NewEdge);
31623210 NewEdge->Callee ->CallerEdges .push_back (NewEdge);
31633211 }
@@ -3183,13 +3231,20 @@ template <typename DerivedCCG, typename FuncTy, typename CallTy>
31833231void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31843232 moveCalleeEdgeToNewCaller (const std::shared_ptr<ContextEdge> &Edge,
31853233 ContextNode *NewCaller) {
3234+ auto *OldCallee = Edge->Callee ;
3235+ auto *NewCallee = OldCallee;
3236+ // If this edge was direct recursive, make any new/updated edge also direct
3237+ // recursive to NewCaller.
3238+ bool Recursive = Edge->Caller == Edge->Callee ;
3239+ if (Recursive)
3240+ NewCallee = NewCaller;
31863241
31873242 ContextNode *OldCaller = Edge->Caller ;
31883243 OldCaller->eraseCalleeEdge (Edge.get ());
31893244
31903245 // We might already have an edge to the new caller. If one exists we will
31913246 // reuse it.
3192- auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee (Edge-> Callee );
3247+ auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee (NewCallee );
31933248
31943249 if (ExistingEdgeToNewCaller) {
31953250 // Since we already have an edge to NewCaller, simply move the ids
@@ -3199,11 +3254,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
31993254 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes ;
32003255 Edge->ContextIds .clear ();
32013256 Edge->AllocTypes = (uint8_t )AllocationType::None;
3202- Edge-> Callee ->eraseCallerEdge (Edge.get ());
3257+ OldCallee ->eraseCallerEdge (Edge.get ());
32033258 } else {
32043259 // Otherwise just reconnect Edge to NewCaller.
32053260 Edge->Caller = NewCaller;
32063261 NewCaller->CalleeEdges .push_back (Edge);
3262+ if (Recursive) {
3263+ assert (NewCallee == NewCaller);
3264+ // In the case of (direct) recursive edges, we update the callee as well
3265+ // so that it becomes recursive on the new caller.
3266+ Edge->Callee = NewCallee;
3267+ NewCallee->CallerEdges .push_back (Edge);
3268+ OldCallee->eraseCallerEdge (Edge.get ());
3269+ }
32073270 // Don't need to update Edge's context ids since we are simply
32083271 // reconnecting it.
32093272 }
@@ -3217,32 +3280,50 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
32173280#ifndef NDEBUG
32183281 bool IsNewNode = NewCaller->CallerEdges .empty ();
32193282#endif
3220- for (auto &OldCallerEdge : OldCaller->CallerEdges ) {
3221- // The context ids moving to the new caller are the subset of this edge's
3222- // context ids and the context ids on the callee edge being moved.
3223- DenseSet<uint32_t > EdgeContextIdsToMove =
3224- set_intersection (OldCallerEdge->getContextIds (), Edge->getContextIds ());
3225- set_subtract (OldCallerEdge->getContextIds (), EdgeContextIdsToMove);
3226- OldCallerEdge->AllocTypes =
3227- computeAllocType (OldCallerEdge->getContextIds ());
3228- // In this function we expect that any pre-existing node already has edges
3229- // from the same callers as the old node. That should be true in the current
3230- // use case, where we will remove None-type edges after copying over all
3231- // caller edges from the callee.
3232- auto *ExistingCallerEdge =
3233- NewCaller->findEdgeFromCaller (OldCallerEdge->Caller );
3234- assert (IsNewNode || ExistingCallerEdge);
3235- if (ExistingCallerEdge) {
3236- ExistingCallerEdge->getContextIds ().insert (EdgeContextIdsToMove.begin (),
3237- EdgeContextIdsToMove.end ());
3238- ExistingCallerEdge->AllocTypes |= computeAllocType (EdgeContextIdsToMove);
3239- continue ;
3283+ // If we just moved a direct recursive edge, presumably its context ids should
3284+ // also flow out of OldCaller via some other non-recursive callee edge. We
3285+ // don't want to remove the recursive context ids from other caller edges yet,
3286+ // otherwise the context ids get into an inconsistent state on OldCaller.
3287+ // We will update these context ids on the non-recursive caller edge when and
3288+ // if they are updated on the non-recursive callee.
3289+ if (!Recursive) {
3290+ for (auto &OldCallerEdge : OldCaller->CallerEdges ) {
3291+ auto OldCallerCaller = OldCallerEdge->Caller ;
3292+ // The context ids moving to the new caller are the subset of this edge's
3293+ // context ids and the context ids on the callee edge being moved.
3294+ DenseSet<uint32_t > EdgeContextIdsToMove = set_intersection (
3295+ OldCallerEdge->getContextIds (), Edge->getContextIds ());
3296+ if (OldCaller == OldCallerCaller) {
3297+ OldCallerCaller = NewCaller;
3298+ // Don't actually move this one. The caller will move it directly via a
3299+ // call to this function with this as the Edge if it is appropriate to
3300+ // move to a diff node that has a matching callee (itself).
3301+ continue ;
3302+ }
3303+ set_subtract (OldCallerEdge->getContextIds (), EdgeContextIdsToMove);
3304+ OldCallerEdge->AllocTypes =
3305+ computeAllocType (OldCallerEdge->getContextIds ());
3306+ // In this function we expect that any pre-existing node already has edges
3307+ // from the same callers as the old node. That should be true in the
3308+ // current use case, where we will remove None-type edges after copying
3309+ // over all caller edges from the callee.
3310+ auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller (OldCallerCaller);
3311+ // Since we would have skipped caller edges when moving a direct recursive
3312+ // edge, this may not hold true when recursive handling enabled.
3313+ assert (IsNewNode || ExistingCallerEdge || AllowRecursiveCallsites);
3314+ if (ExistingCallerEdge) {
3315+ ExistingCallerEdge->getContextIds ().insert (EdgeContextIdsToMove.begin (),
3316+ EdgeContextIdsToMove.end ());
3317+ ExistingCallerEdge->AllocTypes |=
3318+ computeAllocType (EdgeContextIdsToMove);
3319+ continue ;
3320+ }
3321+ auto NewEdge = std::make_shared<ContextEdge>(
3322+ NewCaller, OldCallerCaller, computeAllocType (EdgeContextIdsToMove),
3323+ EdgeContextIdsToMove);
3324+ NewCaller->CallerEdges .push_back (NewEdge);
3325+ NewEdge->Caller ->CalleeEdges .push_back (NewEdge);
32403326 }
3241- auto NewEdge = std::make_shared<ContextEdge>(
3242- NewCaller, OldCallerEdge->Caller ,
3243- computeAllocType (EdgeContextIdsToMove), EdgeContextIdsToMove);
3244- NewCaller->CallerEdges .push_back (NewEdge);
3245- NewEdge->Caller ->CalleeEdges .push_back (NewEdge);
32463327 }
32473328 // Recompute the node alloc type now that its caller edges have been
32483329 // updated (since we will compute from those edges).
@@ -3946,6 +4027,11 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
39464027 // handling and makes it less error-prone.
39474028 auto CloneCallerEdges = Clone->CallerEdges ;
39484029 for (auto &Edge : CloneCallerEdges) {
4030+ // Skip removed edges (due to direct recursive edges updated when
4031+ // updating callee edges when moving an edge and subsequently
4032+ // removed by call to removeNoneTypeCalleeEdges on the Clone).
4033+ if (Edge->isRemoved ())
4034+ continue ;
39494035 // Ignore any caller that does not have a recorded callsite Call.
39504036 if (!Edge->Caller ->hasCall ())
39514037 continue ;
0 commit comments