@@ -4526,6 +4526,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
45264526// If Clone not already assigned to a function clone:
45274527// Assign to first function clone without assignment
45284528// Assign caller to selected function clone
4529+ // For each call with graph Node having clones:
4530+ // If number func clones > number call's callsite Node clones:
4531+ // Record func CallInfo clones without Node clone in UnassignedCallClones
4532+ // For callsite Nodes in DFS order from allocations:
4533+ // If IsAllocation:
4534+ // Update allocation with alloc type
4535+ // Else:
4536+ // For Call, all MatchingCalls, and associated UnassignedCallClones:
4537+ // Update call to call recorded callee clone
4538+ //
45294539template <typename DerivedCCG, typename FuncTy, typename CallTy>
45304540bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
45314541 bool Changed = false ;
@@ -4553,6 +4563,34 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
45534563 DenseMap<CallInfo, CallInfo> CallMap;
45544564 };
45554565
4566+ // Map to keep track of information needed to update calls in function clones
4567+ // when their corresponding callsite node was not itself cloned for that
4568+ // function clone. Because of call context pruning (i.e. we only keep as much
4569+ // caller information as needed to distinguish hot vs cold), we may not have
4570+ // caller edges coming to each callsite node from all possible function
4571+ // callers. A function clone may get created for other callsites in the
4572+ // function for which there are caller edges that were not pruned. Any other
4573+ // callsites in that function clone, which were not themselves cloned for
4574+ // that function clone, should get updated the same way as the corresponding
4575+ // callsite in the original function (which may call a clone of its callee).
4576+ //
4577+ // We build this map after completing function cloning for each function, so
4578+ // that we can record the information from its call maps before they are
4579+ // destructed. The map will be used as we update calls to update any still
4580+ // unassigned call clones. Note that we may create new node clones as we clone
4581+ // other functions, so later on we check which node clones were still not
4582+ // created. To this end, the inner map is a map from function clone number to
4583+ // the list of calls cloned for that function (can be more than one due to the
4584+ // Node's MatchingCalls array).
4585+ //
4586+ // The alternative is creating new callsite clone nodes below as we clone the
4587+ // function, but that is trickier to get right and likely more overhead.
4588+ //
4589+ // Inner map is a std::map so sorted by key (clone number), in order to get
4590+ // ordered remarks in the full LTO case.
4591+ DenseMap<const ContextNode *, std::map<unsigned , SmallVector<CallInfo, 0 >>>
4592+ UnassignedCallClones;
4593+
45564594 // Walk all functions for which we saw calls with memprof metadata, and handle
45574595 // cloning for each of its calls.
45584596 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
@@ -4996,6 +5034,63 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
49965034 }
49975035 }
49985036 }
5037+
5038+ if (FuncCloneInfos.size () < 2 )
5039+ continue ;
5040+
5041+ // In this case there is more than just the original function copy.
5042+ // Record call clones of any callsite nodes in the function that did not
5043+ // themselves get cloned for all of the function clones.
5044+ for (auto &Call : CallsWithMetadata) {
5045+ ContextNode *Node = getNodeForInst (Call);
5046+ if (!Node || !Node->hasCall () || Node->emptyContextIds ())
5047+ continue ;
5048+ // If Node has enough clones already to cover all function clones, we can
5049+ // skip it. Need to add one for the original copy.
5050+ // Use >= in case there were clones that were skipped due to having empty
5051+ // context ids
5052+ if (Node->Clones .size () + 1 >= FuncCloneInfos.size ())
5053+ continue ;
5054+ // First collect all function clones we cloned this callsite node for.
5055+ // They may not be sequential, e.g. due to empty clones.
5056+ DenseSet<unsigned > NodeCallClones;
5057+ for (auto *C : Node->Clones )
5058+ NodeCallClones.insert (C->Call .cloneNo ());
5059+ unsigned I = 0 ;
5060+ // Now check all the function clones.
5061+ for (auto &FC : FuncCloneInfos) {
5062+ // Function clones should be sequential.
5063+ assert (FC.FuncClone .cloneNo () == I);
5064+ // Skip the first clone which got the original call.
5065+ // Also skip any other clones created for this Node.
5066+ if (++I == 1 || NodeCallClones.contains (I)) {
5067+ continue ;
5068+ }
5069+ // Record the call clones created for this callsite in this function
5070+ // clone.
5071+ auto &CallVector = UnassignedCallClones[Node][I];
5072+ DenseMap<CallInfo, CallInfo> &CallMap = FC.CallMap ;
5073+ if (auto It = CallMap.find (Call); It != CallMap.end ()) {
5074+ CallInfo CallClone = It->second ;
5075+ CallVector.push_back (CallClone);
5076+ } else {
5077+ // All but the original clone (skipped earlier) should have an entry
5078+ // for all calls.
5079+ assert (false && " Expected to find call in CallMap" );
5080+ }
5081+ // Need to do the same for all matching calls.
5082+ for (auto &MatchingCall : Node->MatchingCalls ) {
5083+ if (auto It = CallMap.find (MatchingCall); It != CallMap.end ()) {
5084+ CallInfo CallClone = It->second ;
5085+ CallVector.push_back (CallClone);
5086+ } else {
5087+ // All but the original clone (skipped earlier) should have an entry
5088+ // for all calls.
5089+ assert (false && " Expected to find call in CallMap" );
5090+ }
5091+ }
5092+ }
5093+ }
49995094 }
50005095
50015096 uint8_t BothTypes =
@@ -5057,6 +5152,26 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
50575152 // Update all the matching calls as well.
50585153 for (auto &Call : Node->MatchingCalls )
50595154 updateCall (Call, CalleeFunc);
5155+
5156+ // Now update all calls recorded earlier that are still in function clones
5157+ // which don't have a clone of this callsite node.
5158+ if (!UnassignedCallClones.contains (Node))
5159+ return ;
5160+ DenseSet<unsigned > NodeCallClones;
5161+ for (auto *C : Node->Clones )
5162+ NodeCallClones.insert (C->Call .cloneNo ());
5163+ // Note that we already confirmed Node is in this map a few lines above.
5164+ auto &ClonedCalls = UnassignedCallClones[Node];
5165+ for (auto &[CloneNo, CallVector] : ClonedCalls) {
5166+ // Should start at 1 as we never create an entry for original node.
5167+ assert (CloneNo > 0 );
5168+ // If we subsequently created a clone, skip this one.
5169+ if (NodeCallClones.contains (CloneNo))
5170+ continue ;
5171+ // Use the original Node's CalleeFunc.
5172+ for (auto &Call : CallVector)
5173+ updateCall (Call, CalleeFunc);
5174+ }
50605175 };
50615176
50625177 // Performs DFS traversal starting from allocation nodes to update calls to
0 commit comments