@@ -4526,6 +4526,16 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4526
4526
// If Clone not already assigned to a function clone:
4527
4527
// Assign to first function clone without assignment
4528
4528
// Assign caller to selected function clone
4529
+ // For each call with graph Node having clones:
4530
+ // If number func clones > number call's callsite Node clones:
4531
+ // Record func CallInfo clones without Node clone in UnassignedCallClones
4532
+ // For callsite Nodes in DFS order from allocations:
4533
+ // If IsAllocation:
4534
+ // Update allocation with alloc type
4535
+ // Else:
4536
+ // For Call, all MatchingCalls, and associated UnassignedCallClones:
4537
+ // Update call to call recorded callee clone
4538
+ //
4529
4539
template <typename DerivedCCG, typename FuncTy, typename CallTy>
4530
4540
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4531
4541
bool Changed = false ;
@@ -4553,6 +4563,34 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4553
4563
DenseMap<CallInfo, CallInfo> CallMap;
4554
4564
};
4555
4565
4566
+ // Map to keep track of information needed to update calls in function clones
4567
+ // when their corresponding callsite node was not itself cloned for that
4568
+ // function clone. Because of call context pruning (i.e. we only keep as much
4569
+ // caller information as needed to distinguish hot vs cold), we may not have
4570
+ // caller edges coming to each callsite node from all possible function
4571
+ // callers. A function clone may get created for other callsites in the
4572
+ // function for which there are caller edges that were not pruned. Any other
4573
+ // callsites in that function clone, which were not themselves cloned for
4574
+ // that function clone, should get updated the same way as the corresponding
4575
+ // callsite in the original function (which may call a clone of its callee).
4576
+ //
4577
+ // We build this map after completing function cloning for each function, so
4578
+ // that we can record the information from its call maps before they are
4579
+ // destructed. The map will be used as we update calls to update any still
4580
+ // unassigned call clones. Note that we may create new node clones as we clone
4581
+ // other functions, so later on we check which node clones were still not
4582
+ // created. To this end, the inner map is a map from function clone number to
4583
+ // the list of calls cloned for that function (can be more than one due to the
4584
+ // Node's MatchingCalls array).
4585
+ //
4586
+ // The alternative is creating new callsite clone nodes below as we clone the
4587
+ // function, but that is trickier to get right and likely more overhead.
4588
+ //
4589
+ // Inner map is a std::map so sorted by key (clone number), in order to get
4590
+ // ordered remarks in the full LTO case.
4591
+ DenseMap<const ContextNode *, std::map<unsigned , SmallVector<CallInfo, 0 >>>
4592
+ UnassignedCallClones;
4593
+
4556
4594
// Walk all functions for which we saw calls with memprof metadata, and handle
4557
4595
// cloning for each of its calls.
4558
4596
for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
@@ -4996,6 +5034,63 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4996
5034
}
4997
5035
}
4998
5036
}
5037
+
5038
+ if (FuncCloneInfos.size () < 2 )
5039
+ continue ;
5040
+
5041
+ // In this case there is more than just the original function copy.
5042
+ // Record call clones of any callsite nodes in the function that did not
5043
+ // themselves get cloned for all of the function clones.
5044
+ for (auto &Call : CallsWithMetadata) {
5045
+ ContextNode *Node = getNodeForInst (Call);
5046
+ if (!Node || !Node->hasCall () || Node->emptyContextIds ())
5047
+ continue ;
5048
+ // If Node has enough clones already to cover all function clones, we can
5049
+ // skip it. Need to add one for the original copy.
5050
+ // Use >= in case there were clones that were skipped due to having empty
5051
+ // context ids
5052
+ if (Node->Clones .size () + 1 >= FuncCloneInfos.size ())
5053
+ continue ;
5054
+ // First collect all function clones we cloned this callsite node for.
5055
+ // They may not be sequential, e.g. due to empty clones.
5056
+ DenseSet<unsigned > NodeCallClones;
5057
+ for (auto *C : Node->Clones )
5058
+ NodeCallClones.insert (C->Call .cloneNo ());
5059
+ unsigned I = 0 ;
5060
+ // Now check all the function clones.
5061
+ for (auto &FC : FuncCloneInfos) {
5062
+ // Function clones should be sequential.
5063
+ assert (FC.FuncClone .cloneNo () == I);
5064
+ // Skip the first clone which got the original call.
5065
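+ // Also skip any other clones created for this Node.
+ // NOTE(review): I is pre-incremented in the test below, so from here on it
+ // is FC's clone number plus one - confirm that is the intended value for
+ // the NodeCallClones lookup and the UnassignedCallClones key.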
5066
+ if (++I == 1 || NodeCallClones.contains (I)) {
5067
+ continue ;
5068
+ }
5069
+ // Record the call clones created for this callsite in this function
5070
+ // clone.
5071
+ auto &CallVector = UnassignedCallClones[Node][I];
5072
+ DenseMap<CallInfo, CallInfo> &CallMap = FC.CallMap ;
5073
+ if (auto It = CallMap.find (Call); It != CallMap.end ()) {
5074
+ CallInfo CallClone = It->second ;
5075
+ CallVector.push_back (CallClone);
5076
+ } else {
5077
+ // All but the original clone (skipped earlier) should have an entry
5078
+ // for all calls.
5079
+ assert (false && " Expected to find call in CallMap" );
5080
+ }
5081
+ // Need to do the same for all matching calls.
5082
+ for (auto &MatchingCall : Node->MatchingCalls ) {
5083
+ if (auto It = CallMap.find (MatchingCall); It != CallMap.end ()) {
5084
+ CallInfo CallClone = It->second ;
5085
+ CallVector.push_back (CallClone);
5086
+ } else {
5087
+ // All but the original clone (skipped earlier) should have an entry
5088
+ // for all calls.
5089
+ assert (false && " Expected to find call in CallMap" );
5090
+ }
5091
+ }
5092
+ }
5093
+ }
4999
5094
}
5000
5095
5001
5096
uint8_t BothTypes =
@@ -5057,6 +5152,26 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
5057
5152
// Update all the matching calls as well.
5058
5153
for (auto &Call : Node->MatchingCalls )
5059
5154
updateCall (Call, CalleeFunc);
5155
+
5156
+ // Now update all calls recorded earlier that are still in function clones
5157
+ // which don't have a clone of this callsite node.
5158
+ if (!UnassignedCallClones.contains (Node))
5159
+ return ;
5160
+ DenseSet<unsigned > NodeCallClones;
5161
+ for (auto *C : Node->Clones )
5162
+ NodeCallClones.insert (C->Call .cloneNo ());
5163
+ // Note that we already confirmed Node is in this map a few lines above.
5164
+ auto &ClonedCalls = UnassignedCallClones[Node];
5165
+ for (auto &[CloneNo, CallVector] : ClonedCalls) {
5166
+ // Should start at 1 as we never create an entry for original node.
5167
+ assert (CloneNo > 0 );
5168
+ // If we subsequently created a clone, skip this one.
5169
+ if (NodeCallClones.contains (CloneNo))
5170
+ continue ;
5171
+ // Use the original Node's CalleeFunc.
5172
+ for (auto &Call : CallVector)
5173
+ updateCall (Call, CalleeFunc);
5174
+ }
5060
5175
};
5061
5176
5062
5177
// Performs DFS traversal starting from allocation nodes to update calls to
0 commit comments