@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
97
97
" Number of missing alloc nodes for context ids" );
98
98
// Counts calls whose operands were not in the expected form during cloning,
// causing the call update to be skipped.
STATISTIC(SkippedCallsCloning,
          "Number of calls skipped during cloning due to unexpected operand");
// Counts callsites that were updated more than once with different
// (non-matching) callee clone assignments — indicative of a bug in the graph
// or in function-clone assignment (see updateCall).
STATISTIC(MismatchedCloneAssignments,
          "Number of callsites assigned to call multiple non-matching clones");
100
102
101
103
static cl::opt<std::string> DotFilePathPrefix (
102
104
" memprof-dot-file-path-prefix" , cl::init(" " ), cl::Hidden,
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
2060
2062
return F.getName ().contains (MemProfCloneSuffix);
2061
2063
}
2062
2064
2065
+ // Return the clone number of the given function by extracting it from the
2066
+ // memprof suffix. Assumes the caller has already confirmed it is a memprof
2067
+ // clone.
2068
+ static unsigned getMemProfCloneNum (const Function &F) {
2069
+ assert (isMemProfClone (F));
2070
+ auto Pos = F.getName ().find_last_of (' .' );
2071
+ assert (Pos > 0 );
2072
+ unsigned CloneNo;
2073
+ bool Err = F.getName ().drop_front (Pos + 1 ).getAsInteger (10 , CloneNo);
2074
+ assert (!Err);
2075
+ (void )Err;
2076
+ return CloneNo;
2077
+ }
2078
+
2063
2079
std::string ModuleCallsiteContextGraph::getLabel (const Function *Func,
2064
2080
const Instruction *Call,
2065
2081
unsigned CloneNo) const {
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
3979
3995
3980
3996
void ModuleCallsiteContextGraph::updateCall (CallInfo &CallerCall,
3981
3997
FuncInfo CalleeFunc) {
3982
- if (CalleeFunc.cloneNo () > 0 )
3998
+ auto *CurF = cast<CallBase>(CallerCall.call ())->getCalledFunction ();
3999
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo ();
4000
+ if (isMemProfClone (*CurF)) {
4001
+ // If we already assigned this callsite to call a specific non-default
4002
+ // clone (i.e. not the original function which is clone 0), ensure that we
4003
+ // aren't trying to now update it to call a different clone, which is
4004
+ // indicative of a bug in the graph or function assignment.
4005
+ auto CurCalleeCloneNo = getMemProfCloneNum (*CurF);
4006
+ if (CurCalleeCloneNo != NewCalleeCloneNo) {
4007
+ LLVM_DEBUG (dbgs () << " Mismatch in call clone assignment: was "
4008
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4009
+ << " \n " );
4010
+ MismatchedCloneAssignments++;
4011
+ }
4012
+ }
4013
+ if (NewCalleeCloneNo > 0 )
3983
4014
cast<CallBase>(CallerCall.call ())->setCalledFunction (CalleeFunc.func ());
3984
4015
OREGetter (CallerCall.call ()->getFunction ())
3985
4016
.emit (OptimizationRemark (DEBUG_TYPE, " MemprofCall" , CallerCall.call ())
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
3995
4026
assert (CI &&
3996
4027
" Caller cannot be an allocation which should not have profiled calls" );
3997
4028
assert (CI->Clones .size () > CallerCall.cloneNo ());
3998
- CI->Clones [CallerCall.cloneNo ()] = CalleeFunc.cloneNo ();
4029
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo ();
4030
+ auto &CurCalleeCloneNo = CI->Clones [CallerCall.cloneNo ()];
4031
+ // If we already assigned this callsite to call a specific non-default
4032
+ // clone (i.e. not the original function which is clone 0), ensure that we
4033
+ // aren't trying to now update it to call a different clone, which is
4034
+ // indicative of a bug in the graph or function assignment.
4035
+ if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4036
+ LLVM_DEBUG (dbgs () << " Mismatch in call clone assignment: was "
4037
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4038
+ << " \n " );
4039
+ MismatchedCloneAssignments++;
4040
+ }
4041
+ CurCalleeCloneNo = NewCalleeCloneNo;
3999
4042
}
4000
4043
4001
4044
// Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4703,6 +4746,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4703
4746
// where the callers were assigned to different clones of a function.
4704
4747
}
4705
4748
4749
+ auto FindFirstAvailFuncClone = [&]() {
4750
+ // Find first function in FuncClonesToCallMap without an assigned
4751
+ // clone of this callsite Node. We should always have one
4752
+ // available at this point due to the earlier cloning when the
4753
+ // FuncClonesToCallMap size was smaller than the clone number.
4754
+ for (auto &CF : FuncClonesToCallMap) {
4755
+ if (!FuncCloneToCurNodeCloneMap.count (CF.first ))
4756
+ return CF.first ;
4757
+ }
4758
+ assert (false &&
4759
+ " Expected an available func clone for this callsite clone" );
4760
+ };
4761
+
4706
4762
// See if we can use existing function clone. Walk through
4707
4763
// all caller edges to see if any have already been assigned to
4708
4764
// a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4875,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4819
4875
// clone of OrigFunc for another caller during this iteration over
4820
4876
// its caller edges.
4821
4877
if (!FuncCloneAssignedToCurCallsiteClone) {
4822
- // Find first function in FuncClonesToCallMap without an assigned
4823
- // clone of this callsite Node. We should always have one
4824
- // available at this point due to the earlier cloning when the
4825
- // FuncClonesToCallMap size was smaller than the clone number.
4826
- for (auto &CF : FuncClonesToCallMap) {
4827
- if (!FuncCloneToCurNodeCloneMap.count (CF.first )) {
4828
- FuncCloneAssignedToCurCallsiteClone = CF.first ;
4829
- break ;
4830
- }
4831
- }
4878
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone ();
4832
4879
assert (FuncCloneAssignedToCurCallsiteClone);
4833
4880
// Assign Clone to FuncCloneAssignedToCurCallsiteClone
4834
4881
AssignCallsiteCloneToFuncClone (
@@ -4842,6 +4889,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4842
4889
FuncCloneAssignedToCurCallsiteClone);
4843
4890
}
4844
4891
}
4892
+ // If we didn't assign a function clone to this callsite clone yet, e.g.
4893
+ // none of its callers has a non-null call, do the assignment here.
4894
+ // We want to ensure that every callsite clone is assigned to some
4895
+ // function clone, so that the call updates below work as expected.
4896
+ // In particular if this is the original callsite, we want to ensure it
4897
+ // is assigned to the original function, otherwise the original function
4898
+ // will appear available for assignment to other callsite clones,
4899
+ // leading to unintended effects. For one, the unknown and not updated
4900
+ // callers will call into cloned paths leading to the wrong hints,
4901
+ // because they still call the original function (clone 0). Also,
4902
+ // because all callsites start out as being clone 0 by default, we can't
4903
+ // easily distinguish between callsites explicitly assigned to clone 0
4904
+ // vs those never assigned, which can lead to multiple updates of the
4905
+ // calls when invoking updateCall below, with mismatched clone values.
4906
+ // TODO: Add a flag to the callsite nodes or some other mechanism to
4907
+ // better distinguish and identify callsite clones that are not getting
4908
+ // assigned to function clones as expected.
4909
+ if (!FuncCloneAssignedToCurCallsiteClone) {
4910
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone ();
4911
+ assert (FuncCloneAssignedToCurCallsiteClone &&
4912
+ " No available func clone for this callsite clone" );
4913
+ AssignCallsiteCloneToFuncClone (
4914
+ FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4915
+ /* IsAlloc=*/ AllocationCallToContextNodeMap.contains (Call));
4916
+ }
4845
4917
}
4846
4918
if (VerifyCCG) {
4847
4919
checkNode<DerivedCCG, FuncTy, CallTy>(Node);
0 commit comments