@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
9797 " Number of missing alloc nodes for context ids" );
9898STATISTIC (SkippedCallsCloning,
9999 " Number of calls skipped during cloning due to unexpected operand" );
// Bumped by the updateCall implementations when a callsite that was already
// assigned to call one function clone is updated to call a clone with a
// different clone number.
100+ STATISTIC (MismatchedCloneAssignments,
101+ " Number of callsites assigned to call multiple non-matching clones" );
100102
101103static cl::opt<std::string> DotFilePathPrefix (
102104 " memprof-dot-file-path-prefix" , cl::init(" " ), cl::Hidden,
@@ -2060,6 +2062,18 @@ static bool isMemProfClone(const Function &F) {
20602062 return F.getName ().contains (MemProfCloneSuffix);
20612063}
20622064
2065+ // Return the clone number of the given function by extracting it from the
2066+ // memprof suffix. Assumes the caller has already confirmed it is a memprof
2067+ // clone.
2068+ static unsigned getMemProfCloneNum (const Function &F) {
2069+ assert (isMemProfClone (F));
2070+ auto Pos = F.getName ().find_last_of (' .' );
2071+ assert (Pos > 0 );
2072+ unsigned CloneNo;
2073+ F.getName ().drop_front (Pos + 1 ).getAsInteger (10 , CloneNo);
2074+ return CloneNo;
2075+ }
2076+
20632077std::string ModuleCallsiteContextGraph::getLabel (const Function *Func,
20642078 const Instruction *Call,
20652079 unsigned CloneNo) const {
@@ -3979,7 +3993,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
39793993
39803994void ModuleCallsiteContextGraph::updateCall (CallInfo &CallerCall,
39813995 FuncInfo CalleeFunc) {
3982- if (CalleeFunc.cloneNo () > 0 )
3996+ auto *CurF = cast<CallBase>(CallerCall.call ())->getCalledFunction ();
3997+ auto NewCalleeCloneNo = CalleeFunc.cloneNo ();
3998+ if (isMemProfClone (*CurF)) {
3999+ // If we already assigned this callsite to call a specific non-default
4000+ // clone (i.e. not the original function which is clone 0), ensure that we
4001+ // aren't trying to now update it to call a different clone, which is
4002+ // indicative of a bug in the graph or function assignment.
4003+ auto CurCalleeCloneNo = getMemProfCloneNum (*CurF);
4004+ if (CurCalleeCloneNo != NewCalleeCloneNo) {
4005+ LLVM_DEBUG (dbgs () << " Mismatch in call clone assignment: was "
4006+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4007+ << " \n " );
4008+ MismatchedCloneAssignments++;
4009+ }
4010+ }
4011+ if (NewCalleeCloneNo > 0 )
39834012 cast<CallBase>(CallerCall.call ())->setCalledFunction (CalleeFunc.func ());
39844013 OREGetter (CallerCall.call ()->getFunction ())
39854014 .emit (OptimizationRemark (DEBUG_TYPE, " MemprofCall" , CallerCall.call ())
@@ -3995,7 +4024,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
39954024 assert (CI &&
39964025 " Caller cannot be an allocation which should not have profiled calls" );
39974026 assert (CI->Clones .size () > CallerCall.cloneNo ());
3998- CI->Clones [CallerCall.cloneNo ()] = CalleeFunc.cloneNo ();
4027+ auto NewCalleeCloneNo = CalleeFunc.cloneNo ();
4028+ auto CurCalleeCloneNo = CI->Clones [CallerCall.cloneNo ()];
4029+ // If we already assigned this callsite to call a specific non-default
4030+ // clone (i.e. not the original function which is clone 0), ensure that we
4031+ // aren't trying to now update it to call a different clone, which is
4032+ // indicative of a bug in the graph or function assignment.
4033+ if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4034+ LLVM_DEBUG (dbgs () << " Mismatch in call clone assignment: was "
4035+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
4036+ << " \n " );
4037+ MismatchedCloneAssignments++;
4038+ }
4039+ CI->Clones [CallerCall.cloneNo ()] = NewCalleeCloneNo;
39994040}
40004041
40014042// Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4703,6 +4744,18 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
47034744 // where the callers were assigned to different clones of a function.
47044745 }
47054746
4747+ auto FindFirstAvailFuncClone = [&]() {
4748+ // Find first function in FuncClonesToCallMap without an assigned
4749+ // clone of this callsite Node. We should always have one
4750+ // available at this point due to the earlier cloning when the
4751+ // FuncClonesToCallMap size was smaller than the clone number.
4752+ for (auto &CF : FuncClonesToCallMap) {
4753+ if (!FuncCloneToCurNodeCloneMap.count (CF.first ))
4754+ return CF.first ;
4755+ }
4756+ assert (false );
4757+ };
4758+
47064759 // See if we can use existing function clone. Walk through
47074760 // all caller edges to see if any have already been assigned to
47084761 // a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4872,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
48194872 // clone of OrigFunc for another caller during this iteration over
48204873 // its caller edges.
48214874 if (!FuncCloneAssignedToCurCallsiteClone) {
4822- // Find first function in FuncClonesToCallMap without an assigned
4823- // clone of this callsite Node. We should always have one
4824- // available at this point due to the earlier cloning when the
4825- // FuncClonesToCallMap size was smaller than the clone number.
4826- for (auto &CF : FuncClonesToCallMap) {
4827- if (!FuncCloneToCurNodeCloneMap.count (CF.first )) {
4828- FuncCloneAssignedToCurCallsiteClone = CF.first ;
4829- break ;
4830- }
4831- }
4875+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone ();
48324876 assert (FuncCloneAssignedToCurCallsiteClone);
48334877 // Assign Clone to FuncCloneAssignedToCurCallsiteClone
48344878 AssignCallsiteCloneToFuncClone (
@@ -4842,6 +4886,27 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
48424886 FuncCloneAssignedToCurCallsiteClone);
48434887 }
48444888 }
4889+ // If we didn't assign a function clone to this callsite clone yet, e.g.
4890+ // none of its callers has a non-null call, do the assignment here.
4891+ // We want to ensure that every callsite clone is assigned to some
4892+ // function clone, so that the call updates below work as expected.
4893+ // In particular if this is the original callsite, we want to ensure it
4894+ // is assigned to the original function, otherwise the original function
4895+ // will appear available for assignment to other callsite clones,
4896+ // leading to unintended effects. For one, the unknown and not updated
4897+ // callers will call into cloned paths leading to the wrong hints,
4898+ // because they still call the original function (clone 0). Also,
4899+ // because all callsites start out as being clone 0 by default, we can't
4900+ // easily distinguish between callsites explicitly assigned to clone 0
4901+ // vs those never assigned, which can lead to multiple updates of the
4902+ // calls when invoking updateCall below, with mismatched clone values.
4903+ if (!FuncCloneAssignedToCurCallsiteClone) {
4904+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone ();
4905+ assert (FuncCloneAssignedToCurCallsiteClone);
4906+ AssignCallsiteCloneToFuncClone (
4907+ FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4908+ AllocationCallToContextNodeMap.count (Call));
4909+ }
48454910 }
48464911 if (VerifyCCG) {
48474912 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
0 commit comments