@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
9797 " Number of missing alloc nodes for context ids" );
9898STATISTIC (SkippedCallsCloning,
9999 " Number of calls skipped during cloning due to unexpected operand" );
100+ STATISTIC (MismatchedCloneAssignments,
101+ " Number of callsites assigned to call multiple non-matching clones" );
100102
101103static cl::opt<std::string> DotFilePathPrefix (
102104 " memprof-dot-file-path-prefix" , cl::init(" " ), cl::Hidden,
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
   return F.getName().contains(MemProfCloneSuffix);
 }
 
+// Return the clone number of the given function by extracting it from the
+// memprof suffix. Assumes the caller has already confirmed it is a memprof
+// clone.
+static unsigned getMemProfCloneNum(const Function &F) {
+  assert(isMemProfClone(F));
+  auto Pos = F.getName().find_last_of('.');
+  assert(Pos > 0);
+  unsigned CloneNo;
+  bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
+  assert(!Err);
+  (void)Err;
+  return CloneNo;
+}
+
 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                  const Instruction *Call,
                                                  unsigned CloneNo) const {
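The new getMemProfCloneNum helper simply parses the decimal value after the last '.' in the function name. A minimal standalone sketch of that parsing, written in plain C++ rather than llvm::StringRef, and assuming clones follow the "<original><MemProfCloneSuffix><N>" naming implied by the surrounding file (the example symbol below is hypothetical):

    // Sketch of the suffix parsing done by getMemProfCloneNum, assuming
    // memprof clone names end in ".memprof.<N>".
    #include <cassert>
    #include <string>

    static unsigned cloneNumFromName(const std::string &Name) {
      // The clone number is the decimal value after the last '.'.
      std::string::size_type Pos = Name.find_last_of('.');
      assert(Pos != std::string::npos && Pos > 0);
      return static_cast<unsigned>(std::stoul(Name.substr(Pos + 1)));
    }

    // e.g. cloneNumFromName("_Z3foov.memprof.2") == 2 (hypothetical symbol).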
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
 
 void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                             FuncInfo CalleeFunc) {
-  if (CalleeFunc.cloneNo() > 0)
+  auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
+  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+  if (isMemProfClone(*CurF)) {
+    // If we already assigned this callsite to call a specific non-default
+    // clone (i.e. not the original function which is clone 0), ensure that we
+    // aren't trying to now update it to call a different clone, which is
+    // indicative of a bug in the graph or function assignment.
+    auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
+    if (CurCalleeCloneNo != NewCalleeCloneNo) {
+      LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+                        << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+                        << "\n");
+      MismatchedCloneAssignments++;
+    }
+  }
+  if (NewCalleeCloneNo > 0)
     cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
   OREGetter(CallerCall.call()->getFunction())
       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
   assert(CI &&
          "Caller cannot be an allocation which should not have profiled calls");
   assert(CI->Clones.size() > CallerCall.cloneNo());
-  CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
+  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
+  // If we already assigned this callsite to call a specific non-default
+  // clone (i.e. not the original function which is clone 0), ensure that we
+  // aren't trying to now update it to call a different clone, which is
+  // indicative of a bug in the graph or function assignment.
+  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
+    LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+                      << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+                      << "\n");
+    MismatchedCloneAssignments++;
+  }
+  CurCalleeCloneNo = NewCalleeCloneNo;
 }
 
 // Update the debug information attached to NewFunc to use the clone Name. Note
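Both updateCall overloads now share the same guard: if the callsite was already pointed at a non-default clone (clone 0 being the original function), any attempt to repoint it at a different clone is logged and counted via the new MismatchedCloneAssignments statistic. A rough standalone sketch of that pattern, with hypothetical names and a plain counter standing in for the STATISTIC macro:

    #include <cstdio>

    static unsigned MismatchedCloneAssignments = 0; // stand-in for the STATISTIC

    // CurCalleeCloneNo is the clone the callsite currently calls (0 = original
    // function); NewCalleeCloneNo is the clone we now want it to call.
    static void recordCalleeClone(unsigned &CurCalleeCloneNo,
                                  unsigned NewCalleeCloneNo) {
      // Only a conflicting non-zero prior assignment indicates a bug, since
      // every callsite starts out as clone 0 by default.
      if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
        std::fprintf(stderr, "Mismatch in call clone assignment: was %u now %u\n",
                     CurCalleeCloneNo, NewCalleeCloneNo);
        ++MismatchedCloneAssignments;
      }
      CurCalleeCloneNo = NewCalleeCloneNo;
    }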
@@ -4703,6 +4746,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
           // where the callers were assigned to different clones of a function.
         }
 
+        auto FindFirstAvailFuncClone = [&]() {
+          // Find first function in FuncClonesToCallMap without an assigned
+          // clone of this callsite Node. We should always have one
+          // available at this point due to the earlier cloning when the
+          // FuncClonesToCallMap size was smaller than the clone number.
+          for (auto &CF : FuncClonesToCallMap) {
+            if (!FuncCloneToCurNodeCloneMap.count(CF.first))
+              return CF.first;
+          }
+          assert(false &&
+                 "Expected an available func clone for this callsite clone");
+        };
+
         // See if we can use existing function clone. Walk through
         // all caller edges to see if any have already been assigned to
         // a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4875,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
             // clone of OrigFunc for another caller during this iteration over
             // its caller edges.
             if (!FuncCloneAssignedToCurCallsiteClone) {
-              // Find first function in FuncClonesToCallMap without an assigned
-              // clone of this callsite Node. We should always have one
-              // available at this point due to the earlier cloning when the
-              // FuncClonesToCallMap size was smaller than the clone number.
-              for (auto &CF : FuncClonesToCallMap) {
-                if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
-                  FuncCloneAssignedToCurCallsiteClone = CF.first;
-                  break;
-                }
-              }
+              FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
               assert(FuncCloneAssignedToCurCallsiteClone);
               // Assign Clone to FuncCloneAssignedToCurCallsiteClone
               AssignCallsiteCloneToFuncClone(
@@ -4842,6 +4889,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
                                        FuncCloneAssignedToCurCallsiteClone);
           }
         }
+        // If we didn't assign a function clone to this callsite clone yet, e.g.
+        // none of its callers has a non-null call, do the assignment here.
+        // We want to ensure that every callsite clone is assigned to some
+        // function clone, so that the call updates below work as expected.
+        // In particular if this is the original callsite, we want to ensure it
+        // is assigned to the original function, otherwise the original function
+        // will appear available for assignment to other callsite clones,
+        // leading to unintended effects. For one, the unknown and not updated
+        // callers will call into cloned paths leading to the wrong hints,
+        // because they still call the original function (clone 0). Also,
+        // because all callsites start out as being clone 0 by default, we can't
+        // easily distinguish between callsites explicitly assigned to clone 0
+        // vs those never assigned, which can lead to multiple updates of the
+        // calls when invoking updateCall below, with mismatched clone values.
+        // TODO: Add a flag to the callsite nodes or some other mechanism to
+        // better distinguish and identify callsite clones that are not getting
+        // assigned to function clones as expected.
+        if (!FuncCloneAssignedToCurCallsiteClone) {
+          FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
+          assert(FuncCloneAssignedToCurCallsiteClone &&
+                 "No available func clone for this callsite clone");
+          AssignCallsiteCloneToFuncClone(
+              FuncCloneAssignedToCurCallsiteClone, Call, Clone,
+              /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
+        }
       }
       if (VerifyCCG) {
         checkNode<DerivedCCG, FuncTy, CallTy>(Node);
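The fallback in this last hunk guarantees that every callsite clone ends up assigned to some function clone, even when none of its caller edges forced an assignment. A rough sketch of that invariant under hypothetical container types and names (the real code tracks FuncInfo and ContextNode objects rather than plain integers):

    #include <cassert>
    #include <map>
    #include <set>

    // Assign CallsiteCloneNo to the first function clone not yet used by any
    // other clone of this callsite, unless a caller edge already assigned one.
    static void assignFallback(unsigned CallsiteCloneNo,
                               const std::set<unsigned> &FuncClones,
                               std::set<unsigned> &UsedFuncClones,
                               std::map<unsigned, unsigned> &Assignment) {
      if (Assignment.count(CallsiteCloneNo))
        return; // Already assigned while walking its caller edges.
      for (unsigned FC : FuncClones) {
        if (!UsedFuncClones.count(FC)) { // mirrors FindFirstAvailFuncClone
          Assignment[CallsiteCloneNo] = FC;
          UsedFuncClones.insert(FC);
          return;
        }
      }
      assert(false && "Expected an available func clone for this callsite clone");
    }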