-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[MemProf] Ensure all callsite clones are assigned a function clone #150735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId, | |
| "Number of missing alloc nodes for context ids"); | ||
| STATISTIC(SkippedCallsCloning, | ||
| "Number of calls skipped during cloning due to unexpected operand"); | ||
| STATISTIC(MismatchedCloneAssignments, | ||
| "Number of callsites assigned to call multiple non-matching clones"); | ||
|
|
||
| static cl::opt<std::string> DotFilePathPrefix( | ||
| "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, | ||
|
|
@@ -2060,6 +2062,18 @@ static bool isMemProfClone(const Function &F) { | |
| return F.getName().contains(MemProfCloneSuffix); | ||
| } | ||
|
|
||
| // Return the clone number of the given function by extracting it from the | ||
| // memprof suffix, i.e. the decimal digits following the last '.' in the | ||
| // function name (e.g. 1 for "C.memprof.1"). Assumes the caller has already | ||
| // confirmed it is a memprof clone. | ||
| static unsigned getMemProfCloneNum(const Function &F) { | ||
| assert(isMemProfClone(F)); | ||
| StringRef Name = F.getName(); | ||
| auto Pos = Name.find_last_of('.'); | ||
| // find_last_of returns StringRef::npos (not 0) when no '.' is present, so | ||
| // the failure case must be checked against npos rather than with "> 0". | ||
| assert(Pos != StringRef::npos && "memprof clone name is missing '.' suffix"); | ||
| // Initialize so that a parse failure in a no-asserts build yields a | ||
| // deterministic value instead of reading an indeterminate one. | ||
| unsigned CloneNo = 0; | ||
| // getAsInteger returns true on failure, leaving CloneNo unmodified. | ||
| bool Err = Name.drop_front(Pos + 1).getAsInteger(10, CloneNo); | ||
| assert(!Err && "memprof clone suffix is not a decimal clone number"); | ||
| (void)Err; | ||
| return CloneNo; | ||
| } | ||
|
|
||
| std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, | ||
| const Instruction *Call, | ||
| unsigned CloneNo) const { | ||
|
|
@@ -3979,7 +3993,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const { | |
|
|
||
| void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, | ||
| FuncInfo CalleeFunc) { | ||
| if (CalleeFunc.cloneNo() > 0) | ||
| auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction(); | ||
| auto NewCalleeCloneNo = CalleeFunc.cloneNo(); | ||
| if (isMemProfClone(*CurF)) { | ||
| // If we already assigned this callsite to call a specific non-default | ||
| // clone (i.e. not the original function which is clone 0), ensure that we | ||
| // aren't trying to now update it to call a different clone, which is | ||
| // indicative of a bug in the graph or function assignment. | ||
| auto CurCalleeCloneNo = getMemProfCloneNum(*CurF); | ||
| if (CurCalleeCloneNo != NewCalleeCloneNo) { | ||
| LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " | ||
| << CurCalleeCloneNo << " now " << NewCalleeCloneNo | ||
| << "\n"); | ||
| MismatchedCloneAssignments++; | ||
| } | ||
| } | ||
| if (NewCalleeCloneNo > 0) | ||
| cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); | ||
| OREGetter(CallerCall.call()->getFunction()) | ||
| .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) | ||
|
|
@@ -3995,7 +4024,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, | |
| assert(CI && | ||
| "Caller cannot be an allocation which should not have profiled calls"); | ||
| assert(CI->Clones.size() > CallerCall.cloneNo()); | ||
| CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); | ||
| auto NewCalleeCloneNo = CalleeFunc.cloneNo(); | ||
| auto CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()]; | ||
|
||
| // If we already assigned this callsite to call a specific non-default | ||
| // clone (i.e. not the original function which is clone 0), ensure that we | ||
| // aren't trying to now update it to call a different clone, which is | ||
| // indicative of a bug in the graph or function assignment. | ||
| if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) { | ||
| LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " | ||
| << CurCalleeCloneNo << " now " << NewCalleeCloneNo | ||
| << "\n"); | ||
| MismatchedCloneAssignments++; | ||
| } | ||
| CI->Clones[CallerCall.cloneNo()] = NewCalleeCloneNo; | ||
| } | ||
|
|
||
| // Update the debug information attached to NewFunc to use the clone Name. Note | ||
|
|
@@ -4703,6 +4744,18 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { | |
| // where the callers were assigned to different clones of a function. | ||
| } | ||
|
|
||
| // Returns the first function clone in FuncClonesToCallMap that has no clone | ||
| // of this callsite Node assigned to it yet. | ||
| auto FindFirstAvailFuncClone = [&]() { | ||
| // Find first function in FuncClonesToCallMap without an assigned | ||
| // clone of this callsite Node. We should always have one | ||
| // available at this point due to the earlier cloning when the | ||
| // FuncClonesToCallMap size was smaller than the clone number. | ||
| for (auto &CF : FuncClonesToCallMap) { | ||
| if (!FuncCloneToCurNodeCloneMap.count(CF.first)) | ||
| return CF.first; | ||
| } | ||
| // Use llvm_unreachable rather than assert(false): with asserts disabled, | ||
| // control would otherwise flow off the end of a value-returning lambda, | ||
| // which is undefined behavior. | ||
| llvm_unreachable( | ||
| "Expected an available func clone for this callsite clone"); | ||
snehasish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| // See if we can use existing function clone. Walk through | ||
| // all caller edges to see if any have already been assigned to | ||
| // a clone of this callsite's function. If we can use it, do so. If not, | ||
|
|
@@ -4819,16 +4872,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { | |
| // clone of OrigFunc for another caller during this iteration over | ||
| // its caller edges. | ||
| if (!FuncCloneAssignedToCurCallsiteClone) { | ||
| // Find first function in FuncClonesToCallMap without an assigned | ||
| // clone of this callsite Node. We should always have one | ||
| // available at this point due to the earlier cloning when the | ||
| // FuncClonesToCallMap size was smaller than the clone number. | ||
| for (auto &CF : FuncClonesToCallMap) { | ||
| if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { | ||
| FuncCloneAssignedToCurCallsiteClone = CF.first; | ||
| break; | ||
| } | ||
| } | ||
| FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); | ||
| assert(FuncCloneAssignedToCurCallsiteClone); | ||
| // Assign Clone to FuncCloneAssignedToCurCallsiteClone | ||
| AssignCallsiteCloneToFuncClone( | ||
|
|
@@ -4842,6 +4886,27 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { | |
| FuncCloneAssignedToCurCallsiteClone); | ||
| } | ||
| } | ||
| // If we didn't assign a function clone to this callsite clone yet, e.g. | ||
| // none of its callers has a non-null call, do the assignment here. | ||
| // We want to ensure that every callsite clone is assigned to some | ||
| // function clone, so that the call updates below work as expected. | ||
| // In particular if this is the original callsite, we want to ensure it | ||
| // is assigned to the original function, otherwise the original function | ||
| // will appear available for assignment to other callsite clones, | ||
| // leading to unintended effects. For one, the unknown and not updated | ||
| // callers will call into cloned paths leading to the wrong hints, | ||
| // because they still call the original function (clone 0). Also, | ||
| // because all callsites start out as being clone 0 by default, we can't | ||
| // easily distinguish between callsites explicitly assigned to clone 0 | ||
| // vs those never assigned, which can lead to multiple updates of the | ||
|
Comment on lines
+4903
to
+4904
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe if we used a struct instead of an integer to indicate the clone number as the value in the map, we could hold additional data about whether it was assigned along with the assigned clone number. Would that simplify any of the logic here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not completely sure which map you are referring to, but I started out making a change to add a bool in the ContextNode as to whether its callsite had been assigned to a func clone, but that turned out to be tricky to get right, and this was more targeted and simpler to get right as a fix for the specific problem I was looking at. In particular, we want callsites that we weren't able to update callers for to get assigned to clone 0, which this change ensures. I've added a TODO though to look at adding a mechanism to better identify and distinguish callsite clones that aren't getting assigned to any clone. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wasn't referring to any particular map, rather some way of noting that the callsite has been assigned. Your response about adding it in ContextNode was helpful. Adding a TODO for now sounds good. |
||
| // calls when invoking updateCall below, with mismatched clone values. | ||
| if (!FuncCloneAssignedToCurCallsiteClone) { | ||
| FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); | ||
| assert(FuncCloneAssignedToCurCallsiteClone); | ||
snehasish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| AssignCallsiteCloneToFuncClone( | ||
| FuncCloneAssignedToCurCallsiteClone, Call, Clone, | ||
| AllocationCallToContextNodeMap.count(Call)); | ||
snehasish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| if (VerifyCCG) { | ||
| checkNode<DerivedCCG, FuncTy, CallTy>(Node); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| ;; Make sure we assign the original callsite to a function clone (which will be | ||
| ;; the original function clone), even when we cannot update its caller (due to | ||
| ;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use | ||
| ;; the original function for a different clone, leading to confusion later when | ||
| ;; rewriting the calls. | ||
|
|
||
| ;; -stats requires asserts | ||
| ; REQUIRES: asserts | ||
|
|
||
| ; RUN: opt -thinlto-bc %s >%t.o | ||
| ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ | ||
| ; RUN: -supports-hot-cold-new \ | ||
| ; RUN: -r=%t.o,A,plx \ | ||
| ; RUN: -r=%t.o,B,plx \ | ||
| ; RUN: -r=%t.o,C,plx \ | ||
| ; RUN: -r=%t.o,D,plx \ | ||
| ; RUN: -r=%t.o,E,plx \ | ||
| ; RUN: -r=%t.o,F,plx \ | ||
| ; RUN: -r=%t.o,G,plx \ | ||
| ; RUN: -r=%t.o,A1,plx \ | ||
| ; RUN: -r=%t.o,B1,plx \ | ||
| ; RUN: -r=%t.o,_Znwm, \ | ||
| ; RUN: -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \ | ||
| ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ | ||
| ; RUN: -o %t.out 2>&1 | FileCheck %s \ | ||
| ; RUN: --implicit-check-not="Mismatch in call clone assignment" \ | ||
| ; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones" | ||
|
|
||
| ; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR | ||
|
|
||
| ; ModuleID = '<stdin>' | ||
| source_filename = "reduced.ll" | ||
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64-grtev4-linux-gnu" | ||
|
|
||
| ; IR-LABEL: define dso_local void @A() | ||
| define void @A() #0 { | ||
| ; IR: call void @C() | ||
| call void @C() | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @B() | ||
| define void @B() #0 { | ||
| ; IR: call void @C.memprof.1() | ||
| call void @C(), !callsite !1 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @C() | ||
| define void @C() #0 { | ||
| ; IR: call void @F() | ||
| call void @F(), !callsite !16 | ||
| ; IR: call void @D() | ||
| call void @D(), !callsite !2 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @D() | ||
| define void @D() #0 { | ||
| ; IR: call void @E() | ||
| call void @E(), !callsite !3 | ||
| ; IR: call void @G() | ||
| call void @G(), !callsite !17 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @E() | ||
| define void @E() #0 { | ||
| ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]] | ||
| %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @F() | ||
| define void @F() #0 { | ||
| ; IR: call void @G() | ||
| call void @G(), !callsite !17 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @G() | ||
| define void @G() #0 { | ||
| ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD]] | ||
| %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @A1() | ||
| define void @A1() #0 { | ||
| ; IR: call void @C() | ||
| call void @C(), !callsite !18 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @B1() | ||
| define void @B1() #0 { | ||
| ; IR: call void @C.memprof.1() | ||
| call void @C(), !callsite !19 | ||
| ret void | ||
| } | ||
|
|
||
| ; IR-LABEL: define dso_local void @C.memprof.1() | ||
| ; IR: call void @F.memprof.1() | ||
| ; IR: call void @D.memprof.1() | ||
|
|
||
| ; IR-LABEL: define dso_local void @D.memprof.1() | ||
| ; IR: call void @E.memprof.1() | ||
| ; IR: call void @G() | ||
|
|
||
| ; IR-LABEL: define dso_local void @E.memprof.1() | ||
| ; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]] | ||
|
|
||
| ; IR-LABEL: define dso_local void @F.memprof.1() | ||
| ; IR: call void @G.memprof.1() | ||
|
|
||
| ; IR-LABEL: define dso_local void @G.memprof.1() | ||
| ; IR: call ptr @_Znwm(i64 0) #[[COLD]] | ||
|
|
||
| declare ptr @_Znwm(i64) | ||
|
|
||
| attributes #0 = { noinline optnone } | ||
| ; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } | ||
| ; IR: attributes #[[COLD]] = { "memprof"="cold" } | ||
|
|
||
| !0 = !{i64 123} | ||
| !1 = !{i64 234} | ||
| !2 = !{i64 345} | ||
| !3 = !{i64 456} | ||
| !4 = !{!5, !7} | ||
| !5 = !{!6, !"notcold"} | ||
| !6 = !{i64 567, i64 456, i64 345, i64 123} | ||
| !7 = !{!8, !"cold"} | ||
| !8 = !{i64 567, i64 456, i64 345, i64 234} | ||
| !9 = !{i64 567} | ||
| !10 = !{!11, !13} | ||
| !11 = !{!12, !"notcold"} | ||
| !12 = !{i64 678, i64 891, i64 789, i64 912} | ||
| !13 = !{!14, !"cold"} | ||
| !14 = !{i64 678, i64 891, i64 789, i64 812} | ||
| !15 = !{i64 678} | ||
| !16 = !{i64 789} | ||
| !17 = !{i64 891} | ||
| !18 = !{i64 912} | ||
| !19 = !{i64 812} |
Uh oh!
There was an error while loading. Please reload this page.