Skip to content

Commit 0f2484a

Browse files
[MemProf] Ensure all callsite clones are assigned a function clone (#150735)
Fix a bug in function assignment where we were not assigning all callsite clones to a function clone. This led to incorrect call updates because multiple callsite clones could look like they were assigned to the same function clone. Add in a stat and debug message to help identify and debug cases where this is still happening.
1 parent 03dc2a4 commit 0f2484a

File tree

3 files changed

+359
-12
lines changed

3 files changed

+359
-12
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 84 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
9797
"Number of missing alloc nodes for context ids");
9898
STATISTIC(SkippedCallsCloning,
9999
"Number of calls skipped during cloning due to unexpected operand");
100+
STATISTIC(MismatchedCloneAssignments,
101+
"Number of callsites assigned to call multiple non-matching clones");
100102

101103
static cl::opt<std::string> DotFilePathPrefix(
102104
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
20602062
return F.getName().contains(MemProfCloneSuffix);
20612063
}
20622064

2065+
// Return the clone number of the given function by extracting it from the
2066+
// memprof suffix. Assumes the caller has already confirmed it is a memprof
2067+
// clone.
2068+
static unsigned getMemProfCloneNum(const Function &F) {
2069+
assert(isMemProfClone(F));
2070+
auto Pos = F.getName().find_last_of('.');
2071+
assert(Pos > 0);
2072+
unsigned CloneNo;
2073+
bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2074+
assert(!Err);
2075+
(void)Err;
2076+
return CloneNo;
2077+
}
2078+
20632079
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
20642080
const Instruction *Call,
20652081
unsigned CloneNo) const {
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
39793995

39803996
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
39813997
FuncInfo CalleeFunc) {
3982-
if (CalleeFunc.cloneNo() > 0)
3998+
auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
3999+
auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4000+
if (isMemProfClone(*CurF)) {
4001+
// If we already assigned this callsite to call a specific non-default
4002+
// clone (i.e. not the original function which is clone 0), ensure that we
4003+
// aren't trying to now update it to call a different clone, which is
4004+
// indicative of a bug in the graph or function assignment.
4005+
auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
4006+
if (CurCalleeCloneNo != NewCalleeCloneNo) {
4007+
LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
4008+
<< CurCalleeCloneNo << " now " << NewCalleeCloneNo
4009+
<< "\n");
4010+
MismatchedCloneAssignments++;
4011+
}
4012+
}
4013+
if (NewCalleeCloneNo > 0)
39834014
cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
39844015
OREGetter(CallerCall.call()->getFunction())
39854016
.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
39954026
assert(CI &&
39964027
"Caller cannot be an allocation which should not have profiled calls");
39974028
assert(CI->Clones.size() > CallerCall.cloneNo());
3998-
CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
4029+
auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4030+
auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4031+
// If we already assigned this callsite to call a specific non-default
4032+
// clone (i.e. not the original function which is clone 0), ensure that we
4033+
// aren't trying to now update it to call a different clone, which is
4034+
// indicative of a bug in the graph or function assignment.
4035+
if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4036+
LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
4037+
<< CurCalleeCloneNo << " now " << NewCalleeCloneNo
4038+
<< "\n");
4039+
MismatchedCloneAssignments++;
4040+
}
4041+
CurCalleeCloneNo = NewCalleeCloneNo;
39994042
}
40004043

40014044
// Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4703,6 +4746,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
47034746
// where the callers were assigned to different clones of a function.
47044747
}
47054748

4749+
auto FindFirstAvailFuncClone = [&]() {
4750+
// Find first function in FuncClonesToCallMap without an assigned
4751+
// clone of this callsite Node. We should always have one
4752+
// available at this point due to the earlier cloning when the
4753+
// FuncClonesToCallMap size was smaller than the clone number.
4754+
for (auto &CF : FuncClonesToCallMap) {
4755+
if (!FuncCloneToCurNodeCloneMap.count(CF.first))
4756+
return CF.first;
4757+
}
4758+
assert(false &&
4759+
"Expected an available func clone for this callsite clone");
4760+
};
4761+
47064762
// See if we can use existing function clone. Walk through
47074763
// all caller edges to see if any have already been assigned to
47084764
// a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4875,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
48194875
// clone of OrigFunc for another caller during this iteration over
48204876
// its caller edges.
48214877
if (!FuncCloneAssignedToCurCallsiteClone) {
4822-
// Find first function in FuncClonesToCallMap without an assigned
4823-
// clone of this callsite Node. We should always have one
4824-
// available at this point due to the earlier cloning when the
4825-
// FuncClonesToCallMap size was smaller than the clone number.
4826-
for (auto &CF : FuncClonesToCallMap) {
4827-
if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4828-
FuncCloneAssignedToCurCallsiteClone = CF.first;
4829-
break;
4830-
}
4831-
}
4878+
FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
48324879
assert(FuncCloneAssignedToCurCallsiteClone);
48334880
// Assign Clone to FuncCloneAssignedToCurCallsiteClone
48344881
AssignCallsiteCloneToFuncClone(
@@ -4842,6 +4889,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
48424889
FuncCloneAssignedToCurCallsiteClone);
48434890
}
48444891
}
4892+
// If we didn't assign a function clone to this callsite clone yet, e.g.
4893+
// none of its callers has a non-null call, do the assignment here.
4894+
// We want to ensure that every callsite clone is assigned to some
4895+
// function clone, so that the call updates below work as expected.
4896+
// In particular if this is the original callsite, we want to ensure it
4897+
// is assigned to the original function, otherwise the original function
4898+
// will appear available for assignment to other callsite clones,
4899+
// leading to unintended effects. For one, the unknown and not updated
4900+
// callers will call into cloned paths leading to the wrong hints,
4901+
// because they still call the original function (clone 0). Also,
4902+
// because all callsites start out as being clone 0 by default, we can't
4903+
// easily distinguish between callsites explicitly assigned to clone 0
4904+
// vs those never assigned, which can lead to multiple updates of the
4905+
// calls when invoking updateCall below, with mismatched clone values.
4906+
// TODO: Add a flag to the callsite nodes or some other mechanism to
4907+
// better distinguish and identify callsite clones that are not getting
4908+
// assigned to function clones as expected.
4909+
if (!FuncCloneAssignedToCurCallsiteClone) {
4910+
FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
4911+
assert(FuncCloneAssignedToCurCallsiteClone &&
4912+
"No available func clone for this callsite clone");
4913+
AssignCallsiteCloneToFuncClone(
4914+
FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4915+
/*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
4916+
}
48454917
}
48464918
if (VerifyCCG) {
48474919
checkNode<DerivedCCG, FuncTy, CallTy>(Node);
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
;; Make sure we assign the original callsite to a function clone (which will be
2+
;; the original function clone), even when we cannot update its caller (due to
3+
;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use
4+
;; the original function for a different clone, leading to confusion later when
5+
;; rewriting the calls.
6+
7+
;; -stats requires asserts
8+
; REQUIRES: asserts
9+
10+
; RUN: opt -thinlto-bc %s >%t.o
11+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
12+
; RUN: -supports-hot-cold-new \
13+
; RUN: -r=%t.o,A,plx \
14+
; RUN: -r=%t.o,B,plx \
15+
; RUN: -r=%t.o,C,plx \
16+
; RUN: -r=%t.o,D,plx \
17+
; RUN: -r=%t.o,E,plx \
18+
; RUN: -r=%t.o,F,plx \
19+
; RUN: -r=%t.o,G,plx \
20+
; RUN: -r=%t.o,A1,plx \
21+
; RUN: -r=%t.o,B1,plx \
22+
; RUN: -r=%t.o,_Znwm, \
23+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \
24+
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
25+
; RUN: -o %t.out 2>&1 | FileCheck %s \
26+
; RUN: --implicit-check-not="Mismatch in call clone assignment" \
27+
; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones"
28+
29+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
30+
31+
; ModuleID = '<stdin>'
32+
source_filename = "reduced.ll"
33+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
34+
target triple = "x86_64-grtev4-linux-gnu"
35+
36+
; IR-LABEL: define dso_local void @A()
37+
define void @A() #0 {
38+
; IR: call void @C()
39+
call void @C()
40+
ret void
41+
}
42+
43+
; IR-LABEL: define dso_local void @B()
44+
define void @B() #0 {
45+
; IR: call void @C.memprof.1()
46+
call void @C(), !callsite !1
47+
ret void
48+
}
49+
50+
; IR-LABEL: define dso_local void @C()
51+
define void @C() #0 {
52+
; IR: call void @F()
53+
call void @F(), !callsite !16
54+
; IR: call void @D()
55+
call void @D(), !callsite !2
56+
ret void
57+
}
58+
59+
; IR-LABEL: define dso_local void @D()
60+
define void @D() #0 {
61+
; IR: call void @E()
62+
call void @E(), !callsite !3
63+
; IR: call void @G()
64+
call void @G(), !callsite !17
65+
ret void
66+
}
67+
68+
; IR-LABEL: define dso_local void @E()
69+
define void @E() #0 {
70+
; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
71+
%1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
72+
ret void
73+
}
74+
75+
; IR-LABEL: define dso_local void @F()
76+
define void @F() #0 {
77+
; IR: call void @G()
78+
call void @G(), !callsite !17
79+
ret void
80+
}
81+
82+
; IR-LABEL: define dso_local void @G()
83+
define void @G() #0 {
84+
; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD]]
85+
%2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
86+
ret void
87+
}
88+
89+
; IR-LABEL: define dso_local void @A1()
90+
define void @A1() #0 {
91+
; IR: call void @C()
92+
call void @C(), !callsite !18
93+
ret void
94+
}
95+
96+
; IR-LABEL: define dso_local void @B1()
97+
define void @B1() #0 {
98+
; IR: call void @C.memprof.1()
99+
call void @C(), !callsite !19
100+
ret void
101+
}
102+
103+
; IR-LABEL: define dso_local void @C.memprof.1()
104+
; IR: call void @F.memprof.1()
105+
; IR: call void @D.memprof.1()
106+
107+
; IR-LABEL: define dso_local void @D.memprof.1()
108+
; IR: call void @E.memprof.1()
109+
; IR: call void @G()
110+
111+
; IR-LABEL: define dso_local void @E.memprof.1()
112+
; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]
113+
114+
; IR-LABEL: define dso_local void @F.memprof.1()
115+
; IR: call void @G.memprof.1()
116+
117+
; IR-LABEL: define dso_local void @G.memprof.1()
118+
; IR: call ptr @_Znwm(i64 0) #[[COLD]]
119+
120+
declare ptr @_Znwm(i64)
121+
122+
attributes #0 = { noinline optnone }
123+
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
124+
; IR: attributes #[[COLD]] = { "memprof"="cold" }
125+
126+
!0 = !{i64 123}
127+
!1 = !{i64 234}
128+
!2 = !{i64 345}
129+
!3 = !{i64 456}
130+
!4 = !{!5, !7}
131+
!5 = !{!6, !"notcold"}
132+
!6 = !{i64 567, i64 456, i64 345, i64 123}
133+
!7 = !{!8, !"cold"}
134+
!8 = !{i64 567, i64 456, i64 345, i64 234}
135+
!9 = !{i64 567}
136+
!10 = !{!11, !13}
137+
!11 = !{!12, !"notcold"}
138+
!12 = !{i64 678, i64 891, i64 789, i64 912}
139+
!13 = !{!14, !"cold"}
140+
!14 = !{i64 678, i64 891, i64 789, i64 812}
141+
!15 = !{i64 678}
142+
!16 = !{i64 789}
143+
!17 = !{i64 891}
144+
!18 = !{i64 912}
145+
!19 = !{i64 812}

0 commit comments

Comments
 (0)