Skip to content

Commit d3b8bcb

Browse files
committed
MemProf: Add minimum count threshold for inlining of promoted calls
Allow users to set the minimum absolute count for inlining of indirect calls promoted during cloning; promoted calls whose count falls below the threshold are marked noinline. This is primarily meant to enable generation of the synthetic value profile (VP) metadata introduced in PR141164 when profiling memprof-optimized binaries.
1 parent 7563531 commit d3b8bcb

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
181181
"memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
182182
cl::desc("Allow cloning of contexts having recursive cycles"));
183183

184+
// Set the minimum absolute count threshold for inlining of indirect calls
185+
// promoted during cloning. Calls below it are marked noinline; 0 disables it.
186+
static cl::opt<unsigned> ICPInlineMinimumCountThreshold(
187+
"memprof-icp-inline-minimum-count-threshold", cl::init(0), cl::Hidden,
188+
cl::desc("Minimum absolute count for promoted target to be inlinable"));
189+
184190
namespace llvm {
185191
cl::opt<bool> EnableMemProfContextDisambiguation(
186192
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -5573,6 +5579,9 @@ void MemProfContextDisambiguation::performICP(
55735579
.getCallee());
55745580
}
55755581
DirectCall.setCalledFunction(TargetToUse);
5582+
if (ICPInlineMinimumCountThreshold &&
5583+
Candidate.Count < ICPInlineMinimumCountThreshold)
5584+
DirectCall.setIsNoInline();
55765585
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
55775586
<< ore::NV("Call", CBClone) << " in clone "
55785587
<< ore::NV("Caller", CBClone->getFunction())

llvm/test/ThinLTO/X86/memprof-icp-recursive.ll

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,39 @@
5454
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
5555
; RUN: --check-prefix=REMARKS
5656

57-
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
57+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE
58+
59+
;; Next, add a threshold to prevent inlining of small-count promoted calls.
60+
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
61+
; RUN: -memprof-icp-inline-minimum-count-threshold=3 \
62+
; RUN: -enable-memprof-indirect-call-support=true \
63+
; RUN: -memprof-allow-recursive-callsites \
64+
; RUN: -supports-hot-cold-new \
65+
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
66+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
67+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
68+
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
69+
; RUN: -r=%t/main.o,_Znwm, \
70+
; RUN: -r=%t/main.o,_ZdlPvm, \
71+
; RUN: -r=%t/main.o,_Z8externalPi, \
72+
; RUN: -r=%t/main.o,main,plx \
73+
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
74+
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
75+
; RUN: -r=%t/main.o,_ZTV1B,plx \
76+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
77+
; RUN: -r=%t/main.o,_ZTS1B,plx \
78+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
79+
; RUN: -r=%t/main.o,_ZTS2B0,plx \
80+
; RUN: -r=%t/main.o,_ZTI2B0,plx \
81+
; RUN: -r=%t/main.o,_ZTI1B,plx \
82+
; RUN: -r=%t/main.o,_ZTV2B0,plx \
83+
; RUN: -thinlto-threads=1 \
84+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
85+
; RUN: -pass-remarks=. -save-temps \
86+
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
87+
; RUN: --check-prefix=REMARKS
88+
89+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE
5890

5991
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
6092
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
@@ -98,12 +130,14 @@
98130
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
99131
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
100132
; IR: if.true.direct_targ:
101-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
133+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
134+
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
102135
; IR: if.false.orig_indirect:
103136
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
104137
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
105138
; IR: if.true.direct_targ1:
106-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
139+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
140+
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
107141
; IR: if.false.orig_indirect2:
108142
; IR: call {{.*}} %0
109143

@@ -114,17 +148,20 @@
114148
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
115149
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
116150
; IR: if.true.direct_targ:
117-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
151+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
152+
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
118153
; IR: if.false.orig_indirect:
119154
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
120155
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
121156
; IR: if.true.direct_targ1:
122-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
157+
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
158+
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
123159
; IR: if.false.orig_indirect2:
124160
; IR: call {{.*}} %0
125161

126-
; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
127-
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
162+
; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
163+
; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
164+
; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }
128165

129166
;--- foo.ll
130167
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

0 commit comments

Comments
 (0)