Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
"memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
cl::desc("Allow cloning of contexts having recursive cycles"));

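// Minimum absolute count that an indirect call target promoted during cloning
// must have for the resulting direct call to remain eligible for inlining.
// When non-zero, promoted calls with a smaller count are marked noinline.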
static cl::opt<unsigned> ICPInlineMinimumCountThreshold(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer a variable name with MemProf in it somewhere, to distinguish it from the general ICP knobs. Also, this variable allows inlining but doesn't guarantee it. How about renaming it to MemProfICPNoInlineThreshold?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"memprof-icp-inline-minimum-count-threshold", cl::init(0), cl::Hidden,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we set the default value to 2 so that the synthetic calls we insert don't get inlined?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, done

cl::desc("Minimum absolute count for promoted target to be inlinable"));

namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -5573,6 +5579,9 @@ void MemProfContextDisambiguation::performICP(
.getCallee());
}
DirectCall.setCalledFunction(TargetToUse);
if (ICPInlineMinimumCountThreshold &&
Candidate.Count < ICPInlineMinimumCountThreshold)
DirectCall.setIsNoInline();
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
<< ore::NV("Call", CBClone) << " in clone "
<< ore::NV("Caller", CBClone->getFunction())
Expand Down
51 changes: 44 additions & 7 deletions llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,39 @@
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
; RUN: --check-prefix=REMARKS

; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE

;; Next, add a threshold to prevent inlining of small count promoted calls.
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -memprof-icp-inline-minimum-count-threshold=3 \
; RUN: -enable-memprof-indirect-call-support=true \
; RUN: -memprof-allow-recursive-callsites \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
; RUN: -r=%t/main.o,_Z8externalPi, \
; RUN: -r=%t/main.o,main,plx \
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
; RUN: -r=%t/main.o,_ZTV1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS2B0,plx \
; RUN: -r=%t/main.o,_ZTI2B0,plx \
; RUN: -r=%t/main.o,_ZTI1B,plx \
; RUN: -r=%t/main.o,_ZTV2B0,plx \
; RUN: -thinlto-threads=1 \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
; RUN: -pass-remarks=. -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
; RUN: --check-prefix=REMARKS

; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE

; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
Expand Down Expand Up @@ -98,12 +130,14 @@
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
; IR: if.false.orig_indirect:
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0

Expand All @@ -114,17 +148,20 @@
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect:
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0

; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }

;--- foo.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
Expand Down
Loading