Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
"memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
cl::desc("Allow cloning of contexts having recursive cycles"));

// Minimum absolute count a promoted target must have for the direct call
// created by indirect call promotion during cloning to remain inlinable.
// Promoted direct calls whose candidate count is below this threshold are
// marked noinline; a value of 0 disables the marking entirely. The default of
// 2 only affects targets with a count of 1, which is the count given to
// targets coming from synthetic VP metadata.
static cl::opt<unsigned> MemProfICPNoInlineThreshold(
"memprof-icp-noinline-threshold", cl::init(2), cl::Hidden,
cl::desc("Minimum absolute count for promoted target to be inlinable"));

namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -5573,6 +5579,15 @@ void MemProfContextDisambiguation::performICP(
.getCallee());
}
DirectCall.setCalledFunction(TargetToUse);
// During matching we generate synthetic VP metadata for indirect calls
// not already having any, from the memprof profile's callee GUIDs. If
// we subsequently promote and inline those callees, we currently lose
// the ability to generate this synthetic VP metadata. Optionally apply
// a noinline attribute to promoted direct calls, where the threshold is
// set to capture synthetic VP metadata targets which get a count of 1.
if (MemProfICPNoInlineThreshold &&

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment here to document why we need to do this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Candidate.Count < MemProfICPNoInlineThreshold)
DirectCall.setIsNoInline();
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
<< ore::NV("Call", CBClone) << " in clone "
<< ore::NV("Caller", CBClone->getFunction())
Expand Down
52 changes: 45 additions & 7 deletions llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,40 @@
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
; RUN: --check-prefix=REMARKS

; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE

;; Next, raise the threshold to 3 to prevent inlining of the promoted calls,
;; which have count 2 (with the default threshold of 2 they remain inlinable).
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -memprof-icp-noinline-threshold=3 \
; RUN: -enable-memprof-indirect-call-support=true \
; RUN: -memprof-allow-recursive-callsites \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
; RUN: -r=%t/main.o,_Z8externalPi, \
; RUN: -r=%t/main.o,main,plx \
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
; RUN: -r=%t/main.o,_ZTV1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS2B0,plx \
; RUN: -r=%t/main.o,_ZTI2B0,plx \
; RUN: -r=%t/main.o,_ZTI1B,plx \
; RUN: -r=%t/main.o,_ZTV2B0,plx \
; RUN: -thinlto-threads=1 \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
; RUN: -pass-remarks=. -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
; RUN: --check-prefix=REMARKS

; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE

; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
Expand Down Expand Up @@ -98,12 +131,14 @@
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
; IR: if.false.orig_indirect:
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0

Expand All @@ -114,17 +149,20 @@
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect:
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0

; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }

;--- foo.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
Expand Down
Loading