From d3b8bcbcedb12eef9a8db787192defbf73d913a1 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 10 Jul 2025 09:56:30 -0700 Subject: [PATCH 1/3] MemProf: Add minimum count threshold for inlining of promoted calls Allow users to set the minimum absolute count for inlining of indirect calls promoted during cloning. This is primarily meant to enable generation of synthetic vp metadata introduced in PR141164 when profiling memprof-optimized binaries. --- .../IPO/MemProfContextDisambiguation.cpp | 9 ++++ .../test/ThinLTO/X86/memprof-icp-recursive.ll | 51 ++++++++++++++++--- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index c0f84456d2b27..3a7c3cd773b0a 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts( "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles")); +// Set the minimum absolute count threshold for inlining of indirect calls +// promoted during cloning.
+static cl::opt<unsigned> ICPInlineMinimumCountThreshold( + "memprof-icp-inline-minimum-count-threshold", cl::init(0), cl::Hidden, + cl::desc("Minimum absolute count for promoted target to be inlinable")); + namespace llvm { cl::opt<bool> EnableMemProfContextDisambiguation( "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, @@ -5573,6 +5579,9 @@ void MemProfContextDisambiguation::performICP( .getCallee()); } DirectCall.setCalledFunction(TargetToUse); + if (ICPInlineMinimumCountThreshold && + Candidate.Count < ICPInlineMinimumCountThreshold) + DirectCall.setIsNoInline(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) << ore::NV("Call", CBClone) << " in clone " << ore::NV("Caller", CBClone->getFunction()) diff --git a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll index f8dcd80d4e141..9e2f6e6130829 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll @@ -54,7 +54,39 @@ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ ; RUN: --check-prefix=REMARKS -; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR +; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE + +;; Next, add a threshold to prevent inlining of small count promoted calls.
+; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ +; RUN: -memprof-icp-inline-minimum-count-threshold=3 \ +; RUN: -enable-memprof-indirect-call-support=true \ +; RUN: -memprof-allow-recursive-callsites \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ +; RUN: -r=%t/foo.o,_ZN2B03barEj, \ +; RUN: -r=%t/foo.o,_ZN1B3barEj, \ +; RUN: -r=%t/main.o,_Z3fooR2B0j, \ +; RUN: -r=%t/main.o,_Znwm, \ +; RUN: -r=%t/main.o,_ZdlPvm, \ +; RUN: -r=%t/main.o,_Z8externalPi, \ +; RUN: -r=%t/main.o,main,plx \ +; RUN: -r=%t/main.o,_ZN2B03barEj,plx \ +; RUN: -r=%t/main.o,_ZN1B3barEj,plx \ +; RUN: -r=%t/main.o,_ZTV1B,plx \ +; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \ +; RUN: -r=%t/main.o,_ZTS1B,plx \ +; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \ +; RUN: -r=%t/main.o,_ZTS2B0,plx \ +; RUN: -r=%t/main.o,_ZTI2B0,plx \ +; RUN: -r=%t/main.o,_ZTI1B,plx \ +; RUN: -r=%t/main.o,_ZTV2B0,plx \ +; RUN: -thinlto-threads=1 \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ +; RUN: -pass-remarks=. 
-save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \ +; RUN: --check-prefix=REMARKS + +; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1 @@ -98,12 +130,14 @@ ; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj ; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect ; IR: if.true.direct_targ: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]] +; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]] ; IR: if.false.orig_indirect: ; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj ; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2 ; IR: if.true.direct_targ1: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]] +; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect2: ; IR: call {{.*}} %0 @@ -114,17 +148,20 @@ ; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj ; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect ; IR: if.true.direct_targ: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]] +; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect: ; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj ; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2 ; IR: if.true.direct_targ1: -; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] +; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]] +; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]] ; IR: if.false.orig_indirect2: ; IR: call {{.*}} 
%0 -; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" -; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" +; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" +; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold" +; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline } ;--- foo.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" From 49aaca2abdc5dd7628fb918daf6342bf49a40630 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 10 Jul 2025 12:36:48 -0700 Subject: [PATCH 2/3] Address comments --- .../Transforms/IPO/MemProfContextDisambiguation.cpp | 12 ++++++------ llvm/test/ThinLTO/X86/memprof-icp-recursive.ll | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 3a7c3cd773b0a..15961bcb2972b 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -181,10 +181,10 @@ static cl::opt<bool> AllowRecursiveContexts( "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles")); -// Set the minimum absolute count threshold for inlining of indirect calls -// promoted during cloning. -static cl::opt<unsigned> ICPInlineMinimumCountThreshold( - "memprof-icp-inline-minimum-count-threshold", cl::init(0), cl::Hidden, +// Set the minimum absolute count threshold for allowing inlining of indirect +// calls promoted during cloning.
+static cl::opt<unsigned> MemProfICPNoInlineThreshold( + "memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable")); namespace llvm { @@ -5579,8 +5579,8 @@ void MemProfContextDisambiguation::performICP( .getCallee()); } DirectCall.setCalledFunction(TargetToUse); - if (ICPInlineMinimumCountThreshold && - Candidate.Count < ICPInlineMinimumCountThreshold) + if (MemProfICPNoInlineThreshold && + Candidate.Count < MemProfICPNoInlineThreshold) DirectCall.setIsNoInline(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone) << ore::NV("Call", CBClone) << " in clone " diff --git a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll index 9e2f6e6130829..3394efd52a3ba 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll @@ -56,9 +56,10 @@ ; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE -;; Next, add a threshold to prevent inlining of small count promoted calls. +;; Next, add a threshold to prevent inlining of the promoted calls which have +;; count 2 (the default threshold of 2 means they are inlinable by default).
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ -; RUN: -memprof-icp-inline-minimum-count-threshold=3 \ +; RUN: -memprof-icp-noinline-threshold=3 \ ; RUN: -enable-memprof-indirect-call-support=true \ ; RUN: -memprof-allow-recursive-callsites \ ; RUN: -supports-hot-cold-new \ From 5639cfabfccd83bc7d923774398a05e8bbbe217e Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 10 Jul 2025 13:14:55 -0700 Subject: [PATCH 3/3] Add comment --- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 15961bcb2972b..3bf1ebb226d37 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -5579,6 +5579,12 @@ void MemProfContextDisambiguation::performICP( .getCallee()); } DirectCall.setCalledFunction(TargetToUse); + // During matching we generate synthetic VP metadata for indirect calls + // not already having any, from the memprof profile's callee GUIDs. If + // we subsequently promote and inline those callees, we currently lose + // the ability to generate this synthetic VP metadata. Optionally apply + // a noinline attribute to promoted direct calls, where the threshold is + // set to capture synthetic VP metadata targets which get a count of 1. if (MemProfICPNoInlineThreshold && Candidate.Count < MemProfICPNoInlineThreshold) DirectCall.setIsNoInline();