Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/include/llvm/Transforms/IPO/SampleProfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
SampleProfileLoaderPass(
std::string File = "", std::string RemappingFile = "",
ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None,
IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr,
bool DisableSampleProfileInlining = false);

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

Expand All @@ -50,6 +51,7 @@ class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
std::string ProfileRemappingFileName;
const ThinOrFullLTOPhase LTOPhase;
IntrusiveRefCntPtr<vfs::FileSystem> FS;
bool DisableSampleProfileInlining;
};

} // end namespace llvm
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2153,6 +2153,17 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
// Explicitly disable sample loader inlining in O0 pipeline.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
ThinOrFullLTOPhase::None, nullptr,
/*DisableSampleProfileInlining=*/true));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}

invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

// Build a minimal pipeline based on the semantics required by LLVM,
Expand Down
29 changes: 20 additions & 9 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ static cl::opt<bool> ProfileSizeInline(
static cl::opt<bool> DisableSampleLoaderInlining(
"disable-sample-loader-inlining", cl::Hidden, cl::init(false),
cl::desc("If true, artifically skip inline transformation in sample-loader "
"pass, and merge (or scale) profiles (as configured by "
"--sample-profile-merge-inlinee)."));
"pass, and use flattened profiles to emit annotation."));

namespace llvm {
cl::opt<bool>
Expand Down Expand Up @@ -470,7 +469,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
LazyCallGraph &CG)
LazyCallGraph &CG, bool DisableSampleProfileInlining)
: SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
std::move(FS)),
GetAC(std::move(GetAssumptionCache)),
Expand All @@ -479,7 +478,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
AnnotatedPassName(AnnotateSampleProfileInlinePhase
? llvm::AnnotateInlinePassName(InlineContext{
LTOPhase, InlinePass::SampleProfileInliner})
: CSINLINE_DEBUG) {}
: CSINLINE_DEBUG),
DisableSampleProfileInlining(DisableSampleProfileInlining) {}

bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
Expand Down Expand Up @@ -593,6 +593,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
// attribute.
bool ProfAccForSymsInList;

bool DisableSampleProfileInlining;

// External inline advisor used to replay inline decision from remarks.
std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;

Expand Down Expand Up @@ -920,7 +922,7 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
SmallVector<CallBase *, 8> *InlinedCallSite) {
// Bail out early if sample-loader inliner is disabled.
if (DisableSampleLoaderInlining)
if (DisableSampleProfileInlining)
return false;

// Bail out early if MaxNumPromotions is zero.
Expand Down Expand Up @@ -1231,7 +1233,7 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
// Do not attempt to inline a candidate if
// --disable-sample-loader-inlining is true.
if (DisableSampleLoaderInlining)
if (DisableSampleProfileInlining)
return false;

CallBase &CB = *Candidate.CallInstr;
Expand Down Expand Up @@ -1975,6 +1977,13 @@ bool SampleProfileLoader::doInitialization(Module &M,

PSL = Reader->getProfileSymbolList();

if (DisableSampleLoaderInlining.getNumOccurrences())
DisableSampleProfileInlining = DisableSampleLoaderInlining;

// Use flattened profile if inlining is disabled.
if (DisableSampleProfileInlining && !Reader->profileIsCS())
ProfileConverter::flattenProfile(Reader->getProfiles());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense to flatten profile if we are loading profile for O0. However, if we just disable sample loader inlining, but still runs O3/LTO, we may not want to flatten profile, because CGSCC inliner may have done some inlining, and we wanted to leverage context profile if possible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Good point, sounds good to use flattened profile for only O0 mode.


// While profile-sample-accurate is on, ignore symbol list.
ProfAccForSymsInList =
ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
Expand Down Expand Up @@ -2305,9 +2314,10 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
}
SampleProfileLoaderPass::SampleProfileLoaderPass(
std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
IntrusiveRefCntPtr<vfs::FileSystem> FS)
IntrusiveRefCntPtr<vfs::FileSystem> FS, bool DisableSampleProfileInlining)
: ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
LTOPhase(LTOPhase), FS(std::move(FS)) {}
LTOPhase(LTOPhase), FS(std::move(FS)),
DisableSampleProfileInlining(DisableSampleProfileInlining) {}

PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
Expand All @@ -2332,7 +2342,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG);
LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
DisableSampleProfileInlining);
if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();

Expand Down
6 changes: 5 additions & 1 deletion llvm/test/Other/new-pm-pgo-O0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@
; RUN: |FileCheck %s --check-prefixes=USE_POST_LINK,USE
; RUN: opt -debug-pass-manager -passes='lto<O0>' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 \
; RUN: |FileCheck %s --check-prefixes=USE_POST_LINK,USE
; RUN: opt -debug-pass-manager -passes='default<O0>' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE

;
; GEN: Running pass: PGOInstrumentationGen
; USE_DEFAULT: Running pass: PGOInstrumentationUse
; USE_PRE_LINK: Running pass: PGOInstrumentationUse
; USE_POST_LINK-NOT: Running pass: PGOInstrumentationUse
; USE-NOT: Running pass: PGOIndirectCallPromotion
; USE-NOT: Running pass: PGOMemOPSizeOpt

; SAMPLE_USE: Running pass: AddDiscriminatorsPass
; SAMPLE_USE: Running pass: SampleProfileLoaderPass

define void @foo() {
ret void
}
12 changes: 10 additions & 2 deletions llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
; when the profile uses md5.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s

; Test we properly merge not inlined profile with '--sample-profile-merge-inlinee' even if '--disable-sample-loader-inlining' is true
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.md5.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -S | FileCheck -check-prefix=MERGE %s
; Test we properly use flattened profile when using '--disable-sample-loader-inlining'.

; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true --disable-sample-loader-inlining -use-profiled-call-graph=0 -S | FileCheck -check-prefix=FLATTEN %s

@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1

Expand Down Expand Up @@ -103,3 +104,10 @@ declare i32 @printf(ptr, ...)
; MERGE: !{!"branch_weights", i32 10}
; MERGE: name: "sub"
; MERGE-NEXT: {!"function_entry_count", i64 3}

; FLATTEN: name: "sum"
; FLATTEN-NEXT: {!"function_entry_count", i64 35}
; FLATTEN: !{!"branch_weights", i32 13, i32 23}
; FLATTEN: !{!"branch_weights", i32 12}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, after switch using flattened profile, it triggered a test failure, I took a deep look at this, the reason might explain the difference using flattened profile vs merging not inlined profile on-the-fly(merged profile):
Original profile (two sum are in different context)


...
 sum:46
  2:10 sub:10
...
 sum:11:22
  2: sub:2
   1: 2

The merged profile:

sum:46
 2:10 sub:10
 ...
 2: sub:2
  1: 2

For a flattened profile

sum:46
 2:12 sub:12

Then for the line 2, it gives different block count 12 vs 10, because the inlinee profile isn't counted for the block count, which I think it should be, so this seems using flattened profile makes more sense(though the difference is not big)

; FLATTEN: name: "sub"
; FLATTEN-NEXT: {!"function_entry_count", i64 3}
Loading