From 302e247a1c1530344f78b3802abb9860d74c92da Mon Sep 17 00:00:00 2001
From: wlei
Date: Tue, 2 Sep 2025 01:15:04 -0700
Subject: [PATCH] [llvm-profgen] Add an option to force profile to be marked preinlined

---
 llvm/include/llvm/ProfileData/SampleProf.h    | 11 ++++
 .../llvm-profgen/inline-cs-pseudoprobe.test   | 25 +++++++++-
 .../tools/llvm-profgen/inline-noprobe.test    | 50 +++++++++++++------
 .../llvm-profgen/inline-pseudoprobe.test      | 22 ++++++++
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 16 ++++++
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  2 +
 6 files changed, 111 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index fb2d4d3cc50ed..a626071d23915 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -868,6 +868,17 @@ class FunctionSamples {
     }
   }
 
+  // Apply Attr to this function's own context, then recurse through
+  // CallsiteSamples so that every nested callee profile below it receives
+  // the same attribute.
+  void setContextAttribute(ContextAttributeMask Attr) {
+    Context.setAttribute(Attr);
+    // Visit each call site, then each callee sample recorded under it.
+    for (auto &CallsiteEntry : CallsiteSamples)
+      for (auto &CalleeEntry : CallsiteEntry.second)
+        CalleeEntry.second.setContextAttribute(Attr);
+  }
+
   // Query the stale profile matching results and remap the location.
const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { // There is no remapping if the profile is not stale or the matching gives diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test index 205e467091352..15f625d278e1e 100644 --- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test @@ -6,6 +6,9 @@ ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNWINDER-OFFSET2 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 --csspgo-preinliner=0 --gen-cs-nested-profile=0 --force-profile-preinlined +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-FORCE-PREINLINED + ; CHECK: [main:2 @ foo]:74:0 ; CHECK-NEXT: 1: 0 @@ -18,10 +21,30 @@ ; CHECK-NEXT: 8: 14 bar:14 ; CHECK-NEXT: 9: 0 ; CHECK-NEXT: !CFGChecksum: 563088904013236 -; CHECK:[main:2 @ foo:8 @ bar]:28:14 +; CHECK-NEXT: !Attributes: 1 +; CHECK-NEXT:[main:2 @ foo:8 @ bar]:28:14 ; CHECK-NEXT: 1: 14 ; CHECK-NEXT: 4: 14 ; CHECK-NEXT: !CFGChecksum: 72617220756 +; CHECK-NEXT: !Attributes: 1 + +; CHECK-FORCE-PREINLINED: [main:2 @ foo]:74:0 +; CHECK-FORCE-PREINLINED-NEXT: 1: 0 +; CHECK-FORCE-PREINLINED-NEXT: 2: 15 +; CHECK-FORCE-PREINLINED-NEXT: 3: 15 +; CHECK-FORCE-PREINLINED-NEXT: 4: 14 +; CHECK-FORCE-PREINLINED-NEXT: 5: 1 +; CHECK-FORCE-PREINLINED-NEXT: 6: 15 +; CHECK-FORCE-PREINLINED-NEXT: 7: 0 +; CHECK-FORCE-PREINLINED-NEXT: 8: 14 bar:14 +; CHECK-FORCE-PREINLINED-NEXT: 9: 0 +; CHECK-FORCE-PREINLINED-NEXT: !CFGChecksum: 563088904013236 +; 
CHECK-FORCE-PREINLINED-NEXT: !Attributes: 3 +; CHECK-FORCE-PREINLINED-NEXT:[main:2 @ foo:8 @ bar]:28:14 +; CHECK-FORCE-PREINLINED-NEXT: 1: 14 +; CHECK-FORCE-PREINLINED-NEXT: 4: 14 +; CHECK-FORCE-PREINLINED-NEXT: !CFGChecksum: 72617220756 +; CHECK-FORCE-PREINLINED-NEXT: !Attributes: 3 ; CHECK-UNWINDER: 3 ; CHECK-UNWINDER-NEXT: 201800-201858:1 diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test index f960f074fefff..7e7d7e7f20d58 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -8,26 +8,48 @@ ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-UPDATE-TOTAL-SAMPLE ; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK +; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t --force-profile-preinlined +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-FORCE-PREINLINED + ; RUN: echo -e "0\n0" > %t ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --fill-zero-for-all-funcs ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-ALL-ZERO ; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB -CHECK: main:2609:0 -CHECK: 0: 0 -CHECK: 2: 0 -CHECK: 1: foo:2609 -CHECK: 2.1: 42 -CHECK: 3: 62 -CHECK: 3.2: 21 -CHECK: 4: 0 -CHECK: 65526: 62 -CHECK: 3.1: bar:546 -CHECK: 1: 42 -CHECK: 65533: 42 -CHECK: 3.2: bar:189 -CHECK: 1: 21 +CHECK: main:2609:0 +CHECK-NEXT: 0: 0 +CHECK-NEXT: 2: 0 +CHECK-NEXT: 1: foo:2609 +CHECK-NEXT: 2.1: 42 +CHECK-NEXT: 3: 62 +CHECK-NEXT: 3.2: 
21 +CHECK-NEXT: 4: 0 +CHECK-NEXT: 65526: 62 +CHECK-NEXT: 3.1: bar:546 +CHECK-NEXT: 1: 42 +CHECK-NEXT: 65533: 42 +CHECK-NEXT: 3.2: bar:189 +CHECK-NEXT: 1: 21 + +CHECK-FORCE-PREINLINED: main:2609:0 +CHECK-FORCE-PREINLINED-NEXT: 0: 0 +CHECK-FORCE-PREINLINED-NEXT: 2: 0 +CHECK-FORCE-PREINLINED-NEXT: 1: foo:2609 +CHECK-FORCE-PREINLINED-NEXT: 2.1: 42 +CHECK-FORCE-PREINLINED-NEXT: 3: 62 +CHECK-FORCE-PREINLINED-NEXT: 3.2: 21 +CHECK-FORCE-PREINLINED-NEXT: 4: 0 +CHECK-FORCE-PREINLINED-NEXT: 65526: 62 +CHECK-FORCE-PREINLINED-NEXT: 3.1: bar:546 +CHECK-FORCE-PREINLINED-NEXT: 1: 42 +CHECK-FORCE-PREINLINED-NEXT: 65533: 42 +CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2 +CHECK-FORCE-PREINLINED-NEXT: 3.2: bar:189 +CHECK-FORCE-PREINLINED-NEXT: 1: 21 +CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2 +CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2 +CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2 CHECK-UPDATE-TOTAL-SAMPLE: main:292:0 CHECK-UPDATE-TOTAL-SAMPLE: 0: 0 diff --git a/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test index 2dacf7fef8f13..2086f0e94c073 100644 --- a/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-pseudoprobe.test @@ -1,5 +1,7 @@ ; RUN: llvm-profgen --format=text --ignore-stack-samples --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --profile-summary-cold-count=0 ; RUN: FileCheck %s --input-file %t +; RUN: llvm-profgen --format=text --ignore-stack-samples --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t1 --profile-summary-cold-count=0 --force-profile-preinlined +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-FORCE-PREINLINED ; CHECK: main:88:0 ; CHECK-NEXT: 1: 0 @@ -19,6 +21,26 @@ ; CHECK-NEXT: !CFGChecksum: 563088904013236 ; CHECK-NEXT: !CFGChecksum: 281479271677951 +; CHECK-FORCE-PREINLINED: main:88:0 +; 
CHECK-FORCE-PREINLINED-NEXT: 1: 0
+; CHECK-FORCE-PREINLINED-NEXT: 2: foo:88
+; CHECK-FORCE-PREINLINED-NEXT: 1: 0
+; CHECK-FORCE-PREINLINED-NEXT: 2: 15
+; CHECK-FORCE-PREINLINED-NEXT: 3: 15
+; CHECK-FORCE-PREINLINED-NEXT: 4: 14
+; CHECK-FORCE-PREINLINED-NEXT: 5: 1
+; CHECK-FORCE-PREINLINED-NEXT: 6: 15
+; CHECK-FORCE-PREINLINED-NEXT: 7: 0
+; CHECK-FORCE-PREINLINED-NEXT: 9: 0
+; CHECK-FORCE-PREINLINED-NEXT: 8: bar:28
+; CHECK-FORCE-PREINLINED-NEXT: 1: 14
+; CHECK-FORCE-PREINLINED-NEXT: 4: 14
+; CHECK-FORCE-PREINLINED-NEXT: !CFGChecksum: 72617220756
+; CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2
+; CHECK-FORCE-PREINLINED-NEXT: !CFGChecksum: 563088904013236
+; CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2
+; CHECK-FORCE-PREINLINED-NEXT: !CFGChecksum: 281479271677951
+; CHECK-FORCE-PREINLINED-NEXT: !Attributes: 2
 
 ; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling
 ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 33575b9c67625..d776dbb406542 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -64,6 +64,12 @@ static cl::opt
              "than threshold, it will be trimmed."),
     cl::cat(ProfGenCategory));
 
+static cl::opt<bool> ForceProfilePreinlined(
+    "force-profile-preinlined",
+    cl::desc("Force marking all function samples as preinlined(set "
+             "ContextShouldBeInlined attribute)."),
+    cl::init(false), cl::cat(ProfGenCategory));
+
 static cl::opt<bool> CSProfMergeColdContext(
     "csprof-merge-cold-context", cl::init(true),
     cl::desc("If the total count of context profile is smaller than "
@@ -511,10 +517,21 @@ void ProfileGenerator::generateProfile() {
   postProcessProfiles();
 }
 
+// Apply ContextShouldBeInlined to every entry of ProfileMap through
+// setContextAttribute, then raise FunctionSamples::ProfileIsPreInlined so the
+// profile as a whole is flagged as preinlined.
+void ProfileGeneratorBase::markProfilePreinlined(SampleProfileMap &ProfileMap) {
+  for (auto &I : ProfileMap)
+    I.second.setContextAttribute(ContextShouldBeInlined);
+  FunctionSamples::ProfileIsPreInlined = true;
+}
+
 void ProfileGenerator::postProcessProfiles() {
   computeSummaryAndThreshold(ProfileMap);
   trimColdProfiles(ProfileMap, ColdCountThreshold);
   filterAmbiguousProfile(ProfileMap);
+  if (ForceProfilePreinlined)
+    markProfilePreinlined(ProfileMap);
   calculateAndShowDensity(ProfileMap);
 }
 
@@ -1130,6 +1147,8 @@ void CSProfileGenerator::postProcessProfiles() {
     FunctionSamples::ProfileIsCS = false;
   }
   filterAmbiguousProfile(ProfileMap);
+  if (ForceProfilePreinlined)
+    markProfilePreinlined(ProfileMap);
   ProfileGeneratorBase::calculateAndShowDensity(ProfileMap);
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index d3e04563a81c2..f71ad62a39c77 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -123,6 +123,9 @@ class ProfileGeneratorBase {
 
   void showDensitySuggestion(double Density);
 
+  // Flag every profile in ProfileMap as preinlined.
+  void markProfilePreinlined(SampleProfileMap &ProfileMap);
+
   void collectProfiledFunctions();
 
   bool collectFunctionsFromRawProfile(