Skip to content

Commit c93b3a3

Browse files
[MemProf] Extend MemProfUse pass to make use of data access profiles to partition data (#151238)
f3f2832 introduces the data access profile format as a payload inside [memprof](https://llvm.org/docs/InstrProfileFormat.html#memprof-profile-data), and the MemProfUse pass reads the memprof payload. This change extends the MemProfUse pass to read the data access profiles and annotate global variables' section prefixes: (1) if there are samples for a global variable, it's annotated as hot; (2) if a global variable is seen in the profiled binary file but doesn't have access samples, it's annotated as unlikely. Introduce an option `-memprof-annotate-static-data-prefix` to flag-gate the global-variable annotation path; it is false by default. #155337 is the (WIP) draft change to "reconcile" the two sources of hotness.
1 parent c957df0 commit c93b3a3

File tree

4 files changed

+246
-3
lines changed

4 files changed

+246
-3
lines changed

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,11 @@ class IndexedMemProfReader {
729729
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
730730
getMemProfCallerCalleePairs() const;
731731

732+
// Returns non-owned pointer to data access profile data.
733+
LLVM_ABI memprof::DataAccessProfData *getDataAccessProfileData() const {
734+
return DataAccessProfileData.get();
735+
}
736+
732737
// Return the entire MemProf profile.
733738
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
734739

@@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
900905
return MemProfReader.getSummary();
901906
}
902907

908+
/// Returns non-owned pointer to the data access profile data.
909+
/// Will be null if unavailable (version < 4).
910+
memprof::DataAccessProfData *getDataAccessProfileData() const {
911+
return MemProfReader.getDataAccessProfileData();
912+
}
913+
903914
Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
904915
Error printBinaryIds(raw_ostream &OS) override;
905916
};

llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "llvm/ADT/IntrusiveRefCntPtr.h"
1616
#include "llvm/IR/PassManager.h"
17+
#include "llvm/ProfileData/DataAccessProf.h"
1718
#include "llvm/ProfileData/MemProf.h"
1819
#include "llvm/Support/Compiler.h"
1920

@@ -36,6 +37,11 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
3637
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
3738

3839
private:
40+
// Annotate global variables' section prefix based on data access profile,
41+
// return true if any global variable is annotated and false otherwise.
42+
bool
43+
annotateGlobalVariables(Module &M,
44+
const memprof::DataAccessProfData *DataAccessProf);
3945
std::string MemoryProfileFileName;
4046
IntrusiveRefCntPtr<vfs::FileSystem> FS;
4147
};

llvm/lib/Transforms/Instrumentation/MemProfUse.cpp

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/IR/Function.h"
2323
#include "llvm/IR/IntrinsicInst.h"
2424
#include "llvm/IR/Module.h"
25+
#include "llvm/ProfileData/DataAccessProf.h"
2526
#include "llvm/ProfileData/InstrProf.h"
2627
#include "llvm/ProfileData/InstrProfReader.h"
2728
#include "llvm/ProfileData/MemProfCommon.h"
@@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
7576
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
7677
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
7778

79+
static cl::opt<bool> AnnotateStaticDataSectionPrefix(
80+
"memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
81+
cl::desc("If true, annotate the static data section prefix"));
82+
7883
// Matching statistics
7984
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
8085
STATISTIC(NumOfMemProfMismatch,
@@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs,
9095
"Number of matched memory profile allocs.");
9196
STATISTIC(NumOfMemProfMatchedCallSites,
9297
"Number of matched memory profile callsites.");
98+
STATISTIC(NumOfMemProfHotGlobalVars,
99+
"Number of global vars annotated with 'hot' section prefix.");
100+
STATISTIC(NumOfMemProfColdGlobalVars,
101+
"Number of global vars annotated with 'unlikely' section prefix.");
102+
STATISTIC(NumOfMemProfUnknownGlobalVars,
103+
"Number of global vars with unknown hotness (no section prefix).");
104+
STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
105+
"Number of global vars with user-specified section (not annotated).");
93106

94107
static void addCallsiteMetadata(Instruction &I,
95108
ArrayRef<uint64_t> InlinedCallStack,
@@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
674687
}
675688

676689
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
677-
// Return immediately if the module doesn't contain any function.
678-
if (M.empty())
690+
// Return immediately if the module doesn't contain any function or global
691+
// variables.
692+
if (M.empty() && M.globals().empty())
679693
return PreservedAnalyses::all();
680694

681-
LLVM_DEBUG(dbgs() << "Read in memory profile:");
695+
LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
682696
auto &Ctx = M.getContext();
683697
auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
684698
if (Error E = ReaderOrErr.takeError()) {
@@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
703717
return PreservedAnalyses::all();
704718
}
705719

720+
const bool Changed =
721+
annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
722+
723+
// If the module doesn't contain any function, return after we process all
724+
// global variables.
725+
if (M.empty())
726+
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
727+
706728
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
707729

708730
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
@@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
752774

753775
return PreservedAnalyses::none();
754776
}
777+
778+
// Returns true iff the global variable has custom section either by
779+
// __attribute__((section("name")))
780+
// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
781+
// or #pragma clang section directives
782+
// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
783+
static bool hasExplicitSectionName(const GlobalVariable &GVar) {
784+
if (GVar.hasSection())
785+
return true;
786+
787+
auto Attrs = GVar.getAttributes();
788+
if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
789+
Attrs.hasAttribute("relro-section") ||
790+
Attrs.hasAttribute("rodata-section"))
791+
return true;
792+
return false;
793+
}
794+
795+
bool MemProfUsePass::annotateGlobalVariables(
796+
Module &M, const memprof::DataAccessProfData *DataAccessProf) {
797+
if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
798+
return false;
799+
800+
if (!DataAccessProf) {
801+
M.getContext().diagnose(DiagnosticInfoPGOProfile(
802+
MemoryProfileFileName.data(),
803+
StringRef("Data access profiles not found in memprof. Ignore "
804+
"-memprof-annotate-static-data-prefix."),
805+
DS_Warning));
806+
return false;
807+
}
808+
809+
bool Changed = false;
810+
// Iterate all global variables in the module and annotate them based on
811+
// data access profiles. Note it's up to the linker to decide how to map input
812+
// sections to output sections, and one conservative practice is to map
813+
// unlikely-prefixed ones to unlikely output section, and map the rest
814+
// (hot-prefixed or prefix-less) to the canonical output section.
815+
for (GlobalVariable &GVar : M.globals()) {
816+
assert(!GVar.getSectionPrefix().has_value() &&
817+
"GVar shouldn't have section prefix yet");
818+
if (GVar.isDeclarationForLinker())
819+
continue;
820+
821+
if (hasExplicitSectionName(GVar)) {
822+
++NumOfMemProfExplicitSectionGlobalVars;
823+
LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
824+
<< " has explicit section name. Skip annotating.\n");
825+
continue;
826+
}
827+
828+
StringRef Name = GVar.getName();
829+
// Skip string literals as their mangled names don't stay stable across
830+
// binary releases.
831+
// TODO: Track string content hash in the profiles and compute it inside the
832+
// compiler to categorize the hotness of string literals.
833+
if (Name.starts_with(".str")) {
834+
835+
LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
836+
continue;
837+
}
838+
839+
// DataAccessProfData's get* methods will canonicalize the name under the
840+
// hood before looking it up, so optimizer doesn't need to do it.
841+
std::optional<DataAccessProfRecord> Record =
842+
DataAccessProf->getProfileRecord(Name);
843+
// Annotate a global variable as hot if it has non-zero sampled count, and
844+
// annotate it as cold if it's seen in the profiled binary
845+
// file but doesn't have any access sample.
846+
// For logging, optimization remark emitter requires a llvm::Function, but
847+
// it's not well defined how to associate a global variable with a function.
848+
// So we just print out the static data section prefix in LLVM_DEBUG.
849+
if (Record && Record->AccessCount > 0) {
850+
++NumOfMemProfHotGlobalVars;
851+
GVar.setSectionPrefix("hot");
852+
Changed = true;
853+
LLVM_DEBUG(dbgs() << "Global variable " << Name
854+
<< " is annotated as hot\n");
855+
} else if (DataAccessProf->isKnownColdSymbol(Name)) {
856+
++NumOfMemProfColdGlobalVars;
857+
GVar.setSectionPrefix("unlikely");
858+
Changed = true;
859+
LLVM_DEBUG(dbgs() << "Global variable " << Name
860+
<< " is annotated as unlikely\n");
861+
} else {
862+
++NumOfMemProfUnknownGlobalVars;
863+
LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
864+
}
865+
}
866+
867+
return Changed;
868+
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; REQUIRES: asserts
2+
; asserts are required for -debug-only=<pass-name>
3+
4+
; RUN: rm -rf %t && split-file %s %t && cd %t
5+
6+
;; Read a text profile and merge it into indexed profile.
7+
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
8+
9+
;; Run optimizer pass on an IR module without IR functions, and test that global
10+
;; variables in the module could be annotated (i.e., no early return).
11+
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
12+
; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
13+
14+
;; Run optimizer pass on the IR, and check the section prefix.
15+
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
16+
; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
17+
18+
;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix.
19+
;; The output text IR shouldn't have `section_prefix`
20+
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \
21+
; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix"
22+
23+
; LOG: Skip annotating string literal .str
24+
; LOG: Global variable var1 is annotated as hot
25+
; LOG: Global variable var2.llvm.125 is annotated as hot
26+
; LOG: Global variable bar is not annotated
27+
; LOG: Global variable foo is annotated as unlikely
28+
; LOG: Global variable var3 has explicit section name. Skip annotating.
29+
; LOG: Global variable var4 has explicit section name. Skip annotating.
30+
31+
;; String literals are not annotated.
32+
; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde"
33+
; PREFIX-NOT: section_prefix
34+
; PREFIX: @var1 = global i32 123, !section_prefix !0
35+
36+
;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up.
37+
; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
38+
39+
;; @bar is not seen in hot symbol or known symbol set, so it won't get a section
40+
;; prefix. Test this by testing that there is no section_prefix between @bar and
41+
;; @foo.
42+
; PREFIX-NEXT: @bar = global i16 3
43+
; PREFIX-NOT: !section_prefix
44+
45+
;; @foo is unlikely.
46+
; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1
47+
48+
; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
49+
; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
50+
51+
; PREFIX: attributes #0 = { "rodata-section"="sec2" }
52+
53+
; PREFIX: !0 = !{!"section_prefix", !"hot"}
54+
; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"}
55+
56+
; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix.
57+
; STAT: 2 memprof - Number of global vars with user-specified section (not annotated).
58+
; STAT: 2 memprof - Number of global vars annotated with 'hot' section prefix.
59+
; STAT: 1 memprof - Number of global vars with unknown hotness (no section prefix).
60+
61+
;--- memprof.yaml
62+
---
63+
DataAccessProfiles:
64+
SampledRecords:
65+
- Symbol: var1
66+
AccessCount: 1000
67+
- Symbol: var2
68+
AccessCount: 5
69+
- Hash: 101010
70+
AccessCount: 145
71+
KnownColdSymbols:
72+
- foo
73+
KnownColdStrHashes: [ 999, 1001 ]
74+
...
75+
;--- input.ll
76+
77+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
78+
target triple = "x86_64-unknown-linux-gnu"
79+
80+
@.str = unnamed_addr constant [5 x i8] c"abcde"
81+
@var1 = global i32 123
82+
@var2.llvm.125 = global i64 0
83+
@bar = global i16 3
84+
@foo = global i8 2
85+
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
86+
@var4 = constant [1 x i64][i64 98765] #0
87+
88+
define i32 @func() {
89+
%a = load i32, ptr @var1
90+
%b = load i32, ptr @var2.llvm.125
91+
%ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
92+
ret i32 %ret
93+
}
94+
95+
declare i32 @func_taking_arbitrary_param(...)
96+
97+
attributes #0 = { "rodata-section"="sec2" }
98+
99+
;--- funcless-module.ll
100+
101+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
102+
target triple = "x86_64-unknown-linux-gnu"
103+
104+
@.str = unnamed_addr constant [5 x i8] c"abcde"
105+
@var1 = global i32 123
106+
@var2.llvm.125 = global i64 0
107+
@bar = global i16 3
108+
@foo = global i8 2
109+
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
110+
@var4 = constant [1 x i64][i64 98765] #0
111+
112+
attributes #0 = { "rodata-section"="sec2" }

0 commit comments

Comments
 (0)