-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[MemProf] Extend MemProfUse pass to make use of data access profiles to partition data #151238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6311383
8f8cd69
a6e5cc6
291bae7
6c816c2
296f343
1d1f68e
30c860b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
#include "llvm/IR/Function.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/ProfileData/DataAccessProf.h" | ||
#include "llvm/ProfileData/InstrProf.h" | ||
#include "llvm/ProfileData/InstrProfReader.h" | ||
#include "llvm/ProfileData/MemProfCommon.h" | ||
|
@@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent( | |
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden, | ||
cl::desc("Min percent of cold bytes matched to hint allocation cold")); | ||
|
||
static cl::opt<bool> AnnotateStaticDataSectionPrefix( | ||
"memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, | ||
cl::desc("If true, annotate the static data section prefix")); | ||
|
||
// Matching statistics | ||
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); | ||
STATISTIC(NumOfMemProfMismatch, | ||
|
@@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs, | |
"Number of matched memory profile allocs."); | ||
STATISTIC(NumOfMemProfMatchedCallSites, | ||
"Number of matched memory profile callsites."); | ||
STATISTIC(NumOfMemProfHotGlobalVars, | ||
"Number of global vars annotated with 'hot' section prefix."); | ||
STATISTIC(NumOfMemProfColdGlobalVars, | ||
"Number of global vars annotated with 'unlikely' section prefix."); | ||
STATISTIC(NumOfMemProfUnknownGlobalVars, | ||
"Number of global vars with unknown hotness (no section prefix)."); | ||
STATISTIC(NumOfMemProfExplicitSectionGlobalVars, | ||
"Number of global vars with user-specified section (not annotated)."); | ||
|
||
static void addCallsiteMetadata(Instruction &I, | ||
ArrayRef<uint64_t> InlinedCallStack, | ||
|
@@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, | |
} | ||
|
||
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { | ||
// Return immediately if the module doesn't contain any function. | ||
if (M.empty()) | ||
// Return immediately if the module doesn't contain any function or global | ||
// variables. | ||
if (M.empty() && M.globals().empty()) | ||
return PreservedAnalyses::all(); | ||
|
||
LLVM_DEBUG(dbgs() << "Read in memory profile:"); | ||
LLVM_DEBUG(dbgs() << "Read in memory profile:\n"); | ||
auto &Ctx = M.getContext(); | ||
auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); | ||
if (Error E = ReaderOrErr.takeError()) { | ||
|
@@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { | |
return PreservedAnalyses::all(); | ||
} | ||
|
||
const bool Changed = | ||
annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData()); | ||
|
||
// If the module doesn't contain any function, return after we process all | ||
// global variables. | ||
if (M.empty()) | ||
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | ||
|
||
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | ||
|
||
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin()); | ||
|
@@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { | |
|
||
return PreservedAnalyses::none(); | ||
} | ||
|
||
// Returns true iff the global variable has custom section either by | ||
// __attribute__((section("name"))) | ||
// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) | ||
// or #pragma clang section directives | ||
// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). | ||
static bool hasExplicitSectionName(const GlobalVariable &GVar) { | ||
if (GVar.hasSection()) | ||
return true; | ||
|
||
auto Attrs = GVar.getAttributes(); | ||
if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || | ||
Attrs.hasAttribute("relro-section") || | ||
Attrs.hasAttribute("rodata-section")) | ||
return true; | ||
return false; | ||
} | ||
|
||
bool MemProfUsePass::annotateGlobalVariables( | ||
Module &M, const memprof::DataAccessProfData *DataAccessProf) { | ||
if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) | ||
return false; | ||
|
||
if (!DataAccessProf) { | ||
M.getContext().diagnose(DiagnosticInfoPGOProfile( | ||
MemoryProfileFileName.data(), | ||
StringRef("Data access profiles not found in memprof. Ignore " | ||
"-memprof-annotate-static-data-prefix."), | ||
DS_Warning)); | ||
return false; | ||
} | ||
|
||
bool Changed = false; | ||
// Iterate all global variables in the module and annotate them based on | ||
// data access profiles. Note it's up to the linker to decide how to map input | ||
// sections to output sections, and one conservative practice is to map | ||
// unlikely-prefixed ones to unlikely output section, and map the rest | ||
// (hot-prefixed or prefix-less) to the canonical output section. | ||
for (GlobalVariable &GVar : M.globals()) { | ||
assert(!GVar.getSectionPrefix().has_value() && | ||
"GVar shouldn't have section prefix yet"); | ||
if (GVar.isDeclarationForLinker()) | ||
continue; | ||
|
||
if (hasExplicitSectionName(GVar)) { | ||
++NumOfMemProfExplicitSectionGlobalVars; | ||
LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() | ||
<< " has explicit section name. Skip annotating.\n"); | ||
continue; | ||
} | ||
|
||
StringRef Name = GVar.getName(); | ||
// Skip string literals as their mangled names don't stay stable across | ||
// binary releases. | ||
// TODO: Track string content hash in the profiles and compute it inside the | ||
// compiler to categeorize the hotness string literals. | ||
if (Name.starts_with(".str")) { | ||
|
||
LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); | ||
continue; | ||
} | ||
|
||
// DataAccessProfRecord's get* methods will canonicalize the name under the | ||
// hood before looking it up, so optimizer doesn't need to do it. | ||
std::optional<DataAccessProfRecord> Record = | ||
DataAccessProf->getProfileRecord(Name); | ||
// Annotate a global variable as hot if it has non-zero sampled count, and | ||
// annotate it as cold if it's seen in the profiled binary | ||
// file but doesn't have any access sample. | ||
// For logging, optimization remark emitter requires a llvm::Function, but | ||
// it's not well defined how to associate a global variable with a function. | ||
// So we just print out the static data section prefix in LLVM_DEBUG. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only downside with that is the need for a debug built compiler to get any information out. Maybe at least add STATISTICs to enable getting a count of hot and unlikely annotated gvs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm onboard with adding counters so implemented it; though if I remember correctly stats are not always enabled in a release build compiler, and there is a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, shoot, well the stats are helpful in any case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using a statistic sounds good to me too. |
||
if (Record && Record->AccessCount > 0) { | ||
++NumOfMemProfHotGlobalVars; | ||
GVar.setSectionPrefix("hot"); | ||
Changed = true; | ||
LLVM_DEBUG(dbgs() << "Global variable " << Name | ||
<< " is annotated as hot\n"); | ||
} else if (DataAccessProf->isKnownColdSymbol(Name)) { | ||
++NumOfMemProfColdGlobalVars; | ||
GVar.setSectionPrefix("unlikely"); | ||
Changed = true; | ||
LLVM_DEBUG(dbgs() << "Global variable " << Name | ||
<< " is annotated as unlikely\n"); | ||
} else { | ||
++NumOfMemProfUnknownGlobalVars; | ||
LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n"); | ||
} | ||
} | ||
|
||
return Changed; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
; REQUIRES: asserts | ||
; asserts are required for -debug-only=<pass-name> | ||
|
||
; RUN: rm -rf %t && split-file %s %t && cd %t | ||
|
||
;; Read a text profile and merge it into indexed profile. | ||
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata | ||
|
||
;; Run optimizer pass on an IR module without IR functions, and test that global | ||
;; variables in the module could be annotated (i.e., no early return), | ||
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ | ||
; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT | ||
|
||
;; Run optimizer pass on the IR, and check the section prefix. | ||
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ | ||
; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT | ||
|
||
;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix. | ||
;; The output text IR shouldn't have `section_prefix` | ||
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \ | ||
; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix" | ||
|
||
; LOG: Skip annotating string literal .str | ||
; LOG: Global variable var1 is annotated as hot | ||
; LOG: Global variable var2.llvm.125 is annotated as hot | ||
; LOG: Global variable bar is not annotated | ||
; LOG: Global variable foo is annotated as unlikely | ||
; LOG: Global variable var3 has explicit section name. Skip annotating. | ||
; LOG: Global variable var4 has explicit section name. Skip annotating. | ||
|
||
;; String literals are not annotated. | ||
; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde" | ||
; PREFIX-NOT: section_prefix | ||
; PREFIX: @var1 = global i32 123, !section_prefix !0 | ||
|
||
;; @var.llvm.125 will be canonicalized to @var2 for profile look-up. | ||
; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 | ||
|
||
;; @bar is not seen in hot symbol or known symbol set, so it won't get a section | ||
;; prefix. Test this by testing that there is no section_prefix between @bar and | ||
;; @foo. | ||
; PREFIX-NEXT: @bar = global i16 3 | ||
; PREFIX-NOT: !section_prefix | ||
|
||
;; @foo is unlikely. | ||
; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1 | ||
|
||
; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" | ||
; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 | ||
|
||
; PREFIX: attributes #0 = { "rodata-section"="sec2" } | ||
|
||
; PREFIX: !0 = !{!"section_prefix", !"hot"} | ||
; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"} | ||
|
||
; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix. | ||
; STAT: 2 memprof - Number of global vars with user-specified section (not annotated). | ||
; STAT: 2 memprof - Number of global vars annotated with 'hot' section prefix. | ||
; STAT: 1 memprof - Number of global vars with unknown hotness (no section prefix). | ||
|
||
;--- memprof.yaml | ||
--- | ||
DataAccessProfiles: | ||
SampledRecords: | ||
- Symbol: var1 | ||
AccessCount: 1000 | ||
- Symbol: var2 | ||
AccessCount: 5 | ||
- Hash: 101010 | ||
AccessCount: 145 | ||
KnownColdSymbols: | ||
- foo | ||
KnownColdStrHashes: [ 999, 1001 ] | ||
... | ||
;--- input.ll | ||
snehasish marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@.str = unnamed_addr constant [5 x i8] c"abcde" | ||
@var1 = global i32 123 | ||
@var2.llvm.125 = global i64 0 | ||
@bar = global i16 3 | ||
@foo = global i8 2 | ||
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" | ||
@var4 = constant [1 x i64][i64 98765] #0 | ||
|
||
define i32 @func() { | ||
%a = load i32, ptr @var1 | ||
%b = load i32, ptr @var2.llvm.125 | ||
%ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b) | ||
ret i32 %ret | ||
} | ||
|
||
declare i32 @func_taking_arbitrary_param(...) | ||
|
||
attributes #0 = { "rodata-section"="sec2" } | ||
|
||
;--- funcless-module.ll | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@.str = unnamed_addr constant [5 x i8] c"abcde" | ||
@var1 = global i32 123 | ||
@var2.llvm.125 = global i64 0 | ||
@bar = global i16 3 | ||
@foo = global i8 2 | ||
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" | ||
@var4 = constant [1 x i64][i64 98765] #0 | ||
|
||
attributes #0 = { "rodata-section"="sec2" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What would happen here for globals that the user has explicitly assigned a section using e.g. https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate?
I'd like to see a test for this attribute too to ensure we don't break user annotations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically, with profile generator's filtering and linker's mapping, the chance of partitioning custom-section global variables inadvertently should be very minimal, as described below. But I agree it's good to not annotate them in the first place, so implemented that.
From profile generator perspective, it will only retrieve symbols and their sample counts from the 4 x 2 relevant static data sections, and it will not read symbols from custom sections. This way, data access profile payload won't have profiles for custom section global variables -- however, it's possible that internal-linkage global variables have the same name, and intermediate object files (not the executable) sees custom sections with prefix.
From linker's input -> output section mapping perspective, the
keep-data-section-prefix
option targets the section with one of known prefix ([lld][ELF] Introduce an option to keep data section prefix. #148985).done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
291bae7 implements the change and test.
https://gcc.godbolt.org/z/3c59K3rqM is a small C++ example to illustrate how source code section names (section attribute and
#pragma clang section
directive) affects section name in asm. From the asm output, we can see thata
andb
hassec1
andsec2
respectively.