Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,11 @@ class IndexedMemProfReader {
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
getMemProfCallerCalleePairs() const;

// Returns non-owned pointer to data access profile data.
LLVM_ABI memprof::DataAccessProfData *getDataAccessProfileData() const {
return DataAccessProfileData.get();
}

// Return the entire MemProf profile.
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;

Expand Down Expand Up @@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
return MemProfReader.getSummary();
}

/// Returns non-owned pointer to the data access profile data.
/// Will be null if unavailable (version < 4).
memprof::DataAccessProfData *getDataAccessProfileData() const {
return MemProfReader.getDataAccessProfileData();
}

Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
Error printBinaryIds(raw_ostream &OS) override;
};
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Compiler.h"

Expand All @@ -36,6 +37,11 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
// Annotate global variables' section prefix based on data access profile,
// return true if any global variable is annotated and false otherwise.
bool
annotateGlobalVariables(Module &M,
const memprof::DataAccessProfData *DataAccessProf);
std::string MemoryProfileFileName;
IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
Expand Down
120 changes: 117 additions & 3 deletions llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProfCommon.h"
Expand Down Expand Up @@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
cl::desc("Min percent of cold bytes matched to hint allocation cold"));

static cl::opt<bool> AnnotateStaticDataSectionPrefix(
"memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
cl::desc("If true, annotate the static data section prefix"));

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
Expand All @@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs,
"Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
"Number of matched memory profile callsites.");
STATISTIC(NumOfMemProfHotGlobalVars,
"Number of global vars annotated with 'hot' section prefix.");
STATISTIC(NumOfMemProfColdGlobalVars,
"Number of global vars annotated with 'unlikely' section prefix.");
STATISTIC(NumOfMemProfUnknownGlobalVars,
"Number of global vars with unknown hotness (no section prefix).");
STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
"Number of global vars with user-specified section (not annotated).");

static void addCallsiteMetadata(Instruction &I,
ArrayRef<uint64_t> InlinedCallStack,
Expand Down Expand Up @@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
// Return immediately if the module doesn't contain any function.
if (M.empty())
// Return immediately if the module doesn't contain any function or global
// variables.
if (M.empty() && M.globals().empty())
return PreservedAnalyses::all();

LLVM_DEBUG(dbgs() << "Read in memory profile:");
LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
auto &Ctx = M.getContext();
auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
if (Error E = ReaderOrErr.takeError()) {
Expand All @@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}

const bool Changed =
annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());

// If the module doesn't contain any function, return after we process all
// global variables.
if (M.empty())
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();

auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
Expand Down Expand Up @@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {

return PreservedAnalyses::none();
}

// Returns true iff the global variable has custom section either by
// __attribute__((section("name")))
// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
// or #pragma clang section directives
// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
static bool hasExplicitSectionName(const GlobalVariable &GVar) {
if (GVar.hasSection())
return true;

auto Attrs = GVar.getAttributes();
if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
Attrs.hasAttribute("relro-section") ||
Attrs.hasAttribute("rodata-section"))
return true;
return false;
}

bool MemProfUsePass::annotateGlobalVariables(
Module &M, const memprof::DataAccessProfData *DataAccessProf) {
if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
return false;

if (!DataAccessProf) {
M.getContext().diagnose(DiagnosticInfoPGOProfile(
MemoryProfileFileName.data(),
StringRef("Data access profiles not found in memprof. Ignore "
"-memprof-annotate-static-data-prefix."),
DS_Warning));
return false;
}

bool Changed = false;
// Iterate all global variables in the module and annotate them based on
// data access profiles. Note it's up to the linker to decide how to map input
// sections to output sections, and one conservative practice is to map
// unlikely-prefixed ones to unlikely output section, and map the rest
// (hot-prefixed or prefix-less) to the canonical output section.
for (GlobalVariable &GVar : M.globals()) {
assert(!GVar.getSectionPrefix().has_value() &&

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen here for globals that the user has explicitly assigned a section using e.g. https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate?

I'd like to see a test for this attribute too to ensure we don't break user annotations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would happen here for globals that the user has explicitly assigned a section using e.g. https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate

Basically, with profile generator's filtering and linker's mapping, the chance of partitioning custom-section global variables inadvertently should be very minimal, as described below. But I agree it's good to not annotate them in the first place, so implemented that.

  • From profile generator perspective, it will only retrieve symbols and their sample counts from the 4 x 2 relevant static data sections, and it will not read symbols from custom sections. This way, data access profile payload won't have profiles for custom section global variables -- however, it's possible that internal-linkage global variables have the same name, and intermediate object files (not the executable) sees custom sections with prefix.

  • From linker's input -> output section mapping perspective, the keep-data-section-prefix option targets the section with one of known prefix ([lld][ELF] Introduce an option to keep data section prefix. #148985).

I'd like to see a test for this attribute too to ensure we don't break user annotations.

done.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

291bae7 implements the change and test.

https://gcc.godbolt.org/z/3c59K3rqM is a small C++ example to illustrate how source code section names (section attribute and #pragma clang section directive) affects section name in asm. From the asm output, we can see that a and b has sec1 and sec2 respectively.

"GVar shouldn't have section prefix yet");
if (GVar.isDeclarationForLinker())
continue;

if (hasExplicitSectionName(GVar)) {
++NumOfMemProfExplicitSectionGlobalVars;
LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
<< " has explicit section name. Skip annotating.\n");
continue;
}

StringRef Name = GVar.getName();
// Skip string literals as their mangled names don't stay stable across
// binary releases.
// TODO: Track string content hash in the profiles and compute it inside the
// compiler to categeorize the hotness string literals.
if (Name.starts_with(".str")) {

LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
continue;
}

// DataAccessProfRecord's get* methods will canonicalize the name under the
// hood before looking it up, so optimizer doesn't need to do it.
std::optional<DataAccessProfRecord> Record =
DataAccessProf->getProfileRecord(Name);
// Annotate a global variable as hot if it has non-zero sampled count, and
// annotate it as cold if it's seen in the profiled binary
// file but doesn't have any access sample.
// For logging, optimization remark emitter requires a llvm::Function, but
// it's not well defined how to associate a global variable with a function.
// So we just print out the static data section prefix in LLVM_DEBUG.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only downside with that is the need for a debug built compiler to get any information out. Maybe at least add STATISTICs to enable getting a count of hot and unlikely annotated gvs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm onboard with adding counters so implemented it; though if I remember correctly stats are not always enabled in a release build compiler, and there is a DLLVM_ENABLE_STATS flag.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, shoot, well the stats are helpful in any case.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a statistic sounds good to me too.

if (Record && Record->AccessCount > 0) {
++NumOfMemProfHotGlobalVars;
GVar.setSectionPrefix("hot");
Changed = true;
LLVM_DEBUG(dbgs() << "Global variable " << Name
<< " is annotated as hot\n");
} else if (DataAccessProf->isKnownColdSymbol(Name)) {
++NumOfMemProfColdGlobalVars;
GVar.setSectionPrefix("unlikely");
Changed = true;
LLVM_DEBUG(dbgs() << "Global variable " << Name
<< " is annotated as unlikely\n");
} else {
++NumOfMemProfUnknownGlobalVars;
LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
}
}

return Changed;
}
112 changes: 112 additions & 0 deletions llvm/test/Transforms/PGOProfile/data-access-profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
; REQUIRES: asserts
; asserts are required for -debug-only=<pass-name>

; RUN: rm -rf %t && split-file %s %t && cd %t

;; Read a text profile and merge it into indexed profile.
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata

;; Run optimizer pass on an IR module without IR functions, and test that global
;; variables in the module could be annotated (i.e., no early return),
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT

;; Run optimizer pass on the IR, and check the section prefix.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT

;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix.
;; The output text IR shouldn't have `section_prefix`
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \
; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix"

; LOG: Skip annotating string literal .str
; LOG: Global variable var1 is annotated as hot
; LOG: Global variable var2.llvm.125 is annotated as hot
; LOG: Global variable bar is not annotated
; LOG: Global variable foo is annotated as unlikely
; LOG: Global variable var3 has explicit section name. Skip annotating.
; LOG: Global variable var4 has explicit section name. Skip annotating.

;; String literals are not annotated.
; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde"
; PREFIX-NOT: section_prefix
; PREFIX: @var1 = global i32 123, !section_prefix !0

;; @var.llvm.125 will be canonicalized to @var2 for profile look-up.
; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0

;; @bar is not seen in hot symbol or known symbol set, so it won't get a section
;; prefix. Test this by testing that there is no section_prefix between @bar and
;; @foo.
; PREFIX-NEXT: @bar = global i16 3
; PREFIX-NOT: !section_prefix

;; @foo is unlikely.
; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1

; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0

; PREFIX: attributes #0 = { "rodata-section"="sec2" }

; PREFIX: !0 = !{!"section_prefix", !"hot"}
; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"}

; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix.
; STAT: 2 memprof - Number of global vars with user-specified section (not annotated).
; STAT: 2 memprof - Number of global vars annotated with 'hot' section prefix.
; STAT: 1 memprof - Number of global vars with unknown hotness (no section prefix).

;--- memprof.yaml
---
DataAccessProfiles:
SampledRecords:
- Symbol: var1
AccessCount: 1000
- Symbol: var2
AccessCount: 5
- Hash: 101010
AccessCount: 145
KnownColdSymbols:
- foo
KnownColdStrHashes: [ 999, 1001 ]
...
;--- input.ll

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = unnamed_addr constant [5 x i8] c"abcde"
@var1 = global i32 123
@var2.llvm.125 = global i64 0
@bar = global i16 3
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0

define i32 @func() {
%a = load i32, ptr @var1
%b = load i32, ptr @var2.llvm.125
%ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
ret i32 %ret
}

declare i32 @func_taking_arbitrary_param(...)

attributes #0 = { "rodata-section"="sec2" }

;--- funcless-module.ll

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = unnamed_addr constant [5 x i8] c"abcde"
@var1 = global i32 123
@var2.llvm.125 = global i64 0
@bar = global i16 3
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0

attributes #0 = { "rodata-section"="sec2" }