From 6311383dc31b5cf15b0f0851b73326af6502bc6e Mon Sep 17 00:00:00 2001 From: mingmingl Date: Mon, 28 Jul 2025 01:47:15 -0700 Subject: [PATCH 1/6] Extend MemProfUse pass to make use of data access profiles --- .../llvm/ProfileData/InstrProfReader.h | 11 +++ .../Transforms/Instrumentation/MemProfUse.h | 5 ++ .../Transforms/Instrumentation/MemProfUse.cpp | 80 ++++++++++++++++- .../PGOProfile/data-access-profile.ll | 88 +++++++++++++++++++ 4 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/PGOProfile/data-access-profile.ll diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index deb5cd17d8fd9..bccbc2006b898 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -729,6 +729,11 @@ class IndexedMemProfReader { LLVM_ABI DenseMap> getMemProfCallerCalleePairs() const; + // Returns non-owned pointer to data access profile data. + memprof::DataAccessProfData *getDataAccessProfileData() const { + return DataAccessProfileData.get(); + } + // Return the entire MemProf profile. LLVM_ABI memprof::AllMemProfData getAllMemProfData() const; @@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader { return MemProfReader.getSummary(); } + /// Returns non-owned pointer to the data access profile data. + /// Will be null if unavailable (version < 4). + memprof::DataAccessProfData *getDataAccessProfileData() const { + return MemProfReader.getDataAccessProfileData(); + } + Error readBinaryIds(std::vector &BinaryIds) override; Error printBinaryIds(raw_ostream &OS) override; }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h index 6170bf48e4695..c11333bf8ce5b 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h @@ -14,6 +14,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Compiler.h" @@ -36,6 +37,10 @@ class MemProfUsePass : public PassInfoMixin { LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: + // Annotate global variables' section prefix based on data access profile. + bool + annotateGlobalVariables(Module &M, + const memprof::DataAccessProfData *DataAccessProf); std::string MemoryProfileFileName; IntrusiveRefCntPtr FS; }; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index a9a0731f16d90..3c1e41f8eb374 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/MemProfCommon.h" @@ -75,6 +76,17 @@ static cl::opt MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); +static cl::opt AnnotationStaticDataPrefix( + "annotate-static-data-prefix", cl::init(false), cl::Hidden, + cl::desc("If true, annotate the static data section prefix")); + +static cl::opt + PrintStaticDataPrefix("print-static-data-prefix", cl::init(false), + cl::Hidden, + cl::desc("If true, print the static data section " + "prefix in errs(). This option is " + "meant for debugging.")); + // Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfMemProfMismatch, @@ -674,8 +686,9 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, } PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { - // Return immediately if the module doesn't contain any function. - if (M.empty()) + // Return immediately if the module doesn't contain any function or global + // variables. + if (M.empty() && M.globals().empty()) return PreservedAnalyses::all(); LLVM_DEBUG(dbgs() << "Read in memory profile:"); @@ -703,6 +716,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } + const bool Changed = + annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData()); + + // If the module doesn't contain any function, return after we process all + // global variables. + if (M.empty()) + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); + auto &FAM = AM.getResult(M).getManager(); TargetLibraryInfo &TLI = FAM.getResult(*M.begin()); @@ -752,3 +773,58 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } + +bool MemProfUsePass::annotateGlobalVariables( + Module &M, const memprof::DataAccessProfData *DataAccessProf) { + if (!AnnotationStaticDataPrefix || M.globals().empty() || !DataAccessProf) + return false; + + bool Changed = false; + for (GlobalVariable &GVar : M.globals()) { + assert(!GVar.getSectionPrefix().has_value() && + "GVar shouldn't have section prefix yet"); + if (GVar.isDeclarationForLinker()) + continue; + + StringRef Name = GVar.getName(); + // Skip string literals whose mangled names doesn't stay stable across + // binary releases. + // TODO: Track string content hash in the profiles and compute it inside the + // compiler to categeorize the hotness string literals. + if (Name.starts_with(".str")) + continue; + + // DataAccessProfRecord's look-up methods will canonicalize the variable + // name before looking up methods, so optimizer doesn't need to do it. + std::optional Record = + DataAccessProf->getProfileRecord(Name); + // Annotate a global variable as hot if it has non-zero sampled count, and + // annotate it as cold if it's seen in the profiled binary + // file but doesn't have any access sample. + if (Record && Record->AccessCount > 0) { + GVar.setSectionPrefix("hot"); + Changed = true; + } else if (DataAccessProf->isKnownColdSymbol(Name)) { + GVar.setSectionPrefix("unlikely"); + Changed = true; + } + } + + // Optimization remark emitter requires a llvm::Function, but it's not well + // defined to associate a global variable with a function. So we just print + // out the static data section prefix in errs(). + if (PrintStaticDataPrefix) { + for (GlobalVariable &GVar : M.globals()) { + if (GVar.isDeclarationForLinker()) + continue; + StringRef Name = GVar.getName(); + auto SectionPrefix = GVar.getSectionPrefix(); + if (SectionPrefix.has_value()) + errs() << "Global variable " << Name + << " has section prefix: " << SectionPrefix.value() << "\n"; + else + errs() << "Global variable " << Name << " has no section prefix\n"; + } + } + return Changed; +} diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll new file mode 100644 index 0000000000000..33f0d8e0ab887 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll @@ -0,0 +1,88 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;; Read a text profile and merge it into indexed profile. +; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata + +;; Run optimizer pass on the IR, and check the section prefix. +; RUN: opt -passes='memprof-use' -annotate-static-data-prefix \ +; RUN: -S input.ll -o - 2>&1 | FileCheck %s + +;; Repeat the command line above and enable -print-static-data-prefix. Test both IR and log. +; RUN: opt -passes='memprof-use' -annotate-static-data-prefix \ +; RUN: -print-static-data-prefix -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,CHECK + + +; LOG: Global variable .str has no section prefix +; LOG: Global variable var1 has section prefix: hot +; LOG: Global variable var2.llvm.125 has section prefix: hot +; LOG: Global variable foo has section prefix: unlikely +; LOG: Global variable bar has no section prefix + +;; String literals are not annotated. +; CHECK: @.str = unnamed_addr constant [5 x i8] c"abcde" +; CHECK-NOT: section_prefix +; CHECK: @var1 = global i32 123, !section_prefix !0 + +;; @var.llvm.125 will be canonicalized to @var2 for profile look-up. +; CHECK-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 +; CHECK-NEXT: @foo = global i8 2, !section_prefix !1 + +;; @bar is not seen in hot symbol or known symbol set, so it doesn't get +;; a section prefix. It's up to the linker to decide how to map input sections +;; to output, and one conservative practice is to map unlikely-prefixed ones to +;; unlikely output section, and map the rest (hot-prefixed or prefix-less) to +;; the canonical output section. +; CHECK-NEXT: @bar = global i16 3 + +; CHECK: !0 = !{!"section_prefix", !"hot"} +; CHECK-NEXT: !1 = !{!"section_prefix", !"unlikely"} + +;--- memprof.yaml +--- +HeapProfileRecords: + - GUID: 0xdeadbeef12345678 + AllocSites: + - Callstack: + - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true } + - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 111 + TotalSize: 222 + TotalLifetime: 333 + TotalLifetimeAccessDensity: 444 + CallSites: + - Frames: + - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true } + - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false } + CalleeGuids: [ 0x100, 0x200 ] +DataAccessProfiles: + SampledRecords: + - Symbol: var1 + AccessCount: 1000 + - Symbol: var2 + AccessCount: 5 + - Hash: 101010 + AccessCount: 145 + KnownColdSymbols: + - foo + KnownColdStrHashes: [ 999, 1001 ] +... +;--- input.ll + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = unnamed_addr constant [5 x i8] c"abcde" +@var1 = global i32 123 +@var2.llvm.125 = global i64 0 +@foo = global i8 2 +@bar = global i16 3 + +define i32 @func() { + %a = load i32, ptr @var1 + %b = load i32, ptr @var2.llvm.125 + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b) + ret i32 %ret +} + +declare i32 @func_taking_arbitrary_param(...) From 8f8cd69f4996567458402f176fc1aac7c3548eba Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 25 Aug 2025 08:23:29 -0700 Subject: [PATCH 2/6] Update data-access-profile.ll --- llvm/test/Transforms/PGOProfile/data-access-profile.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll index 33f0d8e0ab887..91eaa934374b3 100644 --- a/llvm/test/Transforms/PGOProfile/data-access-profile.ll +++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll @@ -11,7 +11,6 @@ ; RUN: opt -passes='memprof-use' -annotate-static-data-prefix \ ; RUN: -print-static-data-prefix -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,CHECK - ; LOG: Global variable .str has no section prefix ; LOG: Global variable var1 has section prefix: hot ; LOG: Global variable var2.llvm.125 has section prefix: hot From aae52ce54615234a27d195749d6908e9c107b376 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Mon, 25 Aug 2025 13:42:54 -0700 Subject: [PATCH 3/6] For static data layout, update codegen passes to reconcile data hotness from two sources of profile information --- .../llvm/Analysis/StaticDataProfileInfo.h | 4 ++ llvm/lib/Analysis/StaticDataProfileInfo.cpp | 46 +++++++++++++++++-- llvm/lib/CodeGen/StaticDataAnnotator.cpp | 9 ---- .../CodeGen/X86/global-variable-partition.ll | 28 +++++++---- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index fa21eba1377df..d7bbf719e5692 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -26,6 +26,10 @@ class StaticDataProfileInfo { LLVM_ABI std::optional getConstantProfileCount(const Constant *C) const; + LLVM_ABI std::optional + getDataHotnessBasedOnProfileCount(const Constant *C, + const ProfileSummaryInfo *PSI) const; + public: StaticDataProfileInfo() = default; diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b036b2dde770e..b7ca654e9c043 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -28,11 +28,15 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { return I->second; } -StringRef StaticDataProfileInfo::getConstantSectionPrefix( +std::optional +StaticDataProfileInfo::getDataHotnessBasedOnProfileCount( const Constant *C, const ProfileSummaryInfo *PSI) const { auto Count = getConstantProfileCount(C); + // The constant `C` doesn't have a profile count. `C` might be a external + // linkage global variable, whose PGO-based counter is not tracked within one + // IR module. if (!Count) - return ""; + return std::nullopt; // The accummulated counter shows the constant is hot. Return 'hot' whether // this variable is seen by unprofiled functions or not. if (PSI->isHotCount(*Count)) @@ -41,14 +45,46 @@ StringRef StaticDataProfileInfo::getConstantSectionPrefix( // assign it to unlikely sections, even if the counter says 'cold'. So return // an empty prefix before checking whether the counter is cold. if (ConstantWithoutCounts.count(C)) - return ""; + return std::nullopt; // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) + if (PSI->isColdCount(*Count)) { return "unlikely"; - // The counter says lukewarm. Return an empty prefix. + } return ""; } +static StringRef reconcileHotness(StringRef SectionPrefix, StringRef Hotness) { + assert((SectionPrefix == "hot" || SectionPrefix == "unlikely") && + "Section prefix must be 'hot' or 'unlikely'"); + + if (SectionPrefix == "hot" || Hotness == "hot") + return "hot"; + assert(SectionPrefix == "unlikely" && "Section prefix must be 'unlikely'."); + return Hotness; +} + +static StringRef +reconcileOptionalHotness(std::optional SectionPrefix, + std::optional Hotness) { + if (!SectionPrefix) + return Hotness.value_or(""); + if (!Hotness) + return SectionPrefix.value_or(""); + + return reconcileHotness(*SectionPrefix, *Hotness); +} + +StringRef StaticDataProfileInfo::getConstantSectionPrefix( + const Constant *C, const ProfileSummaryInfo *PSI) const { + std::optional HotnessBasedOnCount = + getDataHotnessBasedOnProfileCount(C, PSI); + if (const GlobalVariable *GV = dyn_cast(C)) + return reconcileOptionalHotness(GV->getSectionPrefix(), + HotnessBasedOnCount); + + return HotnessBasedOnCount.value_or(""); +} + bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { Info.reset(new StaticDataProfileInfo()); return false; diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 2d9b489a80acb..fdf48e1f00cda 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -78,15 +78,6 @@ bool StaticDataAnnotator::runOnModule(Module &M) { if (GV.isDeclarationForLinker()) continue; - // The implementation below assumes prior passes don't set section prefixes, - // and specifically do 'assign' rather than 'update'. So report error if a - // section prefix is already set. - if (auto maybeSectionPrefix = GV.getSectionPrefix(); - maybeSectionPrefix && !maybeSectionPrefix->empty()) - llvm::report_fatal_error("Global variable " + GV.getName() + - " already has a section prefix " + - *maybeSectionPrefix); - StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); if (SectionPrefix.empty()) continue; diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index ce06d1712f840..63d37f2b3ccea 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -90,17 +90,24 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,6 ; AGG-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits -; Currently static-data-splitter only analyzes access from code. -; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array -; and @cold_relro_array. A follow-up item is to analyze indirect access via data -; and prune the unlikely list. -; For @bss2 + +; @bss2 and @data3 are indirectly accessed via @hot_relro_array and +; @cold_relro_array, and actually hot due to accesses via @hot_relro_array. +; Under the hood, the static data splitter pass analyzes accesses from code but +; won't aggressively propgate the hotness of @hot_relro_array into the array +; elements -- instead, this pass reconciles the hotness information from both +; global variable section prefix and PGO counters. + +; @bss2 has a section prefix 'hot' in the IR. StaticDataProfileInfo reconciles +; it into a hot prefix. ; COMMON: .type bss2,@object -; SYM-NEXT: .section .bss.unlikely.bss2,"aw",@nobits -; UNIQ-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,7 -; AGG-NEXT: .section .bss.unlikely.,"aw",@nobits +; SYM-NEXT: .section .bss.hot.bss2,"aw",@nobits +; UNIQ-NEXT: .section .bss.hot.,"aw",@nobits,unique,7 +; AGG-NEXT: .section .bss.hot.,"aw",@nobits -; For @data3 +; @data3 doesn't have data access profile coverage and thereby doesn't have a +; section prefix. PGO counter analysis categorizes it as cold, so it will have +; section name `.data.unlikely`. ; COMMON: .type data3,@object ; SYM-NEXT: .section .data.unlikely.data3,"aw",@progbits ; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8 @@ -133,7 +140,7 @@ target triple = "x86_64-unknown-linux-gnu" @cold_data = internal global i32 4 @cold_data_custom_foo_section = internal global i32 100, section "foo" @cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2] -@bss2 = internal global i32 0 +@bss2 = internal global i32 0, !section_prefix !17 @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 @hot_data_custom_bar_section = internal global i32 101 #0 @@ -227,3 +234,4 @@ attributes #0 = {"data-section"="bar"} !14 = !{!"function_entry_count", i64 100000} !15 = !{!"function_entry_count", i64 1} !16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} From d4e4d958ce165b82f4da2b509bec5a7e4aaec756 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Sat, 30 Aug 2025 20:34:28 -0700 Subject: [PATCH 4/6] reconcile hotness --- .../llvm/Analysis/StaticDataProfileInfo.h | 37 ++++--- llvm/include/llvm/IR/GlobalObject.h | 6 +- llvm/lib/Analysis/StaticDataProfileInfo.cpp | 98 +++++++++++-------- llvm/lib/CodeGen/StaticDataAnnotator.cpp | 8 +- llvm/lib/IR/Globals.cpp | 17 ++++ llvm/lib/ProfileData/DataAccessProf.cpp | 4 + .../Transforms/Instrumentation/MemProfUse.cpp | 11 ++- .../CodeGen/X86/global-variable-partition.ll | 29 ++++-- 8 files changed, 138 insertions(+), 72 deletions(-) diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index d7bbf719e5692..fbbc13a148ffa 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -14,8 +14,10 @@ namespace llvm { /// profile information and provides methods to operate on them. class StaticDataProfileInfo { public: - /// Accummulate the profile count of a constant that will be lowered to static - /// data sections. + /// A constant and its profile count. + /// A constant is tracked if both conditions are met: + /// 1) It has local (i.e., private or internal) linkage. + // 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}. DenseMap ConstantProfileCounts; /// Keeps track of the constants that are seen at least once without profile @@ -26,11 +28,24 @@ class StaticDataProfileInfo { LLVM_ABI std::optional getConstantProfileCount(const Constant *C) const; - LLVM_ABI std::optional - getDataHotnessBasedOnProfileCount(const Constant *C, - const ProfileSummaryInfo *PSI) const; + enum class StaticDataHotness : uint8_t { + Cold = 0, + LukewarmOrUnknown = 1, + Hot = 2, + }; + + LLVM_ABI StaticDataHotness getSectionHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const; + LLVM_ABI StaticDataHotness + getSectionHotnessUsingDAP(std::optional SectionPrefix) const; + + LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const; + + bool HasDataAccessProf = false; public: + StaticDataProfileInfo(bool HasDataAccessProf) + : HasDataAccessProf(HasDataAccessProf) {} StaticDataProfileInfo() = default; /// If \p Count is not nullopt, add it to the profile count of the constant \p @@ -40,14 +55,10 @@ class StaticDataProfileInfo { LLVM_ABI void addConstantProfileCount(const Constant *C, std::optional Count); - /// Return a section prefix for the constant \p C based on its profile count. - /// - If a constant doesn't have a counter, return an empty string. - /// - Otherwise, - /// - If it has a hot count, return "hot". - /// - If it is seen by unprofiled function, return an empty string. - /// - If it has a cold count, return "unlikely". - /// - Otherwise (e.g. it's used by lukewarm functions), return an empty - /// string. + /// Given a constant \p C, returns a section prefix. + /// If \p C is a global variable, the section prefix is the bigger one + /// between its existing section prefix and its use profile count. Otherwise, + /// the section prefix is based on its use profile count. LLVM_ABI StringRef getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const; }; diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 08a02b42bdc14..58138c792dbb5 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -121,9 +121,13 @@ class GlobalObject : public GlobalValue { /// appropriate default object file section. LLVM_ABI void setSection(StringRef S); - /// Set the section prefix for this global object. + /// Set the section prefix for this global object. If \p Prefix is empty, + /// the section prefix metadata will be cleared if it exists. LLVM_ABI void setSectionPrefix(StringRef Prefix); + /// If \p Prefix is different from existing prefix, update section prefix. Returns true if an update happens and false otherwise. + LLVM_ABI bool updateSectionPrefix(StringRef Prefix); + /// Get the section prefix for this global object. LLVM_ABI std::optional getSectionPrefix() const; diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b7ca654e9c043..05b1adf58af60 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -1,11 +1,17 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" using namespace llvm; + +extern cl::opt AnnotateStaticDataSectionPrefix; + void StaticDataProfileInfo::addConstantProfileCount( const Constant *C, std::optional Count) { if (!Count) { @@ -28,65 +34,79 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { return I->second; } -std::optional -StaticDataProfileInfo::getDataHotnessBasedOnProfileCount( - const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); - // The constant `C` doesn't have a profile count. `C` might be a external - // linkage global variable, whose PGO-based counter is not tracked within one - // IR module. - if (!Count) - return std::nullopt; +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getSectionHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { // The accummulated counter shows the constant is hot. Return 'hot' whether // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; // The constant is not hot, and seen by unprofiled functions. We don't want to // assign it to unlikely sections, even if the counter says 'cold'. So return // an empty prefix before checking whether the counter is cold. if (ConstantWithoutCounts.count(C)) - return std::nullopt; + return StaticDataHotness::LukewarmOrUnknown; // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) { - return "unlikely"; - } - return ""; -} + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; -static StringRef reconcileHotness(StringRef SectionPrefix, StringRef Hotness) { - assert((SectionPrefix == "hot" || SectionPrefix == "unlikely") && - "Section prefix must be 'hot' or 'unlikely'"); + return StaticDataHotness::LukewarmOrUnknown; +} - if (SectionPrefix == "hot" || Hotness == "hot") +StringRef StaticDataProfileInfo::hotnessToStr( + StaticDataProfileInfo::StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataProfileInfo::StaticDataHotness::Cold: + return "unlikely"; + case StaticDataProfileInfo::StaticDataHotness::Hot: return "hot"; - assert(SectionPrefix == "unlikely" && "Section prefix must be 'unlikely'."); - return Hotness; + default: + return ""; + } } -static StringRef -reconcileOptionalHotness(std::optional SectionPrefix, - std::optional Hotness) { - if (!SectionPrefix) - return Hotness.value_or(""); - if (!Hotness) - return SectionPrefix.value_or(""); - - return reconcileHotness(*SectionPrefix, *Hotness); +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getSectionHotnessUsingDAP( + std::optional MaybeSectionPrefix) const { + if (!MaybeSectionPrefix) + return StaticDataProfileInfo::StaticDataHotness::LukewarmOrUnknown; + StringRef Prefix = *MaybeSectionPrefix; + assert((Prefix == "hot" || Prefix == "unlikely") && + "Expect section_prefix to be one of hot or unlikely"); + return Prefix == "hot" ? StaticDataProfileInfo::StaticDataHotness::Hot + : StaticDataProfileInfo::StaticDataHotness::Cold; } StringRef StaticDataProfileInfo::getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const { - std::optional HotnessBasedOnCount = - getDataHotnessBasedOnProfileCount(C, PSI); - if (const GlobalVariable *GV = dyn_cast(C)) - return reconcileOptionalHotness(GV->getSectionPrefix(), - HotnessBasedOnCount); + auto Count = getConstantProfileCount(C); - return HotnessBasedOnCount.value_or(""); + if (HasDataAccessProf) { + // Module flag `HasDataAccessProf` is 1 -> empty section prefix means uknown + // hotness except for string literals. + if (const GlobalVariable *GV = dyn_cast(C); + GV && !GV->getName().starts_with(".str")) { + auto HotnessFromDAP = getSectionHotnessUsingDAP(GV->getSectionPrefix()); + + if (!Count) + return hotnessToStr(HotnessFromDAP); + + auto HotnessFromPGO = getSectionHotnessUsingProfileCount(C, PSI, *Count); + return hotnessToStr(std::max(HotnessFromDAP, HotnessFromPGO)); + } + } + + if (!Count) + return ""; + return hotnessToStr(getSectionHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { - Info.reset(new StaticDataProfileInfo()); + bool HasDataAccessProf = false; + if (auto *MD = mdconst::extract_or_null( + M.getModuleFlag("HasDataAccessProf"))) + HasDataAccessProf = MD->getZExtValue(); + Info.reset(new StaticDataProfileInfo(HasDataAccessProf)); return false; } diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index fdf48e1f00cda..72735d21d45e9 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -31,6 +31,8 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Analysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" @@ -79,11 +81,7 @@ bool StaticDataAnnotator::runOnModule(Module &M) { continue; StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty()) - continue; - - GV.setSectionPrefix(SectionPrefix); - Changed = true; + Changed |= GV.updateSectionPrefix(SectionPrefix); } return Changed; diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 11d33e262fecb..0731fcbde106a 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -289,11 +289,28 @@ void GlobalObject::setSection(StringRef S) { } void GlobalObject::setSectionPrefix(StringRef Prefix) { + if (Prefix.empty()) { + setMetadata(LLVMContext::MD_section_prefix, nullptr); + return; + } MDBuilder MDB(getContext()); setMetadata(LLVMContext::MD_section_prefix, MDB.createGlobalObjectSectionPrefix(Prefix)); } +bool GlobalObject::updateSectionPrefix(StringRef Prefix) { + auto MD = getMetadata(LLVMContext::MD_section_prefix); + StringRef ExistingPrefix; // Empty by default. + if (MD != nullptr) + ExistingPrefix = cast(MD->getOperand(1))->getString(); + + if (ExistingPrefix != Prefix) { + setSectionPrefix(Prefix); + return true; + } + return false; +} + std::optional GlobalObject::getSectionPrefix() const { if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) { [[maybe_unused]] StringRef MDName = diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp index a1e686ba0036b..d16d0b6f852e5 100644 --- a/llvm/lib/ProfileData/DataAccessProf.cpp +++ b/llvm/lib/ProfileData/DataAccessProf.cpp @@ -1,6 +1,7 @@ #include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" @@ -9,6 +10,9 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { +cl::opt AnnotateStaticDataSectionPrefix( + "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, + cl::desc("If true, annotate the static data section prefix")); namespace memprof { // If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise, diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index ecb2f2dbc552b..789396ca20eac 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -41,6 +41,7 @@ using namespace llvm::memprof; #define DEBUG_TYPE "memprof" namespace llvm { +extern cl::opt AnnotateStaticDataSectionPrefix; extern cl::opt PGOWarnMissing; extern cl::opt NoPGOWarnMismatch; extern cl::opt NoPGOWarnMismatchComdatWeak; @@ -76,10 +77,6 @@ static cl::opt MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); -static cl::opt AnnotateStaticDataSectionPrefix( - "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, - cl::desc("If true, annotate the static data section prefix")); - // Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfMemProfMismatch, @@ -797,7 +794,11 @@ bool MemProfUsePass::annotateGlobalVariables( if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) return false; + // The module flag helps codegen passes interpret empty section prefix: + // - 0 : empty section prefix is expected for each GV. + // - 1 : empty section prefix means the GV has unknown hotness. if (!DataAccessProf) { + M.addModuleFlag(Module::Warning, "HasDataAccessProf", 0U); M.getContext().diagnose(DiagnosticInfoPGOProfile( MemoryProfileFileName.data(), StringRef("Data access profiles not found in memprof. Ignore " @@ -806,6 +807,8 @@ bool MemProfUsePass::annotateGlobalVariables( return false; } + M.addModuleFlag(Module::Warning, "HasDataAccessProf", 1); + bool Changed = false; // Iterate all global variables in the module and annotate them based on // data access profiles. Note it's up to the linker to decide how to map input diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index 63d37f2b3ccea..8a8148148d1b8 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -109,9 +109,9 @@ target triple = "x86_64-unknown-linux-gnu" ; section prefix. PGO counter analysis categorizes it as cold, so it will have ; section name `.data.unlikely`. ; COMMON: .type data3,@object -; SYM-NEXT: .section .data.unlikely.data3,"aw",@progbits -; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8 -; AGG-NEXT: .section .data.unlikely.,"aw",@progbits +; SYM-NEXT: .section .data.data3,"aw",@progbits +; UNIQ-NEXT: .section .data,"aw",@progbits,unique,8 +; AGG-NEXT: .data ; For @data_with_unknown_hotness ; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness @@ -119,8 +119,6 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ: .section .data,"aw",@progbits,unique,9 ; The `.section` directive is omitted for .data with -unique-section-names=false. ; See MCSectionELF::shouldOmitSectionDirective for the implementation details. -; AGG: .data -; COMMON: .Ldata_with_unknown_hotness: ; For @hot_data_custom_bar_section ; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix. @@ -130,20 +128,27 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ: .section bar,"aw",@progbits ; AGG: .section bar,"aw",@progbits +; For @external_cold_bss +; COMMON: .type external_cold_bss,@object +; SYM-NEXT: .section .bss.external_cold_bss,"aw",@nobits +; UNIQ-NEXT: .section .bss,"aw",@nobits,unique, +; AVG-NEXT: .bss + @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @hot_data = internal global i32 5 @hot_bss = internal global i32 0 @.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 -@cold_bss = internal global i32 0 -@cold_data = internal global i32 4 +@cold_bss = internal global i32 0, !section_prefix !18 +@cold_data = internal global i32 4, !section_prefix !18 @cold_data_custom_foo_section = internal global i32 100, section "foo" -@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2] +@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2], !section_prefix !18 @bss2 = internal global i32 0, !section_prefix !17 @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 @hot_data_custom_bar_section = internal global i32 101 #0 +@external_cold_bss = global i32 0 define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss @@ -156,7 +161,8 @@ define void @cold_func(i32 %0) !prof !15 { %9 = load i32, ptr @data_with_unknown_hotness %11 = load i32, ptr @hot_data %12 = load i32, ptr @cold_data_custom_foo_section - %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11, i32 %12) + %val = load i32, ptr @external_cold_bss + %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11, i32 %12, i32 %val) ret void } @@ -216,8 +222,9 @@ declare i32 @func_taking_arbitrary_param(...) attributes #0 = {"data-section"="bar"} -!llvm.module.flags = !{!1} +!llvm.module.flags = !{!0, !1} +!0 = !{i32 2, !"HasDataAccessProf", i32 1} !1 = !{i32 1, !"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} !3 = !{!"ProfileFormat", !"InstrProf"} @@ -235,3 +242,5 @@ attributes #0 = {"data-section"="bar"} !15 = !{!"function_entry_count", i64 1} !16 = !{!"branch_weights", i32 1, i32 99999} !17 = !{!"section_prefix", !"hot"} +!18 = !{!"section_prefix", !"unlikely"} + From bc8e67a42de1f2b22cdde0dd186d505d2c15d1ec Mon Sep 17 00:00:00 2001 From: mingmingl Date: Sun, 5 Oct 2025 23:35:48 -0700 Subject: [PATCH 5/6] simplify test --- .../X86/global-variable-partition-dap.ll | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/llvm/test/CodeGen/X86/global-variable-partition-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll index f9cc8b501dfd1..f987162a293dc 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll @@ -141,36 +141,6 @@ define void @hot_func(i32 %0) !prof !14 { ret void } -define i32 @main(i32 %0, ptr %1) !prof !15 { - br label %11 - -5: ; preds = %11 - %6 = call i32 @rand() - store i32 %6, ptr @cold_bss - store i32 %6, ptr @cold_data - store i32 %6, ptr @bss2 - store i32 %6, ptr @data3 - call void @cold_func(i32 %6) - ret i32 0 - -11: ; preds = %11, %2 - %12 = phi i32 [ 0, %2 ], [ %19, %11 ] - %13 = call i32 @rand() - %14 = srem i32 %13, 2 - %15 = sext i32 %14 to i64 - %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15 - %17 = load ptr, ptr %16 - store i32 %13, ptr %17 - store i32 %13, ptr @external_hot_data - %18 = add i32 %13, 1 - store i32 %18, ptr @hot_bss - call void @hot_func(i32 %12) - %19 = add i32 %12, 1 - %20 = icmp eq i32 %19, 100000 - br i1 %20, label %5, label %11, !prof !16 -} - -declare i32 @rand() declare i32 @func_taking_arbitrary_param(...) attributes #0 = {"data-section"="bar"} From 4ead381c76adfef914463426d72a6e318e833352 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Mon, 6 Oct 2025 23:45:27 -0700 Subject: [PATCH 6/6] simplify test. The next step is to add debug log --- .../llvm/Analysis/StaticDataProfileInfo.h | 2 + llvm/lib/Analysis/StaticDataProfileInfo.cpp | 7 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +- .../X86/global-variable-partition-dap.ll | 146 +++++++----------- 4 files changed, 63 insertions(+), 95 deletions(-) diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index fbbc13a148ffa..5bd2ae18fefe9 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -10,6 +10,8 @@ namespace llvm { +bool IsReservedGlobalVariable(const GlobalVariable &GV); + /// A class that holds the constants that represent static data and their /// profile information and provides methods to operate on them. class StaticDataProfileInfo { diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index 700f57000383a..7f42d079e1c9e 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -14,6 +14,10 @@ using namespace llvm; extern cl::opt AnnotateStaticDataSectionPrefix; +bool llvm::IsReservedGlobalVariable(const GlobalVariable &GV) { + return GV.getName().starts_with("llvm."); +} + void StaticDataProfileInfo::addConstantProfileCount( const Constant *C, std::optional Count) { if (!Count) { @@ -87,7 +91,8 @@ StringRef StaticDataProfileInfo::getConstantSectionPrefix( // Module flag `HasDataAccessProf` is 1 -> empty section prefix means // unknown hotness except for string literals. if (const GlobalVariable *GV = dyn_cast(C); - GV && !GV->getName().starts_with(".str")) { + GV && !IsReservedGlobalVariable(*GV) && + !GV->getName().starts_with(".str")) { auto HotnessFromDAP = getSectionHotnessUsingDAP(GV->getSectionPrefix()); if (!Count) { diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e22dc2507d548..97aa833cf14ff 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -133,8 +133,7 @@ StaticDataSplitter::getConstant(const MachineOperand &Op, // Skip 'llvm.'-prefixed global variables conservatively because they are // often handled specially, and skip those not in static data // sections. - if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(*GV, TM)) + if (!GV || llvm::IsReservedGlobalVariable(*GV) || !inStaticDataSection(*GV, TM)) return nullptr; return GV; } diff --git a/llvm/test/CodeGen/X86/global-variable-partition-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll index f987162a293dc..22788a2bdc18c 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80: target triple = "x86_64-unknown-linux-gnu" ;; Comments for RUN command options -; 1. `-relocation-model=pic` -> `hot_relro_array` and `cold_relro_array` are +; 1. `-relocation-model=pic` -> `relro_var` is ; placed in the .data.rel.ro-prefixed section. ; 2. `-data-sections=true -unique-section-names=false` -> data sections are ; uniqufied by variable names. @@ -10,120 +10,83 @@ target triple = "x86_64-unknown-linux-gnu" ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ ; RUN: -partition-static-data-sections=true \ ; RUN: -data-sections=true -unique-section-names=false \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,COMMON --dump-input=always +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always ; For @.str and @.str.1 -; COMMON: .type .L.str,@object -; UNIQ-NEXT: .section .rodata.str1.1.hot.,"aMS",@progbits,1 -; COMMON-NEXT: .L.str: -; COMMON-NEXT: "hot\t" -; COMMON: .L.str.1: -; COMMON-NEXT: "%d\t%d\t%d\n" +; CHECK: .type .L.str,@object +; CHECK-NEXT: .section .rodata.str1.1.hot.,"aMS",@progbits,1 +; CHECK-NEXT: .L.str: +; CHECK-NEXT: "1234" +; CHECK: .type .str.1,@object +; CHECK: .str.1: +; CHECK-NEXT: "abcde" -; For @hot_relro_array -; COMMON: .type hot_relro_array,@object -; UNIQ-NEXT: .section .data.rel.ro.hot.,"aw",@progbits,unique,1 +; For @.str.2 +; CHECK: .type .str.2,@object +; CHECK-NEXT: .section .rodata.str1.1,"aMS",@progbits +; CHECK-NEXT: .globl .str.2 +; CHECK-NEXT: .str.2: +; CHECK-NEXT: "beef" +@.str = private unnamed_addr constant [5 x i8] c"1234\00", align 1 +@.str.1 = internal unnamed_addr constant [6 x i8] c"abcde\00" +@.str.2 = unnamed_addr constant [5 x i8] c"beef\00", align 1 -; For @external_hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}. -; COMMON: .type external_hot_data,@object -; UNIQ-NEXT: .section .data.hot.,"aw",@progbits,unique,2 +; CHECK: .type relro_var,@object +; CHECK-NEXT: .section .data.rel.ro,"aw",@progbits,unique,1 -; For @hot_bss, which is accessed by {unprofiled_func, hot_func}. -; COMMON: .type hot_bss,@object -; UNIQ-NEXT: .section .bss.hot.,"aw",@nobits,unique,3 +; CHECK: .type external_hot_data,@object +; CHECK-NEXT: .section .data.hot.,"aw",@progbits,unique,2 -; For @.str.2 -; COMMON: .type .L.str.2,@object -; UNIQ-NEXT: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 -; COMMON-NEXT: .L.str.2: -; COMMON-NEXT: "cold%d\t%d\t%d\n" - -; For @cold_bss -; COMMON: .type cold_bss,@object -; UNIQ-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,4 - -; For @cold_data -; COMMON: .type cold_data,@object -; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,5 - -; For @cold_data_custom_foo_section -; It has an explicit section 'foo' and shouldn't have hot or unlikely suffix. -; COMMON: .type cold_data_custom_foo_section,@object -; UNIQ-NEXT: .section foo,"aw",@progbits - -; For @cold_relro_array -; COMMON: .type cold_relro_array,@object -; UNIQ-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,6 - - -; @bss2 and @data3 are indirectly accessed via @hot_relro_array and -; @cold_relro_array, and actually hot due to accesses via @hot_relro_array. -; Under the hood, the static data splitter pass analyzes accesses from code but -; won't aggressively propgate the hotness of @hot_relro_array into the array -; elements -- instead, this pass reconciles the hotness information from both -; global variable section prefix and PGO counters. - -; @bss2 has a section prefix 'hot' in the IR. StaticDataProfileInfo reconciles -; it into a hot prefix. -; COMMON: .type bss2,@object -; UNIQ-NEXT: .section .bss.hot.,"aw",@nobits,unique,7 - -; @data3 doesn't have data access profile coverage and thereby doesn't have a -; section prefix. PGO counter analysis categorizes it as cold, so it will have -; section name `.data.unlikely`. -; COMMON: .type data3,@object -; UNIQ-NEXT: .section .data,"aw",@progbits,unique,8 - -; For @data_with_unknown_hotness -; COMMON: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness -; UNIQ: .section .data,"aw",@progbits,unique,9 - -; For @hot_data_custom_bar_section -; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix. -; COMMON: .type hot_data_custom_bar_section,@object -; UNIQ: .section bar,"aw",@progbits - -; For @external_cold_bss -; COMMON: .type external_cold_bss,@object -; UNIQ-NEXT: .section .bss,"aw",@nobits,unique, - -@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 -@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 -@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] +; CHECK: .type hot_bss,@object +; CHECK-NEXT: .section .bss.hot.,"aw",@nobits,unique,3 + +@relro_var = constant [2 x ptr] [ptr @bss2, ptr @data3] @external_hot_data = global i32 5, !section_prefix !17 @hot_bss = internal global i32 0 -@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 + +;; Both section prefix and PGO counters indicate @cold_bss and @cold_data are +;; rarely accesed. +; CHECK: .type cold_bss,@object +; CHECK-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,4 +; CHECK: .type cold_data,@object +; CHECK-NEXT: .section .data.unlikely.,"aw",@progbits,unique,5 @cold_bss = internal global i32 0, !section_prefix !18 @cold_data = internal global i32 4, !section_prefix !18 -@cold_data_custom_foo_section = internal global i32 100, section "foo" -@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2], !section_prefix !18 + +;; @bss2 has a section prefix 'hot' in the IR. StaticDataProfileInfo reconciles +;; it into a hot prefix. +; CHECK: .type bss2,@object +; CHECK-NEXT: .section .bss.hot.,"aw",@nobits,unique,6 @bss2 = internal global i32 0, !section_prefix !17 + +;; Since `HasDataAccessProf` is true, data without a section prefix is +;; conservatively categorized as unknown (e.g., from incremental source code) +;; rather than cold. +; CHECK: .type data3,@object +; CHECK-NEXT: .section .data,"aw",@progbits,unique,7 @data3 = internal global i32 3 -@data_with_unknown_hotness = private global i32 5 + +;; These sections have custom names, so they won't be labeled as .hot or .unlikely. +; CHECK: .type hot_data_custom_bar_section,@object +; CHECK-NEXT: .section bar,"aw" +; CHECK: .type cold_data_custom_foo_section,@object +; CHECK-NEXT: .section foo,"aw" @hot_data_custom_bar_section = internal global i32 101 #0 -@external_cold_bss = global i32 0 +@cold_data_custom_foo_section = internal global i32 100, section "foo" define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss %3 = load i32, ptr @cold_data - %4 = srem i32 %0, 2 - %5 = sext i32 %4 to i64 - %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5 - %7 = load ptr, ptr %6 - %8 = load i32, ptr %7 - %9 = load i32, ptr @data_with_unknown_hotness %11 = load i32, ptr @external_hot_data %12 = load i32, ptr @cold_data_custom_foo_section - %val = load i32, ptr @external_cold_bss - %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11, i32 %12, i32 %val) + %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %11, i32 %12) ret void } define i32 @unprofiled_func() { - %a = load i32, ptr @data_with_unknown_hotness %b = load i32, ptr @external_hot_data %c = load i32, ptr @hot_bss - %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c) + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %b, i32 %c) ret i32 %ret } @@ -131,7 +94,7 @@ define void @hot_func(i32 %0) !prof !14 { %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str) %3 = srem i32 %0, 2 %4 = sext i32 %3 to i64 - %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4 + %5 = getelementptr inbounds [2 x ptr], ptr @relro_var, i64 0, i64 %4 %6 = load ptr, ptr %5 %7 = load i32, ptr %6 %8 = load i32, ptr @external_hot_data @@ -166,4 +129,3 @@ attributes #0 = {"data-section"="bar"} !16 = !{!"branch_weights", i32 1, i32 99999} !17 = !{!"section_prefix", !"hot"} !18 = !{!"section_prefix", !"unlikely"} -