diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index fa21eba1377df..5bd2ae18fefe9 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -10,12 +10,16 @@ namespace llvm { +bool IsReservedGlobalVariable(const GlobalVariable &GV); + /// A class that holds the constants that represent static data and their /// profile information and provides methods to operate on them. class StaticDataProfileInfo { public: - /// Accummulate the profile count of a constant that will be lowered to static - /// data sections. + /// A constant and its profile count. + /// A constant is tracked if both conditions are met: + /// 1) It has local (i.e., private or internal) linkage. + /// 2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}. DenseMap ConstantProfileCounts; /// Keeps track of the constants that are seen at least once without profile @@ -26,7 +30,24 @@ class StaticDataProfileInfo { LLVM_ABI std::optional getConstantProfileCount(const Constant *C) const; + enum class StaticDataHotness : uint8_t { + Cold = 0, + LukewarmOrUnknown = 1, + Hot = 2, + }; + + LLVM_ABI StaticDataHotness getSectionHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const; + LLVM_ABI StaticDataHotness + getSectionHotnessUsingDAP(std::optional SectionPrefix) const; + + LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const; + + bool HasDataAccessProf = false; + public: + StaticDataProfileInfo(bool HasDataAccessProf) + : HasDataAccessProf(HasDataAccessProf) {} StaticDataProfileInfo() = default; /// If \p Count is not nullopt, add it to the profile count of the constant \p @@ -36,14 +57,10 @@ class StaticDataProfileInfo { LLVM_ABI void addConstantProfileCount(const Constant *C, std::optional Count); - /// Return a section prefix for the constant \p C based on its profile count. 
- /// - If a constant doesn't have a counter, return an empty string. - /// - Otherwise, - /// - If it has a hot count, return "hot". - /// - If it is seen by unprofiled function, return an empty string. - /// - If it has a cold count, return "unlikely". - /// - Otherwise (e.g. it's used by lukewarm functions), return an empty - /// string. + /// Given a constant \p C, returns a section prefix. + /// If \p C is a global variable, the section prefix is the bigger one + /// between its existing section prefix and its use profile count. Otherwise, + /// the section prefix is based on its use profile count. LLVM_ABI StringRef getConstantSectionPrefix( const Constant *C, const ProfileSummaryInfo *PSI) const; }; diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b036b2dde770e..7f42d079e1c9e 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -1,11 +1,23 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" +#define DEBUG_TYPE "static-data-profile-info" + using namespace llvm; + +extern cl::opt AnnotateStaticDataSectionPrefix; + +bool llvm::IsReservedGlobalVariable(const GlobalVariable &GV) { + return GV.getName().starts_with("llvm."); +} + void StaticDataProfileInfo::addConstantProfileCount( const Constant *C, std::optional Count) { if (!Count) { @@ -28,29 +40,84 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { return I->second; } -StringRef StaticDataProfileInfo::getConstantSectionPrefix( - const Constant *C, const ProfileSummaryInfo *PSI) const { - auto Count = getConstantProfileCount(C); - if (!Count) - return ""; +StaticDataProfileInfo::StaticDataHotness 
+StaticDataProfileInfo::getSectionHotnessUsingProfileCount( + const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const { // The accummulated counter shows the constant is hot. Return 'hot' whether // this variable is seen by unprofiled functions or not. - if (PSI->isHotCount(*Count)) - return "hot"; + if (PSI->isHotCount(Count)) + return StaticDataHotness::Hot; // The constant is not hot, and seen by unprofiled functions. We don't want to // assign it to unlikely sections, even if the counter says 'cold'. So return // an empty prefix before checking whether the counter is cold. if (ConstantWithoutCounts.count(C)) - return ""; + return StaticDataHotness::LukewarmOrUnknown; // The accummulated counter shows the constant is cold. Return 'unlikely'. - if (PSI->isColdCount(*Count)) + if (PSI->isColdCount(Count)) + return StaticDataHotness::Cold; + + return StaticDataHotness::LukewarmOrUnknown; +} + +StringRef StaticDataProfileInfo::hotnessToStr( + StaticDataProfileInfo::StaticDataHotness Hotness) const { + switch (Hotness) { + case StaticDataProfileInfo::StaticDataHotness::Cold: return "unlikely"; - // The counter says lukewarm. Return an empty prefix. - return ""; + case StaticDataProfileInfo::StaticDataHotness::Hot: + return "hot"; + default: + return ""; + } +} + +StaticDataProfileInfo::StaticDataHotness +StaticDataProfileInfo::getSectionHotnessUsingDAP( + std::optional MaybeSectionPrefix) const { + if (!MaybeSectionPrefix) + return StaticDataProfileInfo::StaticDataHotness::LukewarmOrUnknown; + StringRef Prefix = *MaybeSectionPrefix; + assert((Prefix == "hot" || Prefix == "unlikely") && + "Expect section_prefix to be one of hot or unlikely"); + return Prefix == "hot" ? 
StaticDataProfileInfo::StaticDataHotness::Hot + : StaticDataProfileInfo::StaticDataHotness::Cold; +} + +StringRef StaticDataProfileInfo::getConstantSectionPrefix( + const Constant *C, const ProfileSummaryInfo *PSI) const { + std::optional Count = getConstantProfileCount(C); + + if (HasDataAccessProf) { + // Module flag `HasDataAccessProf` is 1 -> empty section prefix means + // unknown hotness except for string literals. + if (const GlobalVariable *GV = dyn_cast(C); + GV && !IsReservedGlobalVariable(*GV) && + !GV->getName().starts_with(".str")) { + auto HotnessFromDAP = getSectionHotnessUsingDAP(GV->getSectionPrefix()); + + if (!Count) { + // Use data access profiles to infer hotness when the profile counter + // isn't computed. + return hotnessToStr(HotnessFromDAP); + } + + // Both DAP and PGO counters are available. Use the hotter one. + auto HotnessFromPGO = getSectionHotnessUsingProfileCount(C, PSI, *Count); + return hotnessToStr(std::max(HotnessFromDAP, HotnessFromPGO)); + } + } + + if (!Count) + return ""; + return hotnessToStr(getSectionHotnessUsingProfileCount(C, PSI, *Count)); } bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { - Info.reset(new StaticDataProfileInfo()); + bool HasDataAccessProf = false; + if (auto *MD = mdconst::extract_or_null( + M.getModuleFlag("HasDataAccessProf"))) + HasDataAccessProf = MD->getZExtValue(); + Info.reset(new StaticDataProfileInfo(HasDataAccessProf)); return false; } diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 53a9ab4dbda02..010354b4c4b56 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -31,6 +31,8 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Analysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" @@ -78,19 +80,7 @@ bool 
StaticDataAnnotator::runOnModule(Module &M) { if (GV.isDeclarationForLinker()) continue; - // The implementation below assumes prior passes don't set section prefixes, - // and specifically do 'assign' rather than 'update'. So report error if a - // section prefix is already set. - if (auto maybeSectionPrefix = GV.getSectionPrefix(); - maybeSectionPrefix && !maybeSectionPrefix->empty()) - llvm::report_fatal_error("Global variable " + GV.getName() + - " already has a section prefix " + - *maybeSectionPrefix); - StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty()) - continue; - Changed |= GV.setSectionPrefix(SectionPrefix); } diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e22dc2507d548..97aa833cf14ff 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -133,8 +133,7 @@ StaticDataSplitter::getConstant(const MachineOperand &Op, // Skip 'llvm.'-prefixed global variables conservatively because they are // often handled specially, and skip those not in static data // sections. 
- if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(*GV, TM)) + if (!GV || llvm::IsReservedGlobalVariable(*GV) || !inStaticDataSection(*GV, TM)) return nullptr; return GV; } diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp index a1e686ba0036b..d16d0b6f852e5 100644 --- a/llvm/lib/ProfileData/DataAccessProf.cpp +++ b/llvm/lib/ProfileData/DataAccessProf.cpp @@ -1,6 +1,7 @@ #include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" @@ -9,6 +10,9 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { +cl::opt AnnotateStaticDataSectionPrefix( + "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, + cl::desc("If true, annotate the static data section prefix")); namespace memprof { // If `Map` has an entry keyed by `Str`, returns the entry iterator. 
Otherwise, diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index c86092bd51eda..5d36df4f85ad6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -41,6 +41,7 @@ using namespace llvm::memprof; #define DEBUG_TYPE "memprof" namespace llvm { +extern cl::opt AnnotateStaticDataSectionPrefix; extern cl::opt PGOWarnMissing; extern cl::opt NoPGOWarnMismatch; extern cl::opt NoPGOWarnMismatchComdatWeak; @@ -76,10 +77,6 @@ static cl::opt MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); -static cl::opt AnnotateStaticDataSectionPrefix( - "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, - cl::desc("If true, annotate the static data section prefix")); - // Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfMemProfMismatch, @@ -797,7 +794,11 @@ bool MemProfUsePass::annotateGlobalVariables( if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) return false; + // The module flag helps codegen passes interpret empty section prefix: + // - 0 : empty section prefix is expected for each GV. + // - 1 : empty section prefix means the GV has unknown hotness. if (!DataAccessProf) { + M.addModuleFlag(Module::Warning, "HasDataAccessProf", 0U); M.getContext().diagnose(DiagnosticInfoPGOProfile( MemoryProfileFileName.data(), StringRef("Data access profiles not found in memprof. Ignore " @@ -806,6 +807,8 @@ bool MemProfUsePass::annotateGlobalVariables( return false; } + M.addModuleFlag(Module::Warning, "HasDataAccessProf", 1); + bool Changed = false; // Iterate all global variables in the module and annotate them based on // data access profiles. 
Note it's up to the linker to decide how to map input diff --git a/llvm/test/CodeGen/X86/global-variable-partition-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll new file mode 100644 index 0000000000000..22788a2bdc18c --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition-dap.ll @@ -0,0 +1,131 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;; Comments for RUN command options +; 1. `-relocation-model=pic` -> `relro_var` is +; placed in the .data.rel.ro-prefixed section. +; 2. `-data-sections=true -unique-section-names=false` -> data sections are +; uniquified by numbers. +; +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +; RUN: -partition-static-data-sections=true \ +; RUN: -data-sections=true -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; For @.str and @.str.1 +; CHECK: .type .L.str,@object +; CHECK-NEXT: .section .rodata.str1.1.hot.,"aMS",@progbits,1 +; CHECK-NEXT: .L.str: +; CHECK-NEXT: "1234" +; CHECK: .type .str.1,@object +; CHECK: .str.1: +; CHECK-NEXT: "abcde" + +; For @.str.2 +; CHECK: .type .str.2,@object +; CHECK-NEXT: .section .rodata.str1.1,"aMS",@progbits +; CHECK-NEXT: .globl .str.2 +; CHECK-NEXT: .str.2: +; CHECK-NEXT: "beef" +@.str = private unnamed_addr constant [5 x i8] c"1234\00", align 1 +@.str.1 = internal unnamed_addr constant [6 x i8] c"abcde\00" +@.str.2 = unnamed_addr constant [5 x i8] c"beef\00", align 1 + +; CHECK: .type relro_var,@object +; CHECK-NEXT: .section .data.rel.ro,"aw",@progbits,unique,1 + +; CHECK: .type external_hot_data,@object +; CHECK-NEXT: .section .data.hot.,"aw",@progbits,unique,2 + +; CHECK: .type hot_bss,@object +; CHECK-NEXT: .section .bss.hot.,"aw",@nobits,unique,3 + +@relro_var = constant [2 x ptr] [ptr @bss2, ptr @data3] +@external_hot_data = global i32 5, !section_prefix !17 +@hot_bss = internal global i32 0 
+ +;; Both section prefix and PGO counters indicate @cold_bss and @cold_data are +;; rarely accessed. +; CHECK: .type cold_bss,@object +; CHECK-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,4 +; CHECK: .type cold_data,@object +; CHECK-NEXT: .section .data.unlikely.,"aw",@progbits,unique,5 +@cold_bss = internal global i32 0, !section_prefix !18 +@cold_data = internal global i32 4, !section_prefix !18 + +;; @bss2 has a section prefix 'hot' in the IR. StaticDataProfileInfo reconciles +;; it into a hot prefix. +; CHECK: .type bss2,@object +; CHECK-NEXT: .section .bss.hot.,"aw",@nobits,unique,6 +@bss2 = internal global i32 0, !section_prefix !17 + +;; Since `HasDataAccessProf` is true, data without a section prefix is +;; conservatively categorized as unknown (e.g., from incremental source code) +;; rather than cold. +; CHECK: .type data3,@object +; CHECK-NEXT: .section .data,"aw",@progbits,unique,7 +@data3 = internal global i32 3 + +;; These sections have custom names, so they won't be labeled as .hot or .unlikely. +; CHECK: .type hot_data_custom_bar_section,@object +; CHECK-NEXT: .section bar,"aw" +; CHECK: .type cold_data_custom_foo_section,@object +; CHECK-NEXT: .section foo,"aw" +@hot_data_custom_bar_section = internal global i32 101 #0 +@cold_data_custom_foo_section = internal global i32 100, section "foo" + +define void @cold_func(i32 %0) !prof !15 { + %2 = load i32, ptr @cold_bss + %3 = load i32, ptr @cold_data + %11 = load i32, ptr @external_hot_data + %12 = load i32, ptr @cold_data_custom_foo_section + %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %11, i32 %12) + ret void +} + +define i32 @unprofiled_func() { + %b = load i32, ptr @external_hot_data + %c = load i32, ptr @hot_bss + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %b, i32 %c) + ret i32 %ret +} + +define void @hot_func(i32 %0) !prof !14 { + %2 = call i32 (...) 
@func_taking_arbitrary_param(ptr @.str) + %3 = srem i32 %0, 2 + %4 = sext i32 %3 to i64 + %5 = getelementptr inbounds [2 x ptr], ptr @relro_var, i64 0, i64 %4 + %6 = load ptr, ptr %5 + %7 = load i32, ptr %6 + %8 = load i32, ptr @external_hot_data + %9 = load i32, ptr @hot_bss + %10 = load i32, ptr @hot_data_custom_bar_section + %11 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9, i32 %10) + ret void +} + +declare i32 @func_taking_arbitrary_param(...) + +attributes #0 = {"data-section"="bar"} + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 2, !"HasDataAccessProf", i32 1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} +!17 = !{!"section_prefix", !"hot"} +!18 = !{!"section_prefix", !"unlikely"}