diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 82dd5feb31dba..42771fbe18d60 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,18 +42,40 @@ struct BBClusterInfo { unsigned PositionInCluster; }; +// This represents the CFG profile data for a function. +struct CfgProfile { + // Node counts for each basic block. + DenseMap NodeCounts; + // Edge counts for each edge, stored as a nested map. + DenseMap> EdgeCounts; + + // Returns the profile count for the given basic block or zero if it does not + // exist. + uint64_t getNodeCount(const UniqueBBID &BBID) const { + return NodeCounts.lookup(BBID); + } + + // Returns the profile count for the edge from `SrcBBID` to `SinkBBID` or + // zero if it does not exist. + uint64_t getEdgeCount(const UniqueBBID &SrcBBID, + const UniqueBBID &SinkBBID) const { + auto It = EdgeCounts.find(SrcBBID); + if (It == EdgeCounts.end()) + return 0; + return It->second.lookup(SinkBBID); + } +}; + // This represents the raw input profile for one function. -struct FunctionPathAndClusterInfo { +struct FunctionProfile { // BB Cluster information specified by `UniqueBBID`s. SmallVector ClusterInfo; // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; - // Node counts for each basic block. - DenseMap NodeCounts; - // Edge counts for each edge, stored as a nested map. - DenseMap> EdgeCounts; + // CFG profile data (block and edge counts). 
+ CfgProfile Cfg; }; class BasicBlockSectionsProfileReader { @@ -81,10 +103,14 @@ class BasicBlockSectionsProfileReader { SmallVector> getClonePathsForFunction(StringRef FuncName) const; - // Returns the profile count for the edge from `SrcBBID` to `SinkBBID` in - // function `FuncName` or zero if it does not exist. - uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, - const UniqueBBID &SinkBBID) const; + // Returns a pointer to the CfgProfile for the given function. + // Returns nullptr if the function has no entry in the profile. + const CfgProfile *getFunctionCfgProfile(StringRef FuncName) const { + auto It = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + if (It == ProgramPathAndClusterInfo.end()) + return nullptr; + return &It->second.Cfg; + } private: StringRef getAliasName(StringRef FuncName) const { @@ -132,7 +158,7 @@ class BasicBlockSectionsProfileReader { // for (all or some of) its basic blocks. The cluster information for every // basic block includes its cluster ID along with the position of the basic // block in that cluster. - StringMap ProgramPathAndClusterInfo; + StringMap ProgramPathAndClusterInfo; // Some functions have alias names. We use this map to find the main alias // name which appears in ProgramPathAndClusterInfo as a key. @@ -192,8 +218,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector> getClonePathsForFunction(StringRef FuncName) const; - uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, - const UniqueBBID &DestBBID) const; + const CfgProfile *getFunctionCfgProfile(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 05fffe9d97e98..0d748217a1ed9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,7 @@ #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" @@ -164,6 +165,12 @@ static cl::bits PgoAnalysisMapFeatures( "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is " "extracted from PGO related analysis.")); +static cl::opt PgoAnalysisMapUsePropellerCfg( + "pgo-analysis-map-use-propeller-cfg", + cl::desc( + "If available, use the Propeller cfg profile in the PGO analysis map."), + cl::Hidden, cl::init(false)); + static cl::opt BBAddrMapSkipEmitBBEntries( "basic-block-address-map-skip-bb-entries", cl::desc("Skip emitting basic block entries in the SHT_LLVM_BB_ADDR_MAP " @@ -473,6 +480,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addUsedIfAvailable(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1532,12 +1540,16 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { assert(BBAddrMapVersion >= 2 && "PGOAnalysisMap only supports version 2 or later"); - if (Features.FuncEntryCount) { - OutStreamer->AddComment("function entry count"); - auto MaybeEntryCount = MF.getFunction().getEntryCount(); - OutStreamer->emitULEB128IntValue( - MaybeEntryCount ? MaybeEntryCount->getCount() : 0); + // We will emit the BBSPR profile data if requested and available. Otherwise, + // we fall back to MBFI and MBPI. 
+ const CfgProfile *FuncCfgProfile = nullptr; + if (PgoAnalysisMapUsePropellerCfg) { + if (auto *BBSPR = getAnalysisIfAvailable< + BasicBlockSectionsProfileReaderWrapperPass>()) + FuncCfgProfile = + BBSPR->getFunctionCfgProfile(MF.getFunction().getName()); } + const MachineBlockFrequencyInfo *MBFI = Features.BBFreq ? &getAnalysis().getBFI() @@ -1547,23 +1559,43 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { ? &getAnalysis().getMBPI() : nullptr; + if (Features.FuncEntryCount) { + OutStreamer->AddComment("function entry count"); + uint64_t EntryCount = 0; + if (FuncCfgProfile) { + EntryCount = FuncCfgProfile->getNodeCount(*MF.front().getBBID()); + } else { + auto MaybeEntryCount = MF.getFunction().getEntryCount(); + EntryCount = MaybeEntryCount ? MaybeEntryCount->getCount() : 0; + } + OutStreamer->emitULEB128IntValue(EntryCount); + } + if (Features.BBFreq || Features.BrProb) { for (const MachineBasicBlock &MBB : MF) { + if (Features.BBFreq) { OutStreamer->AddComment("basic block frequency"); - OutStreamer->emitULEB128IntValue( - MBFI->getBlockFreq(&MBB).getFrequency()); + uint64_t BlockFrequency = + FuncCfgProfile ? FuncCfgProfile->getNodeCount(*MBB.getBBID()) + : MBFI->getBlockFreq(&MBB).getFrequency(); + OutStreamer->emitULEB128IntValue(BlockFrequency); } if (Features.BrProb) { - unsigned SuccCount = MBB.succ_size(); OutStreamer->AddComment("basic block successor count"); - OutStreamer->emitULEB128IntValue(SuccCount); + OutStreamer->emitULEB128IntValue(MBB.succ_size()); for (const MachineBasicBlock *SuccMBB : MBB.successors()) { OutStreamer->AddComment("successor BB ID"); OutStreamer->emitULEB128IntValue(SuccMBB->getBBID()->BaseID); OutStreamer->AddComment("successor branch probability"); - OutStreamer->emitULEB128IntValue( - MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator()); + // For MBPI, we emit the numerator of the probability. For BBSPR, we + // emit the raw edge count. + uint64_t EdgeFrequency = + FuncCfgProfile + ? 
FuncCfgProfile->getEdgeCount(*MBB.getBBID(), + *SuccMBB->getBBID()) + : MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator(); + OutStreamer->emitULEB128IntValue(EdgeFrequency); } } } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index fbcd614b85d18..adab0e8956268 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -76,21 +76,6 @@ BasicBlockSectionsProfileReader::getClonePathsForFunction( return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; } -uint64_t BasicBlockSectionsProfileReader::getEdgeCount( - StringRef FuncName, const UniqueBBID &SrcBBID, - const UniqueBBID &SinkBBID) const { - auto It = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); - if (It == ProgramPathAndClusterInfo.end()) - return 0; - auto NodeIt = It->second.EdgeCounts.find(SrcBBID); - if (NodeIt == It->second.EdgeCounts.end()) - return 0; - auto EdgeIt = NodeIt->second.find(SinkBBID); - if (EdgeIt == NodeIt->second.end()) - return 0; - return EdgeIt->second; -} - // Reads the version 1 basic block sections profile. Profile for each function // is encoded as follows: // m @@ -115,7 +100,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( // the edge 1->3. Within the given clusters, each cloned block is identified by // ".". For instance, 3.1 represents the first // clone of block 3. Original blocks are specified just with their block ids. A -// block cloned multiple times appears with distinct clone ids. The CFG for bar +// block cloned multiple times appears with distinct clone ids. The Cfg for bar // is shown below before and after cloning with its final clusters labeled. // // f main @@ -255,12 +240,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } - case 'g': { // CFG profile specifier. + case 'g': { // Cfg profile specifier. 
// Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) continue; - // For each node, its CFG profile is encoded as + // For each node, its Cfg profile is encoded as // :,:,:,... for (auto BasicBlockEdgeProfile : Values) { if (BasicBlockEdgeProfile.empty()) @@ -279,10 +264,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Twine("unsigned integer expected: '") + CountStr + "'"); if (i == 0) { // The first element represents the source and its total count. - FI->second.NodeCounts[SrcBBID = *BBID] = Count; + FI->second.Cfg.NodeCounts[SrcBBID = *BBID] = Count; continue; } - FI->second.EdgeCounts[SrcBBID][*BBID] = Count; + FI->second.Cfg.EdgeCounts[SrcBBID][*BBID] = Count; } } continue; @@ -487,10 +472,10 @@ BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( return BBSPR.getClonePathsForFunction(FuncName); } -uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( - StringRef FuncName, const UniqueBBID &SrcBBID, - const UniqueBBID &SinkBBID) const { - return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); +const CfgProfile * +BasicBlockSectionsProfileReaderWrapperPass::getFunctionCfgProfile( + StringRef FuncName) const { + return BBSPR.getFunctionCfgProfile(FuncName); } BasicBlockSectionsProfileReader & diff --git a/llvm/test/CodeGen/X86/basic-block-sections-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-sections-pgo-features.ll new file mode 100644 index 0000000000000..21ff75a5cd354 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-pgo-features.ll @@ -0,0 +1,92 @@ +; Verify PGO analysis map features with basic block sections profile. 
+; +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'g 0:1000,1:800,2:200 1:800,3:800 2:200,3:200 3:1000' >> %t +; RUN: echo 'c 0 1 2' >> %t +; +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -basic-block-address-map -pgo-analysis-map=all -pgo-analysis-map-use-propeller-cfg | FileCheck %s + +define void @foo(i1 %cond) nounwind !prof !0 { +entry: + br label %bb1 + +bb1: + br i1 %cond, label %bb2, label %bb3, !prof !1 + +bb2: + br label %bb3 + +bb3: + ret void +} + +!0 = !{!"function_entry_count", i64 1500} +!1 = !{!"branch_weights", i32 1200, i32 300} + +;; Verify that foo gets its PGO map from its Propeller CFG profile. + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.foo +; CHECK-NEXT: .byte 3 # version +; CHECK-NEXT: .byte 15 # feature +; CHECK: .quad .Lfunc_begin0 # base address +; CHECK: .byte 0 # BB id +; CHECK: .byte 1 # BB id +; CHECK: .byte 2 # BB id +; CHECK: .byte 3 # BB id + +; PGO Analysis Map for foo +; CHECK: .ascii "\350\007" # function entry count +; CHECK-NEXT: .ascii "\350\007" # basic block frequency +; CHECK-NEXT: .byte 1 # basic block successor count +; CHECK-NEXT: .byte 1 # successor BB ID +; CHECK-NEXT: .ascii "\240\006" # successor branch probability +; CHECK-NEXT: .ascii "\240\006" # basic block frequency +; CHECK-NEXT: .byte 2 # basic block successor count +; CHECK-NEXT: .byte 2 # successor BB ID +; CHECK-NEXT: .byte 0 # successor branch probability +; CHECK-NEXT: .byte 3 # successor BB ID +; CHECK-NEXT: .ascii "\240\006" # successor branch probability +; CHECK-NEXT: .ascii "\310\001" # basic block frequency +; CHECK-NEXT: .byte 1 # basic block successor count +; CHECK-NEXT: .byte 3 # successor BB ID +; CHECK-NEXT: .ascii "\310\001" # successor branch probability +; CHECK-NEXT: .ascii "\350\007" # basic block frequency +; CHECK-NEXT: .byte 0 # basic block successor count + +define void @bar(i1 %cond) nounwind !prof !2 { +entry: + br i1 %cond, label %bb1, label %bb2, 
!prof !3 + +bb1: + ret void + +bb2: + ret void +} + +!2 = !{!"function_entry_count", i64 80} +!3 = !{!"branch_weights", i32 2, i32 78} + +;; Verify that we emit the PGO map for bar which doesn't have a Propeller profile. + +; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.bar +; CHECK-NEXT: .byte 3 # version +; CHECK-NEXT: .byte 7 # feature +; CHECK: .quad .Lfunc_begin1 # function address +; CHECK: .byte 0 # BB id +; CHECK: .byte 1 # BB id +; CHECK: .byte 2 # BB id + +; CHECK: .byte 80 # function entry count +; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200 " # basic block frequency +; CHECK-NEXT: .byte 2 # basic block successor count +; CHECK-NEXT: .byte 1 # successor BB ID +; CHECK-NEXT: .ascii "\200\200\200\200\004" # successor branch probability +; CHECK-NEXT: .byte 2 # successor BB ID +; CHECK-NEXT: .ascii "\200\200\200\200\004" # successor branch probability +; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200\020" # basic block frequency +; CHECK-NEXT: .byte 0 # basic block successor count +; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200\020" # basic block frequency +; CHECK-NEXT: .byte 0 # basic block successor count +