Skip to content
49 changes: 37 additions & 12 deletions llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,40 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};

// Per-function control-flow-graph profile: counts for basic blocks and for
// the edges between them, keyed by `UniqueBBID`.
struct CfgProfile {
  // Profile count of every basic block that has one.
  DenseMap<UniqueBBID, uint64_t> NodeCounts;
  // Profile count of every edge, indexed first by the source block and then
  // by the sink block.
  DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;

  // Looks up the count recorded for `BBID`; yields zero when the block has no
  // entry in `NodeCounts`.
  uint64_t getNodeCount(const UniqueBBID &BBID) const {
    auto NodeIt = NodeCounts.find(BBID);
    return NodeIt == NodeCounts.end() ? 0 : NodeIt->second;
  }

  // Looks up the count recorded for the edge `SrcBBID` -> `SinkBBID`; yields
  // zero when either the source block or the edge has no entry in
  // `EdgeCounts`.
  uint64_t getEdgeCount(const UniqueBBID &SrcBBID,
                        const UniqueBBID &SinkBBID) const {
    if (auto SrcIt = EdgeCounts.find(SrcBBID); SrcIt != EdgeCounts.end())
      return SrcIt->second.lookup(SinkBBID);
    return 0;
  }
};

// Raw input profile for one function: its basic-block clusters, its clone
// paths, and its CFG counts.
struct FunctionPathAndClusterInfo {
struct FunctionProfile {
// BB Cluster information specified by `UniqueBBID`s.
SmallVector<BBClusterInfo> ClusterInfo;
// Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
// Node counts for each basic block, keyed by the block's `UniqueBBID`.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge, stored as a nested map: the outer key is the
// source block and the inner key is the sink block.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
// Cfg profile data (block and edge counts), held as a `CfgProfile`.
CfgProfile Cfg;
};

class BasicBlockSectionsProfileReader {
Expand Down Expand Up @@ -81,10 +103,14 @@ class BasicBlockSectionsProfileReader {
SmallVector<SmallVector<unsigned>>
getClonePathsForFunction(StringRef FuncName) const;

// Returns the profile count for the edge from `SrcBBID` to `SinkBBID` in
// function `FuncName` or zero if it does not exist.
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
// Looks up the CfgProfile recorded for function `FuncName`, after resolving
// the name through getAliasName. Yields nullptr when the function has no
// entry in ProgramPathAndClusterInfo.
const CfgProfile *getFunctionCfgProfile(StringRef FuncName) const {
  auto ProfileIt = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
  return ProfileIt != ProgramPathAndClusterInfo.end() ? &ProfileIt->second.Cfg
                                                      : nullptr;
}

private:
StringRef getAliasName(StringRef FuncName) const {
Expand Down Expand Up @@ -132,7 +158,7 @@ class BasicBlockSectionsProfileReader {
// for (all or some of) its basic blocks. The cluster information for every
// basic block includes its cluster ID along with the position of the basic
// block in that cluster.
StringMap<FunctionPathAndClusterInfo> ProgramPathAndClusterInfo;
StringMap<FunctionProfile> ProgramPathAndClusterInfo;

// Some functions have alias names. We use this map to find the main alias
// name which appears in ProgramPathAndClusterInfo as a key.
Expand Down Expand Up @@ -192,8 +218,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<SmallVector<unsigned>>
getClonePathsForFunction(StringRef FuncName) const;

uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
const CfgProfile *getFunctionCfgProfile(StringRef FuncName) const;

// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
Expand Down
54 changes: 43 additions & 11 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
Expand Down Expand Up @@ -164,6 +165,12 @@ static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
"Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is "
"extracted from PGO related analysis."));

// Hidden flag, off by default. When set, emitBBAddrMapSection asks the
// BasicBlockSectionsProfileReaderWrapperPass (if that pass is available) for
// the function's CfgProfile and, when one exists, emits its counts in the PGO
// analysis map.
static cl::opt<bool> PgoAnalysisMapUsePropellerCfg(
"pgo-analysis-map-use-propeller-cfg",
cl::desc(
"If available, use the Propeller cfg profile in the PGO analysis map."),
cl::Hidden, cl::init(false));

static cl::opt<bool> BBAddrMapSkipEmitBBEntries(
"basic-block-address-map-skip-bb-entries",
cl::desc("Skip emitting basic block entries in the SHT_LLVM_BB_ADDR_MAP "
Expand Down Expand Up @@ -473,6 +480,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<GCModuleInfo>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}

bool AsmPrinter::doInitialization(Module &M) {
Expand Down Expand Up @@ -1532,12 +1540,16 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
assert(BBAddrMapVersion >= 2 &&
"PGOAnalysisMap only supports version 2 or later");

if (Features.FuncEntryCount) {
OutStreamer->AddComment("function entry count");
auto MaybeEntryCount = MF.getFunction().getEntryCount();
OutStreamer->emitULEB128IntValue(
MaybeEntryCount ? MaybeEntryCount->getCount() : 0);
// We will emit the BBSPR profile data if requested and available. Otherwise,
// we fall back to MBFI and MBPI.
const CfgProfile *FuncCfgProfile = nullptr;
if (PgoAnalysisMapUsePropellerCfg) {
if (auto *BBSPR = getAnalysisIfAvailable<
BasicBlockSectionsProfileReaderWrapperPass>())
FuncCfgProfile =
BBSPR->getFunctionCfgProfile(MF.getFunction().getName());
}

const MachineBlockFrequencyInfo *MBFI =
Features.BBFreq
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
Expand All @@ -1547,23 +1559,43 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
: nullptr;

if (Features.FuncEntryCount) {
OutStreamer->AddComment("function entry count");
uint64_t EntryCount = 0;
if (FuncCfgProfile) {
EntryCount = FuncCfgProfile->getNodeCount(*MF.front().getBBID());
} else {
auto MaybeEntryCount = MF.getFunction().getEntryCount();
EntryCount = MaybeEntryCount ? MaybeEntryCount->getCount() : 0;
}
OutStreamer->emitULEB128IntValue(EntryCount);
}

if (Features.BBFreq || Features.BrProb) {
for (const MachineBasicBlock &MBB : MF) {

if (Features.BBFreq) {
OutStreamer->AddComment("basic block frequency");
OutStreamer->emitULEB128IntValue(
MBFI->getBlockFreq(&MBB).getFrequency());
uint64_t BlockFrequency =
FuncCfgProfile ? FuncCfgProfile->getNodeCount(*MBB.getBBID())
: MBFI->getBlockFreq(&MBB).getFrequency();
OutStreamer->emitULEB128IntValue(BlockFrequency);
}
if (Features.BrProb) {
unsigned SuccCount = MBB.succ_size();
OutStreamer->AddComment("basic block successor count");
OutStreamer->emitULEB128IntValue(SuccCount);
OutStreamer->emitULEB128IntValue(MBB.succ_size());
for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
OutStreamer->AddComment("successor BB ID");
OutStreamer->emitULEB128IntValue(SuccMBB->getBBID()->BaseID);
OutStreamer->AddComment("successor branch probability");
OutStreamer->emitULEB128IntValue(
MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator());
// For MBPI, we emit the numerator of the probability. For BBSPR, we
// emit the raw edge count.
uint64_t EdgeFrequency =
FuncCfgProfile
? FuncCfgProfile->getEdgeCount(*MBB.getBBID(),
*SuccMBB->getBBID())
: MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator();
OutStreamer->emitULEB128IntValue(EdgeFrequency);
}
}
}
Expand Down
33 changes: 9 additions & 24 deletions llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,6 @@ BasicBlockSectionsProfileReader::getClonePathsForFunction(
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
}

// Returns the profile count recorded for the edge `SrcBBID` -> `SinkBBID` in
// function `FuncName`. Yields zero when the function, the source block, or the
// edge itself has no entry in the profile.
uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
    StringRef FuncName, const UniqueBBID &SrcBBID,
    const UniqueBBID &SinkBBID) const {
  auto FuncIt = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
  if (FuncIt != ProgramPathAndClusterInfo.end()) {
    const auto &EdgesBySrc = FuncIt->second.EdgeCounts;
    auto SrcIt = EdgesBySrc.find(SrcBBID);
    if (SrcIt != EdgesBySrc.end()) {
      auto SinkIt = SrcIt->second.find(SinkBBID);
      if (SinkIt != SrcIt->second.end())
        return SinkIt->second;
    }
  }
  return 0;
}

// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
Expand All @@ -115,7 +100,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
// the edge 1->3. Within the given clusters, each cloned block is identified by
// "<original block id>.<clone id>". For instance, 3.1 represents the first
// clone of block 3. Original blocks are specified just with their block ids. A
// block cloned multiple times appears with distinct clone ids. The CFG for bar
// block cloned multiple times appears with distinct clone ids. The Cfg for bar
// is shown below before and after cloning with its final clusters labeled.
//
// f main
Expand Down Expand Up @@ -255,12 +240,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
case 'g': { // CFG profile specifier.
case 'g': { // Cfg profile specifier.
// Skip the profile when the profile iterator (FI) refers to the
// past-the-end element.
if (FI == ProgramPathAndClusterInfo.end())
continue;
// For each node, its CFG profile is encoded as
// For each node, its Cfg profile is encoded as
// <src>:<count>,<sink_1>:<count_1>,<sink_2>:<count_2>,...
for (auto BasicBlockEdgeProfile : Values) {
if (BasicBlockEdgeProfile.empty())
Expand All @@ -279,10 +264,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
Twine("unsigned integer expected: '") + CountStr + "'");
if (i == 0) {
// The first element represents the source and its total count.
FI->second.NodeCounts[SrcBBID = *BBID] = Count;
FI->second.Cfg.NodeCounts[SrcBBID = *BBID] = Count;
continue;
}
FI->second.EdgeCounts[SrcBBID][*BBID] = Count;
FI->second.Cfg.EdgeCounts[SrcBBID][*BBID] = Count;
}
}
continue;
Expand Down Expand Up @@ -487,10 +472,10 @@ BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
return BBSPR.getClonePathsForFunction(FuncName);
}

uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const {
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
// Forwards to the wrapped BasicBlockSectionsProfileReader: yields the
// CfgProfile it holds for `FuncName`, or nullptr when it has none.
const CfgProfile *
BasicBlockSectionsProfileReaderWrapperPass::getFunctionCfgProfile(
    StringRef FuncName) const {
  const CfgProfile *Profile = BBSPR.getFunctionCfgProfile(FuncName);
  return Profile;
}

BasicBlockSectionsProfileReader &
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/X86/basic-block-sections-pgo-features.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; Verify PGO analysis map features with basic block sections profile.
;
; RUN: echo 'v1' > %t
; RUN: echo 'f foo' >> %t
; RUN: echo 'g 0:1000,1:800,2:200 1:800,3:800 2:200,3:200 3:1000' >> %t
; RUN: echo 'c 0 1 2' >> %t
;
; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -basic-block-address-map -pgo-analysis-map=all -pgo-analysis-map-use-propeller-cfg | FileCheck %s

;; foo: entry branches unconditionally to bb1, which conditionally branches to
;; bb2 or bb3; bb2 rejoins at bb3, which returns. The IR-level profile attached
;; here (!0 and !1) deliberately differs from the counts in the Propeller
;; profile written by the RUN lines above.
define void @foo(i1 %cond) nounwind !prof !0 {
entry:
br label %bb1

bb1:
br i1 %cond, label %bb2, label %bb3, !prof !1

bb2:
br label %bb3

bb3:
ret void
}

!0 = !{!"function_entry_count", i64 1500}
!1 = !{!"branch_weights", i32 1200, i32 300}

;; Verify that foo gets its PGO map from its Propeller CFG profile.

; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.foo
; CHECK-NEXT: .byte 3 # version
; CHECK-NEXT: .byte 15 # feature
; CHECK: .quad .Lfunc_begin0 # base address
; CHECK: .byte 0 # BB id
; CHECK: .byte 1 # BB id
; CHECK: .byte 2 # BB id
; CHECK: .byte 3 # BB id

; PGO Analysis Map for foo
; CHECK: .ascii "\350\007" # function entry count
; CHECK-NEXT: .ascii "\350\007" # basic block frequency
; CHECK-NEXT: .byte 1 # basic block successor count
; CHECK-NEXT: .byte 1 # successor BB ID
; CHECK-NEXT: .ascii "\240\006" # successor branch probability
; CHECK-NEXT: .ascii "\240\006" # basic block frequency
; CHECK-NEXT: .byte 2 # basic block successor count
; CHECK-NEXT: .byte 2 # successor BB ID
; CHECK-NEXT: .byte 0 # successor branch probability
; CHECK-NEXT: .byte 3 # successor BB ID
; CHECK-NEXT: .ascii "\240\006" # successor branch probability
; CHECK-NEXT: .ascii "\310\001" # basic block frequency
; CHECK-NEXT: .byte 1 # basic block successor count
; CHECK-NEXT: .byte 3 # successor BB ID
; CHECK-NEXT: .ascii "\310\001" # successor branch probability
; CHECK-NEXT: .ascii "\350\007" # basic block frequency
; CHECK-NEXT: .byte 0 # basic block successor count

;; bar: entry conditionally branches to bb1 or bb2, each of which returns.
;; Its only profile is the IR metadata (!2 and !3); the Propeller profile
;; written by the RUN lines above has no entry for it.
define void @bar(i1 %cond) nounwind !prof !2 {
entry:
br i1 %cond, label %bb1, label %bb2, !prof !3

bb1:
ret void

bb2:
ret void
}

!2 = !{!"function_entry_count", i64 80}
!3 = !{!"branch_weights", i32 2, i32 78}

;; Verify that we emit the PGO map for bar which doesn't have Propeller profile.

; CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.bar
; CHECK-NEXT: .byte 3 # version
; CHECK-NEXT: .byte 7 # feature
; CHECK: .quad .Lfunc_begin1 # function address
; CHECK: .byte 0 # BB id
; CHECK: .byte 1 # BB id
; CHECK: .byte 2 # BB id

; CHECK: .byte 80 # function entry count
; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200 " # basic block frequency
; CHECK-NEXT: .byte 2 # basic block successor count
; CHECK-NEXT: .byte 1 # successor BB ID
; CHECK-NEXT: .ascii "\200\200\200\200\004" # successor branch probability
; CHECK-NEXT: .byte 2 # successor BB ID
; CHECK-NEXT: .ascii "\200\200\200\200\004" # successor branch probability
; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200\020" # basic block frequency
; CHECK-NEXT: .byte 0 # basic block successor count
; CHECK-NEXT: .ascii "\200\200\200\200\200\200\200\020" # basic block frequency
; CHECK-NEXT: .byte 0 # basic block successor count