Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ struct CallSiteInfo {
uint32_t EntryDiscriminator{0}; /// multiple entry discriminator
uint64_t Count{0};
uint64_t Mispreds{0};
// Pseudo probe information, optional
uint32_t Probe{0};
bool Indirect = false;
uint32_t InlineTreeNode{0};

bool operator==(const CallSiteInfo &Other) const {
return Offset == Other.Offset && DestId == Other.DestId &&
Expand Down Expand Up @@ -63,6 +67,9 @@ template <> struct MappingTraits<bolt::CallSiteInfo> {
YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0);
YamlIO.mapRequired("cnt", CSI.Count);
YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0);
YamlIO.mapOptional("pp", CSI.Probe, 0);
YamlIO.mapOptional("ppn", CSI.InlineTreeNode, 0);
YamlIO.mapOptional("ind", CSI.Indirect, false);
}

static const bool flow = true;
Expand Down Expand Up @@ -95,29 +102,20 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {

namespace bolt {
struct PseudoProbeInfo {
uint32_t InlineTreeIndex = 0;
uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
std::vector<uint64_t> BlockProbes; // block probes with indices above 64
std::vector<uint64_t> CallProbes;
std::vector<uint64_t> IndCallProbes;
std::vector<uint64_t> BlockProbes;
std::vector<uint32_t> InlineTreeNodes;

bool operator==(const PseudoProbeInfo &Other) const {
return InlineTreeIndex == Other.InlineTreeIndex &&
BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
IndCallProbes == Other.IndCallProbes;
return InlineTreeNodes == Other.InlineTreeNodes &&
BlockProbes == Other.BlockProbes;
}
};
} // end namespace bolt

template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
YamlIO.mapOptional("blx", PI.BlockMask, 0);
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>(1, 1));
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>(1, 0));
}

static const bool flow = true;
Expand Down
35 changes: 17 additions & 18 deletions bolt/include/bolt/Profile/YAMLProfileWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,25 +74,24 @@ class YAMLProfileWriter {
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
const MCDecodedPseudoProbeInlineTree &Root);

// 0 - block probe, 1 - indirect call, 2 - direct call
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
static std::vector<yaml::bolt::PseudoProbeInfo>
convertNodeProbes(NodeIdToProbes &NodeProbes);

public:
template <typename T>
static std::vector<yaml::bolt::PseudoProbeInfo>
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
NodeIdToProbes NodeProbes;
for (const MCDecodedPseudoProbe &Probe : Probes) {
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
if (It == InlineTreeNodeId.end())
continue;
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
}
return convertNodeProbes(NodeProbes);
}
class BlockProbeCtx {
struct Call {
uint64_t Id;
uint32_t Node;
bool Indirect;
bool Used;
};
// Group block probes by node id.
DenseMap<uint32_t, std::vector<uint64_t>> NodeToProbes;
// Offset -> call probe
DenseMap<uint32_t, Call> CallProbes;

public:
void addBlockProbe(const InlineTreeMapTy &Map,
const MCDecodedPseudoProbe &Probe, uint32_t ProbeOffset);
void finalize(yaml::bolt::BinaryBasicBlockProfile &YamlBB);
};
};
} // namespace bolt
} // namespace llvm
Expand Down
16 changes: 6 additions & 10 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2383,26 +2383,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
DenseMap<
uint32_t,
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
BlockProbes;
DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
BlockProbes[BlockIndex].emplace_back(Probe);
const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset);
BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe,
InputOffset - BlockOffset);
}
}

for (auto &[Block, Probes] : BlockProbes) {
YamlBF.Blocks[Block].PseudoProbes =
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
}
for (auto &[Block, Ctx] : BlockCtx)
Ctx.finalize(YamlBF.Blocks[Block]);
}
// Skip printing if there's no profile data
llvm::erase_if(
Expand Down
24 changes: 4 additions & 20 deletions bolt/lib/Profile/StaleProfileMatching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,26 +348,10 @@ class StaleMatcher {
return It->second;
};

auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
&ProfileProbe,
uint32_t NodeId) {
for (uint64_t Index = 0; Index < 64; ++Index)
if (ProfileProbe.BlockMask & 1ull << Index)
++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
for (const auto &ProfileProbes :
{ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
ProfileProbe.CallProbes})
for (uint64_t ProfileProbe : ProfileProbes)
++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
};

for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
if (!ProfileProbe.InlineTreeNodes.empty())
for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
else
matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
}
for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes)
for (uint32_t Node : ProfileProbe.InlineTreeNodes)
for (uint64_t Probe : ProfileProbe.BlockProbes)
++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)];
uint32_t BestMatchCount = 0;
uint32_t TotalMatchCount = 0;
const FlowBlock *BestMatchBlock = nullptr;
Expand Down
103 changes: 58 additions & 45 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
return {Desc, InlineTree};
}

std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
struct BlockProbeInfoHasher {
size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
llvm::hash_combine_range(BPI.CallProbes),
llvm::hash_combine_range(BPI.IndCallProbes));
void YAMLProfileWriter::BlockProbeCtx::addBlockProbe(
const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe,
uint32_t ProbeOffset) {
auto It = Map.find(Probe.getInlineTreeNode());
if (It == Map.end())
return;
auto NodeId = It->second;
uint32_t Index = Probe.getIndex();
if (Probe.isCall())
CallProbes[ProbeOffset] =
Call{Index, NodeId, Probe.isIndirectCall(), false};
else
NodeToProbes[NodeId].emplace_back(Index);
}

void YAMLProfileWriter::BlockProbeCtx::finalize(
yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
// Hash block probes by vector
struct ProbeHasher {
size_t operator()(const ArrayRef<uint64_t> Probes) const {
return llvm::hash_combine_range(Probes);
}
};

// Check identical BlockProbeInfo structs and merge them
std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
BlockProbeInfoHasher>
BPIToNodes;
for (auto &[NodeId, Probes] : NodeProbes) {
yaml::bolt::PseudoProbeInfo BPI;
BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
BPIToNodes[BPI].push_back(NodeId);
// Check identical block probes and merge them
std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
ProbesToNodes;
for (auto &[NodeId, Probes] : NodeToProbes) {
llvm::sort(Probes);
ProbesToNodes[Probes].emplace_back(NodeId);
}

auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
for (auto Id : Ids)
if (Id > 64)
Vec.emplace_back(Id);
else
Mask |= 1ull << (Id - 1);
};

// Add to YAML with merged nodes/block mask optimizations
std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
YamlProbes.reserve(BPIToNodes.size());
for (const auto &[BPI, Nodes] : BPIToNodes) {
auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
YamlBPI.CallProbes = BPI.CallProbes;
YamlBPI.IndCallProbes = BPI.IndCallProbes;
if (Nodes.size() == 1)
YamlBPI.InlineTreeIndex = Nodes.front();
else
YamlBPI.InlineTreeNodes = Nodes;
handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
for (auto &[Probes, Nodes] : ProbesToNodes) {
llvm::sort(Nodes);
YamlBB.PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probes, Nodes});
}
for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
auto It = CallProbes.find(CSI.Offset);
if (It == CallProbes.end())
continue;
Call &Probe = It->second;
CSI.Probe = Probe.Id;
CSI.InlineTreeNode = Probe.Node;
CSI.Indirect = Probe.Indirect;
Probe.Used = true;
}
for (const auto &[Offset, Probe] : CallProbes) {
if (Probe.Used)
continue;
yaml::bolt::CallSiteInfo CSI;
CSI.Offset = Offset;
CSI.Probe = Probe.Id;
CSI.InlineTreeNode = Probe.Node;
CSI.Indirect = Probe.Indirect;
YamlBB.CallSites.emplace_back(CSI);
}
return YamlProbes;
}

std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
Expand Down Expand Up @@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
const std::pair<uint64_t, uint64_t> BlockAddrRange = {
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
auto [Start, End] = BB->getInputAddressRange();
Start += FuncAddr;
End += FuncAddr;
BlockProbeCtx Ctx;
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End))
Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start);
Ctx.finalize(YamlBB);
}

YamlBF.Blocks.emplace_back(YamlBB);
Expand Down
6 changes: 3 additions & 3 deletions bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ functions:
insns: 11
hash: 0x1
exec: 1
probes: [ { blx: 9 } ]
probes: [ { blk: [ 1, 4 ] } ]
inline_tree: [ { } ]
- name: foo
fid: 10
Expand All @@ -43,7 +43,7 @@ functions:
hash: 0x2
exec: 1
succ: [ { bid: 3, cnt: 0 } ]
probes: [ { blx: 3 } ]
probes: [ { blk: [ 1, 2 ] } ]
inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
- name: main
fid: 11
Expand All @@ -56,7 +56,7 @@ functions:
hash: 0x3
exec: 1
succ: [ { bid: 3, cnt: 0 } ]
probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { blk: [ 1 ] } ]
inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
pseudo_probe_desc:
gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/match-blocks-with-pseudo-probes.test
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ functions:
hash: 0xFFFFFFFFFFFFFFF1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
probes: [ { blx: 1 } ]
probes: [ { blk: [ 1 ] } ]
inline_tree: [ { g: 0 } ]
pseudo_probe_desc:
gs: [ 0xDB956436E78DD5FA ]
Expand Down
6 changes: 3 additions & 3 deletions bolt/test/X86/pseudoprobe-decoding-inline.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 9 } ]
# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 3 } ]
# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
# CHECK-YAML: probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { } ]
# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
#
# CHECK-YAML: pseudo_probe_desc:
Expand Down
7 changes: 4 additions & 3 deletions bolt/test/X86/pseudoprobe-decoding-noinline.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 9 } ]
# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 3 } ]
# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
# CHECK-YAML: inline_tree: [ { g: 2 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
# CHECK-YAML: calls: [ { off: 0x4, fid: 0, cnt: 0, pp: 2 } ]
# CHECK-YAML: probes: [ { } ]
# CHECK-YAML: inline_tree: [ { g: 1 } ]
#
# CHECK-YAML: pseudo_probe_desc:
Expand Down
Loading