Skip to content

Commit c607085

Browse files
committed
[spr] initial version
Created using spr 1.3.4
2 parents ab2c4a0 + 2c9b1e0 commit c607085

File tree

7 files changed

+106
-126
lines changed

7 files changed

+106
-126
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -775,11 +775,6 @@ class BinaryContext {
775775
uint64_t PseudoProbeLooseMatchedSampleCount{0};
776776
/// the count of call matched samples
777777
uint64_t CallMatchedSampleCount{0};
778-
/// the number of stale functions that have matching number of blocks in
779-
/// the profile
780-
uint64_t NumStaleFuncsWithEqualBlockCount{0};
781-
/// the number of blocks that have matching size but a differing hash
782-
uint64_t NumStaleBlocksWithEqualIcount{0};
783778
} Stats;
784779

785780
// Original binary execution count stats.

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ struct CallSiteInfo {
2929
uint32_t EntryDiscriminator{0}; /// multiple entry discriminator
3030
uint64_t Count{0};
3131
uint64_t Mispreds{0};
32+
// Pseudo probe information, optional
33+
uint32_t Probe{0};
34+
bool Indirect = false;
35+
uint32_t InlineTreeNode{0};
3236

3337
bool operator==(const CallSiteInfo &Other) const {
3438
return Offset == Other.Offset && DestId == Other.DestId &&
@@ -63,6 +67,9 @@ template <> struct MappingTraits<bolt::CallSiteInfo> {
6367
YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0);
6468
YamlIO.mapRequired("cnt", CSI.Count);
6569
YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0);
70+
YamlIO.mapOptional("pp", CSI.Probe, 0);
71+
YamlIO.mapOptional("ppn", CSI.InlineTreeNode, 0);
72+
YamlIO.mapOptional("ind", CSI.Indirect, false);
6673
}
6774

6875
static const bool flow = true;
@@ -95,29 +102,20 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
95102

96103
namespace bolt {
97104
struct PseudoProbeInfo {
98-
uint32_t InlineTreeIndex = 0;
99-
uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
100-
std::vector<uint64_t> BlockProbes; // block probes with indices above 64
101-
std::vector<uint64_t> CallProbes;
102-
std::vector<uint64_t> IndCallProbes;
105+
std::vector<uint64_t> BlockProbes;
103106
std::vector<uint32_t> InlineTreeNodes;
104107

105108
bool operator==(const PseudoProbeInfo &Other) const {
106-
return InlineTreeIndex == Other.InlineTreeIndex &&
107-
BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
108-
IndCallProbes == Other.IndCallProbes;
109+
return InlineTreeNodes == Other.InlineTreeNodes &&
110+
BlockProbes == Other.BlockProbes;
109111
}
110112
};
111113
} // end namespace bolt
112114

113115
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
114116
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
115-
YamlIO.mapOptional("blx", PI.BlockMask, 0);
116-
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
117-
YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
118-
YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
119-
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
120-
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
117+
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>(1, 1));
118+
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>(1, 0));
121119
}
122120

123121
static const bool flow = true;

bolt/include/bolt/Profile/YAMLProfileWriter.h

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,15 @@ class YAMLProfileWriter {
3636
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
3737
struct InlineTreeDesc {
3838
template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
39-
using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
4039
using GUIDNumMap = GUIDMapTy<uint32_t>;
41-
GUIDNodeMap TopLevelGUIDToInlineTree;
4240
GUIDNumMap GUIDIdxMap;
4341
GUIDNumMap HashIdxMap;
4442
};
4543

4644
static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
4745
convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
48-
const InlineTreeDesc &InlineTree, uint64_t GUID);
46+
const InlineTreeDesc &InlineTree,
47+
const BinaryFunction &BF);
4948

5049
static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc>
5150
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
@@ -74,25 +73,24 @@ class YAMLProfileWriter {
7473
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
7574
const MCDecodedPseudoProbeInlineTree &Root);
7675

77-
// 0 - block probe, 1 - indirect call, 2 - direct call
78-
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
79-
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
80-
static std::vector<yaml::bolt::PseudoProbeInfo>
81-
convertNodeProbes(NodeIdToProbes &NodeProbes);
82-
8376
public:
84-
template <typename T>
85-
static std::vector<yaml::bolt::PseudoProbeInfo>
86-
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
87-
NodeIdToProbes NodeProbes;
88-
for (const MCDecodedPseudoProbe &Probe : Probes) {
89-
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
90-
if (It == InlineTreeNodeId.end())
91-
continue;
92-
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
93-
}
94-
return convertNodeProbes(NodeProbes);
95-
}
77+
class BlockProbeCtx {
78+
struct Call {
79+
uint64_t Id;
80+
uint32_t Node;
81+
bool Indirect;
82+
bool Used;
83+
};
84+
// Group block probes by node id.
85+
DenseMap<uint32_t, std::vector<uint64_t>> NodeToProbes;
86+
// Offset -> call probe
87+
DenseMap<uint32_t, Call> CallProbes;
88+
89+
public:
90+
void addBlockProbe(const InlineTreeMapTy &Map,
91+
const MCDecodedPseudoProbe &Probe, uint32_t ProbeOffset);
92+
void finalize(yaml::bolt::BinaryBasicBlockProfile &YamlBB);
93+
};
9694
};
9795
} // namespace bolt
9896
} // namespace llvm

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,12 +1505,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
15051505
if (NumAllStaleFunctions) {
15061506
const float PctStale =
15071507
NumAllStaleFunctions / (float)NumAllProfiledFunctions * 100.0f;
1508-
const float PctStaleFuncsWithEqualBlockCount =
1509-
(float)BC.Stats.NumStaleFuncsWithEqualBlockCount /
1510-
NumAllStaleFunctions * 100.0f;
1511-
const float PctStaleBlocksWithEqualIcount =
1512-
(float)BC.Stats.NumStaleBlocksWithEqualIcount /
1513-
BC.Stats.NumStaleBlocks * 100.0f;
15141508
auto printErrorOrWarning = [&]() {
15151509
if (PctStale > opts::StaleThreshold)
15161510
BC.errs() << "BOLT-ERROR: ";
@@ -1533,17 +1527,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
15331527
<< "%) belong to functions with invalid"
15341528
" (possibly stale) profile.\n";
15351529
}
1536-
BC.outs() << "BOLT-INFO: " << BC.Stats.NumStaleFuncsWithEqualBlockCount
1537-
<< " stale function"
1538-
<< (BC.Stats.NumStaleFuncsWithEqualBlockCount == 1 ? "" : "s")
1539-
<< format(" (%.1f%% of all stale)",
1540-
PctStaleFuncsWithEqualBlockCount)
1541-
<< " have matching block count.\n";
1542-
BC.outs() << "BOLT-INFO: " << BC.Stats.NumStaleBlocksWithEqualIcount
1543-
<< " stale block"
1544-
<< (BC.Stats.NumStaleBlocksWithEqualIcount == 1 ? "" : "s")
1545-
<< format(" (%.1f%% of all stale)", PctStaleBlocksWithEqualIcount)
1546-
<< " have matching icount.\n";
15471530
if (PctStale > opts::StaleThreshold) {
15481531
return createFatalBOLTError(
15491532
Twine("BOLT-ERROR: stale functions exceed specified threshold of ") +

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,33 +2376,29 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23762376
if (BF->getGUID()) {
23772377
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
23782378
YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
2379-
InlineTree, BF->getGUID());
2379+
InlineTree, *BF);
23802380
}
23812381
// Fetch probes belonging to all fragments
23822382
const AddressProbesMap &ProbeMap =
23832383
PseudoProbeDecoder->getAddress2ProbesMap();
23842384
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
23852385
Fragments.insert(BF);
2386-
DenseMap<
2387-
uint32_t,
2388-
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2389-
BlockProbes;
2386+
DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx;
23902387
for (const BinaryFunction *F : Fragments) {
23912388
const uint64_t FuncAddr = F->getAddress();
23922389
for (const MCDecodedPseudoProbe &Probe :
23932390
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
23942391
const uint32_t OutputAddress = Probe.getAddress();
23952392
const uint32_t InputOffset = BAT->translate(
23962393
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2397-
const unsigned BlockIndex = getBlock(InputOffset).second;
2398-
BlockProbes[BlockIndex].emplace_back(Probe);
2394+
const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset);
2395+
BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe,
2396+
InputOffset - BlockOffset);
23992397
}
24002398
}
24012399

2402-
for (auto &[Block, Probes] : BlockProbes) {
2403-
YamlBF.Blocks[Block].PseudoProbes =
2404-
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2405-
}
2400+
for (auto &[Block, Ctx] : BlockCtx)
2401+
Ctx.finalize(YamlBF.Blocks[Block]);
24062402
}
24072403
// Skip printing if there's no profile data
24082404
llvm::erase_if(

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,9 +350,6 @@ bool YAMLProfileReader::parseFunctionProfile(
350350
<< MismatchedCalls << " calls, and " << MismatchedEdges
351351
<< " edges in profile did not match function " << BF << '\n';
352352

353-
if (YamlBF.NumBasicBlocks != BF.size())
354-
++BC.Stats.NumStaleFuncsWithEqualBlockCount;
355-
356353
if (!opts::InferStaleProfile)
357354
return false;
358355
ArrayRef<ProbeMatchSpec> ProbeMatchSpecs;

bolt/lib/Profile/YAMLProfileWriter.cpp

Lines changed: 68 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,6 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
8787
yaml::bolt::ProfilePseudoProbeDesc Desc;
8888
InlineTreeDesc InlineTree;
8989

90-
for (const MCDecodedPseudoProbeInlineTree &TopLev :
91-
Decoder.getDummyInlineRoot().getChildren())
92-
InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
93-
9490
for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
9591
++InlineTree.HashIdxMap[FuncDesc.FuncHash];
9692

@@ -129,64 +125,80 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
129125
return {Desc, InlineTree};
130126
}
131127

132-
std::vector<yaml::bolt::PseudoProbeInfo>
133-
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
134-
struct BlockProbeInfoHasher {
135-
size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
136-
return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
137-
llvm::hash_combine_range(BPI.CallProbes),
138-
llvm::hash_combine_range(BPI.IndCallProbes));
128+
void YAMLProfileWriter::BlockProbeCtx::addBlockProbe(
129+
const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe,
130+
uint32_t ProbeOffset) {
131+
auto It = Map.find(Probe.getInlineTreeNode());
132+
if (It == Map.end())
133+
return;
134+
auto NodeId = It->second;
135+
uint32_t Index = Probe.getIndex();
136+
if (Probe.isCall())
137+
CallProbes[ProbeOffset] =
138+
Call{Index, NodeId, Probe.isIndirectCall(), false};
139+
else
140+
NodeToProbes[NodeId].emplace_back(Index);
141+
}
142+
143+
void YAMLProfileWriter::BlockProbeCtx::finalize(
144+
yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
145+
// Hash block probes by vector
146+
struct ProbeHasher {
147+
size_t operator()(const ArrayRef<uint64_t> Probes) const {
148+
return llvm::hash_combine_range(Probes);
139149
}
140150
};
141151

142-
// Check identical BlockProbeInfo structs and merge them
143-
std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
144-
BlockProbeInfoHasher>
145-
BPIToNodes;
146-
for (auto &[NodeId, Probes] : NodeProbes) {
147-
yaml::bolt::PseudoProbeInfo BPI;
148-
BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
149-
BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
150-
BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
151-
BPIToNodes[BPI].push_back(NodeId);
152+
// Check identical block probes and merge them
153+
std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
154+
ProbesToNodes;
155+
for (auto &[NodeId, Probes] : NodeToProbes) {
156+
llvm::sort(Probes);
157+
ProbesToNodes[Probes].emplace_back(NodeId);
152158
}
153-
154-
auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
155-
for (auto Id : Ids)
156-
if (Id > 64)
157-
Vec.emplace_back(Id);
158-
else
159-
Mask |= 1ull << (Id - 1);
160-
};
161-
162-
// Add to YAML with merged nodes/block mask optimizations
163-
std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
164-
YamlProbes.reserve(BPIToNodes.size());
165-
for (const auto &[BPI, Nodes] : BPIToNodes) {
166-
auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
167-
YamlBPI.CallProbes = BPI.CallProbes;
168-
YamlBPI.IndCallProbes = BPI.IndCallProbes;
169-
if (Nodes.size() == 1)
170-
YamlBPI.InlineTreeIndex = Nodes.front();
171-
else
172-
YamlBPI.InlineTreeNodes = Nodes;
173-
handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
159+
for (auto &[Probes, Nodes] : ProbesToNodes) {
160+
llvm::sort(Nodes);
161+
YamlBB.PseudoProbes.emplace_back(
162+
yaml::bolt::PseudoProbeInfo{Probes, Nodes});
163+
}
164+
for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
165+
auto It = CallProbes.find(CSI.Offset);
166+
if (It == CallProbes.end())
167+
continue;
168+
Call &Probe = It->second;
169+
CSI.Probe = Probe.Id;
170+
CSI.InlineTreeNode = Probe.Node;
171+
CSI.Indirect = Probe.Indirect;
172+
Probe.Used = true;
173+
}
174+
for (const auto &[Offset, Probe] : CallProbes) {
175+
if (Probe.Used)
176+
continue;
177+
yaml::bolt::CallSiteInfo CSI;
178+
CSI.Offset = Offset;
179+
CSI.Probe = Probe.Id;
180+
CSI.InlineTreeNode = Probe.Node;
181+
CSI.Indirect = Probe.Indirect;
182+
YamlBB.CallSites.emplace_back(CSI);
174183
}
175-
return YamlProbes;
176184
}
177185

178186
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
179187
YAMLProfileWriter::InlineTreeMapTy>
180188
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
181189
const InlineTreeDesc &InlineTree,
182-
uint64_t GUID) {
190+
const BinaryFunction &BF) {
183191
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
184192
std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
185-
auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
186-
if (It == InlineTree.TopLevelGUIDToInlineTree.end())
193+
uint64_t Addr = BF.getAddress();
194+
uint64_t Size = BF.getSize();
195+
auto Probes = Decoder.getAddress2ProbesMap().find(Addr, Addr + Size);
196+
if (Probes.empty())
187197
return {YamlInlineTree, InlineTreeNodeId};
188-
const MCDecodedPseudoProbeInlineTree *Root = It->second;
189-
assert(Root && "Malformed TopLevelGUIDToInlineTree");
198+
const MCDecodedPseudoProbe &Probe = *Probes.begin();
199+
const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode();
200+
while (Root->hasInlineSite())
201+
Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent;
190202
uint32_t Index = 0;
191203
uint32_t PrevParent = 0;
192204
uint32_t PrevGUIDIdx = 0;
@@ -230,7 +242,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
230242
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
231243
if (PseudoProbeDecoder && BF.getGUID()) {
232244
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
233-
convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
245+
convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF);
234246
}
235247

236248
BinaryFunction::BasicBlockOrderType Order;
@@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
343355
const AddressProbesMap &ProbeMap =
344356
PseudoProbeDecoder->getAddress2ProbesMap();
345357
const uint64_t FuncAddr = BF.getAddress();
346-
const std::pair<uint64_t, uint64_t> &BlockRange =
347-
BB->getInputAddressRange();
348-
const std::pair<uint64_t, uint64_t> BlockAddrRange = {
349-
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
350-
auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
351-
YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
358+
auto [Start, End] = BB->getInputAddressRange();
359+
Start += FuncAddr;
360+
End += FuncAddr;
361+
BlockProbeCtx Ctx;
362+
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End))
363+
Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start);
364+
Ctx.finalize(YamlBB);
352365
}
353366

354367
YamlBF.Blocks.emplace_back(YamlBB);

0 commit comments

Comments (0)