-
Notifications
You must be signed in to change notification settings - Fork 15k
[BOLT] Match blocks with pseudo probes #99891
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 22 commits
5a5991a
94ffb45
0274f69
7e3d8d6
780a07e
1638ac1
144716b
2934710
b74fc8b
c38fb98
b2a3ca7
2eb7bf2
212bd00
eb6dfb9
16b5cfb
799f20c
1e9af7f
e3599d4
33f1b2a
9889f89
022c517
5109893
5bf4220
f1179b1
5076bab
4f2f642
327eb81
37793aa
ba00b22
5e47249
3902eff
d20d4d6
a857d32
cddea6a
9746055
3dcef48
ba149d9
c35e8ac
1c469cf
97f8101
e0a705e
66fe5d5
36197b1
bfa0afc
0f455d0
8fafc04
b1be6e6
4c5156c
205c79c
544a6ad
ee214d5
0bb4e3a
880bd37
2ba5591
75d6229
3b4e3f4
41e1fa0
7ee82b6
648f2bb
e8e1cb9
ebd3acf
c84de42
e7bce6d
2502434
956bcf2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,8 @@ namespace opts { | |
|
|
||
| extern cl::opt<bool> TimeRewrite; | ||
| extern cl::OptionCategory BoltOptCategory; | ||
| extern cl::opt<unsigned> Verbosity; | ||
| extern cl::opt<bool> ProfileUsePseudoProbes; | ||
|
|
||
| cl::opt<bool> | ||
| InferStaleProfile("infer-stale-profile", | ||
|
|
@@ -191,6 +193,8 @@ struct BlendedBlockHash { | |
| /// release. | ||
| class StaleMatcher { | ||
| public: | ||
| StaleMatcher(const uint64_t YamlBFGUID) : YamlBFGUID(YamlBFGUID) {} | ||
|
|
||
| /// Initialize stale matcher. | ||
| void init(const std::vector<FlowBlock *> &Blocks, | ||
| const std::vector<BlendedBlockHash> &Hashes, | ||
|
|
@@ -208,11 +212,33 @@ class StaleMatcher { | |
| } | ||
| } | ||
|
|
||
| /// Find the most similar block for a given hash. | ||
| const FlowBlock *matchBlock(BlendedBlockHash BlendedHash, | ||
| uint64_t CallHash) const { | ||
| /// Adds a pseudo probe to the list of probes recorded for a pseudo probe index. | ||
| void mapIndexToProbe(uint64_t Index, const MCDecodedPseudoProbe *Probe) { | ||
| IndexToBBPseudoProbes[Index].push_back(Probe); | ||
| } | ||
|
|
||
| /// Creates a mapping from a pseudo probe to a flow block. | ||
| void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { | ||
| BBPseudoProbeToBlock[Probe] = Block; | ||
| } | ||
|
|
||
| /// Find the most similar flow block for a profile block given its hashes and | ||
| /// pseudo probe information. | ||
| const FlowBlock * | ||
| matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash, | ||
| const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) { | ||
| const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash); | ||
| return BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash); | ||
| if (BestBlock) { | ||
| ++MatchedWithOpcodes; | ||
| return BestBlock; | ||
| } | ||
| BestBlock = matchWithCalls(BlendedHash, CallHash); | ||
| if (BestBlock) | ||
| return BestBlock; | ||
| BestBlock = matchWithPseudoProbes(BlendedHash, PseudoProbes); | ||
aaupov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (BestBlock) | ||
| MatchedWithPseudoProbes.insert(BlendedHash.combine()); | ||
| return BestBlock; | ||
| } | ||
|
|
||
| /// Returns true if the two basic blocks (in the binary and in the profile) | ||
|
|
@@ -223,10 +249,31 @@ class StaleMatcher { | |
| return Hash1.InstrHash == Hash2.InstrHash; | ||
| } | ||
|
|
||
| /// Returns true if a profiled block was matched with its pseudo probe. | ||
| bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) { | ||
| return MatchedWithPseudoProbes.find(YamlBBHash.combine()) != | ||
| MatchedWithPseudoProbes.end(); | ||
| } | ||
|
|
||
| /// Returns the number of blocks matched with opcodes. | ||
| size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; } | ||
|
|
||
| /// Returns the number of blocks matched with pseudo probes. | ||
| size_t getNumBlocksMatchedWithPseudoProbes() const { | ||
| return MatchedWithPseudoProbes.size(); | ||
| } | ||
|
|
||
| private: | ||
| using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>; | ||
| std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks; | ||
| std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks; | ||
| std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>> | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| IndexToBBPseudoProbes; | ||
| std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *> | ||
| BBPseudoProbeToBlock; | ||
| std::unordered_set<uint64_t> MatchedWithPseudoProbes; | ||
| const uint64_t YamlBFGUID{0}; | ||
| uint64_t MatchedWithOpcodes{0}; | ||
|
|
||
| // Uses OpcodeHash to find the most similar block for a given hash. | ||
| const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const { | ||
|
|
@@ -266,6 +313,65 @@ class StaleMatcher { | |
| } | ||
| return BestBlock; | ||
| } | ||
| // Uses pseudo probe information to attach the profile to the appropriate | ||
| // block. | ||
| const FlowBlock *matchWithPseudoProbes( | ||
| BlendedBlockHash BlendedHash, | ||
| const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) const { | ||
| if (!YamlBFGUID) | ||
| return nullptr; | ||
|
|
||
| if (opts::Verbosity >= 3) | ||
| outs() << "BOLT-INFO: attempting to match block with pseudo probes\n"; | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| // Searches for the pseudo probe attached to the matched function's block, | ||
| // ignoring pseudo probes attached to function calls and inlined functions' | ||
| // blocks. | ||
| std::vector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes; | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| for (const auto &PseudoProbe : PseudoProbes) { | ||
| // Ensures that pseudo probe information belongs to the appropriate | ||
| // function and not an inlined function. | ||
| if (PseudoProbe.GUID != YamlBFGUID) | ||
| continue; | ||
| // Skips pseudo probes attached to function calls. | ||
| if (PseudoProbe.Type != static_cast<uint8_t>(PseudoProbeType::Block)) | ||
| continue; | ||
|
|
||
| BlockPseudoProbes.push_back(&PseudoProbe); | ||
| } | ||
| // Returns nullptr unless there is a 1:1 mapping between the yaml block's | ||
| // pseudo probe and a binary pseudo probe. | ||
| if (BlockPseudoProbes.size() == 0) { | ||
| if (opts::Verbosity >= 3) | ||
| errs() << "BOLT-WARNING: no pseudo probes in profile block\n"; | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return nullptr; | ||
| } | ||
| if (BlockPseudoProbes.size() > 1) { | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (opts::Verbosity >= 3) | ||
| errs() << "BOLT-WARNING: more than 1 pseudo probes in profile block\n"; | ||
| return nullptr; | ||
| } | ||
| uint64_t Index = BlockPseudoProbes[0]->Index; | ||
| auto It = IndexToBBPseudoProbes.find(Index); | ||
| if (It == IndexToBBPseudoProbes.end()) { | ||
| if (opts::Verbosity >= 3) | ||
| errs() << "BOLT-WARNING: no block pseudo probes found within binary " | ||
| "block at index\n"; | ||
| return nullptr; | ||
| } | ||
| if (It->second.size() > 1) { | ||
| if (opts::Verbosity >= 3) | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| errs() << "BOLT-WARNING: more than 1 block pseudo probes in binary " | ||
| "block at index\n"; | ||
| return nullptr; | ||
| } | ||
| const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0]; | ||
| auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe); | ||
| assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() && | ||
| "All binary pseudo probes should belong a binary basic block"); | ||
|
|
||
| return BinaryPseudoProbeIt->second; | ||
| } | ||
| }; | ||
|
|
||
| void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { | ||
|
|
@@ -447,15 +553,36 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { | |
| /// of the basic blocks in the binary, the count is "matched" to the block. | ||
| /// Similarly, if both the source and the target of a count in the profile are | ||
| /// matched to a jump in the binary, the count is recorded in CFG. | ||
| size_t | ||
| matchWeightsByHashes(BinaryContext &BC, | ||
| const BinaryFunction::BasicBlockOrderType &BlockOrder, | ||
| const yaml::bolt::BinaryFunctionProfile &YamlBF, | ||
| FlowFunction &Func, HashFunction HashFunction, | ||
| YAMLProfileReader::ProfileLookupMap &IdToYamlBF) { | ||
| size_t matchWeightsByHashes( | ||
aaupov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, | ||
| const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, | ||
| HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, | ||
| const BinaryFunction &BF) { | ||
|
|
||
| assert(Func.Blocks.size() == BlockOrder.size() + 2); | ||
|
|
||
| // Computes the YamlBFGUID passed to the StaleMatcher. If the pseudo probe | ||
| // descriptor hash of either the binary or the profiled function is missing | ||
| // (zero), or the two hashes differ, the GUID is set to zero so that pseudo | ||
| // probe block matching is skipped. Otherwise, the YamlBF's GUID is used. | ||
| const MCPseudoProbeDecoder *PseudoProbeDecoder = | ||
| opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; | ||
| uint64_t BFPseudoProbeDescHash = 0; | ||
| if (opts::ProfileUsePseudoProbes && BF.getGUID() != 0) { | ||
| assert(PseudoProbeDecoder && | ||
| "If BF has pseudo probe, BC should have a pseudo probe decoder"); | ||
| auto &GUID2FuncDescMap = PseudoProbeDecoder->getGUID2FuncDescMap(); | ||
| auto It = GUID2FuncDescMap.find(BF.getGUID()); | ||
| if (It != GUID2FuncDescMap.end()) | ||
| BFPseudoProbeDescHash = It->second.FuncHash; | ||
| } | ||
| uint64_t YamlBFGUID = | ||
| BFPseudoProbeDescHash && YamlBF.PseudoProbeDescHash && | ||
| BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash | ||
| ? static_cast<uint64_t>(YamlBF.GUID) | ||
| : 0; | ||
|
|
||
| StaleMatcher Matcher(YamlBFGUID); | ||
| std::vector<uint64_t> CallHashes; | ||
| std::vector<FlowBlock *> Blocks; | ||
| std::vector<BlendedBlockHash> BlendedHashes; | ||
|
|
@@ -478,10 +605,31 @@ matchWeightsByHashes(BinaryContext &BC, | |
| Blocks.push_back(&Func.Blocks[I + 1]); | ||
| BlendedBlockHash BlendedHash(BB->getHash()); | ||
| BlendedHashes.push_back(BlendedHash); | ||
| // Collects pseudo probes attached to the BB for use in the StaleMatcher. | ||
| if (opts::ProfileUsePseudoProbes && PseudoProbeDecoder) { | ||
| const AddressProbesMap &ProbeMap = | ||
| PseudoProbeDecoder->getAddress2ProbesMap(); | ||
| const uint64_t FuncAddr = BF.getAddress(); | ||
| const std::pair<uint64_t, uint64_t> &BlockRange = | ||
| BB->getInputAddressRange(); | ||
| const auto &BlockProbes = | ||
| llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), | ||
| ProbeMap.lower_bound(FuncAddr + BlockRange.second)); | ||
| for (const auto &[_, Probes] : BlockProbes) { | ||
| for (const MCDecodedPseudoProbe &Probe : Probes) { | ||
| if (Probe.getInlineTreeNode()->hasInlineSite()) | ||
shawbyoung marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
aaupov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| continue; | ||
| if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block)) | ||
| continue; | ||
| Matcher.mapIndexToProbe(Probe.getIndex(), &Probe); | ||
| Matcher.mapProbeToBB(&Probe, Blocks[I]); | ||
|
||
| } | ||
| } | ||
| } | ||
|
|
||
| LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = " | ||
| << Twine::utohexstr(BB->getHash()) << "\n"); | ||
| } | ||
| StaleMatcher Matcher; | ||
| Matcher.init(Blocks, BlendedHashes, CallHashes); | ||
|
|
||
| // Index in yaml profile => corresponding (matched) block | ||
|
|
@@ -502,7 +650,7 @@ matchWeightsByHashes(BinaryContext &BC, | |
| else | ||
| llvm_unreachable("Unhandled HashFunction"); | ||
| } | ||
| MatchedBlock = Matcher.matchBlock(YamlHash, CallHash); | ||
| MatchedBlock = Matcher.matchBlock(YamlHash, CallHash, YamlBB.PseudoProbes); | ||
| if (MatchedBlock == nullptr && YamlBB.Index == 0) | ||
| MatchedBlock = Blocks[0]; | ||
| if (MatchedBlock != nullptr) { | ||
|
|
@@ -516,9 +664,13 @@ matchWeightsByHashes(BinaryContext &BC, | |
| << "\n"); | ||
| // Update matching stats accounting for the matched block. | ||
| if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) { | ||
| ++BC.Stats.NumMatchedBlocks; | ||
| BC.Stats.MatchedSampleCount += YamlBB.ExecCount; | ||
| ++BC.Stats.NumExactMatchedBlocks; | ||
| BC.Stats.ExactMatchedSampleCount += YamlBB.ExecCount; | ||
| LLVM_DEBUG(dbgs() << " exact match\n"); | ||
| } else if (Matcher.isPseudoProbeMatch(YamlHash)) { | ||
| ++BC.Stats.NumPseudoProbeMatchedBlocks; | ||
| BC.Stats.PseudoProbeMatchedSampleCount += YamlBB.ExecCount; | ||
| LLVM_DEBUG(dbgs() << " pseudo probe match\n"); | ||
| } else { | ||
| LLVM_DEBUG(dbgs() << " loose match\n"); | ||
| } | ||
|
|
@@ -535,6 +687,13 @@ matchWeightsByHashes(BinaryContext &BC, | |
| BC.Stats.StaleSampleCount += YamlBB.ExecCount; | ||
| } | ||
|
|
||
| if (opts::Verbosity >= 2) { | ||
| outs() << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithPseudoProbes() | ||
| << " blocks matched with pseudo probes\n" | ||
| << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes() | ||
| << " blocks matched with opcodes\n"; | ||
| } | ||
|
|
||
| // Match jumps from the profile to the jumps from CFG | ||
| std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0); | ||
| std::vector<uint64_t> InWeight(Func.Blocks.size(), 0); | ||
|
|
@@ -828,7 +987,7 @@ bool YAMLProfileReader::inferStaleProfile( | |
| // Match as many block/jump counts from the stale profile as possible | ||
| size_t MatchedBlocks = | ||
| matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, | ||
| YamlBP.Header.HashFunction, IdToYamLBF); | ||
| YamlBP.Header.HashFunction, IdToYamLBF, BF); | ||
|
|
||
| // Adjust the flow function by marking unreachable blocks Unlikely so that | ||
| // they don't get any counts assigned. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| ## Tests stale block matching with pseudo probes. | ||
|
|
||
| # REQUIRES: system-linux | ||
| # RUN: split-file %s %t | ||
| # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o | ||
| # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib | ||
| # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ | ||
| # RUN: --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile --profile-use-pseudo-probes 2>&1 | FileCheck %s | ||
|
|
||
| # CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale) | ||
|
|
||
| #--- main.s | ||
| .text | ||
| .globl main # -- Begin function main | ||
| .p2align 4, 0x90 | ||
| .type main,@function | ||
| main: # @main | ||
| # %bb.0: | ||
| pushq %rbp | ||
| movq %rsp, %rbp | ||
| movl $0, -4(%rbp) | ||
| .pseudoprobe 15822663052811949562 1 0 0 main | ||
aaupov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| xorl %eax, %eax | ||
| popq %rbp | ||
| retq | ||
| .Lfunc_end0: | ||
| .size main, .Lfunc_end0-main | ||
| # -- End function | ||
| .section .pseudo_probe_desc,"",@progbits | ||
| .quad -2624081020897602054 | ||
| .quad 4294967295 | ||
| .byte 4 | ||
| .ascii "main" | ||
| .ident "clang version 17.0.6 (CentOS 17.0.6-5.el9)" | ||
| .section ".note.GNU-stack","",@progbits | ||
| .addrsig | ||
|
|
||
| #--- yaml | ||
| --- | ||
| header: | ||
| profile-version: 1 | ||
| binary-name: 'match-blocks-with-pseudo-probes.s.tmp.exe' | ||
| binary-build-id: '<unknown>' | ||
| profile-flags: [ lbr ] | ||
| profile-origin: branch profile reader | ||
| profile-events: '' | ||
| dfs-order: false | ||
| hash-func: xxh3 | ||
| functions: | ||
| - name: main | ||
| fid: 0 | ||
| hash: 0x0000000000000001 | ||
| exec: 1 | ||
| nblocks: 6 | ||
| guid: 0xDB956436E78DD5FA | ||
| pseudo_probe_desc_hash: 4294967295 | ||
| blocks: | ||
| - bid: 1 | ||
| hash: 0x0000000000000001 | ||
| insns: 1 | ||
| succ: [ { bid: 3, cnt: 1} ] | ||
| pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 } ] | ||
Uh oh!
There was an error while loading. Please reload this page.