diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 47ee892728ad0..876a5de52e019 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -757,10 +757,8 @@ class BinaryContext { uint32_t NumExactMatchedBlocks{0}; /// the number of loosely matched basic blocks uint32_t NumLooseMatchedBlocks{0}; - /// the number of exactly pseudo probe matched basic blocks - uint32_t NumPseudoProbeExactMatchedBlocks{0}; - /// the number of loosely pseudo probe matched basic blocks - uint32_t NumPseudoProbeLooseMatchedBlocks{0}; + /// the number of pseudo probe matched basic blocks + uint32_t NumPseudoProbeMatchedBlocks{0}; /// the number of call matched basic blocks uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile @@ -769,10 +767,8 @@ class BinaryContext { uint64_t ExactMatchedSampleCount{0}; /// the count of loosely matched samples uint64_t LooseMatchedSampleCount{0}; - /// the count of exactly pseudo probe matched samples - uint64_t PseudoProbeExactMatchedSampleCount{0}; - /// the count of loosely pseudo probe matched samples - uint64_t PseudoProbeLooseMatchedSampleCount{0}; + /// the count of pseudo probe matched samples + uint64_t PseudoProbeMatchedSampleCount{0}; /// the count of call matched samples uint64_t CallMatchedSampleCount{0}; } Stats; diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index 91f51f30e3ca4..781b89dd8aed9 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -45,9 +45,6 @@ class YAMLProfileReader : public ProfileReaderBase { using ProfileLookupMap = DenseMap; - using GUIDInlineTreeMap = - std::unordered_map; - /// A class for matching binary functions in functions in the YAML profile. /// First, a call graph is constructed for both profiled and binary functions. /// Then functions are hashed based on the names of their callee/caller @@ -110,51 +107,40 @@ class YAMLProfileReader : public ProfileReaderBase { // (profile) (binary) // | blocks ^ // v | - // yaml::bolt::BinaryBasicBlockProfile ~= FlowBlock - // ||| probes ^ (majority vote) - // v ||| BBPseudoProbeToBlock - // yaml::bolt::PseudoProbeInfo MCDecodedPseudoProbe + // BinaryBasicBlockProfile BasicBlock + // | probes ^ + // v | address + // PseudoProbeInfo MCDecodedPseudoProbe // | InlineTreeIndex ^ // v | probe id // [ profile node id (uint32_t) -> MCDecodedPseudoProbeInlineTree *] // InlineTreeNodeMapTy - class InlineTreeNodeMapTy { - DenseMap Map; - - void mapInlineTreeNode(uint32_t ProfileNodeIdx, - const MCDecodedPseudoProbeInlineTree *BinaryNode) { - auto Res = Map.try_emplace(ProfileNodeIdx, BinaryNode); - assert(Res.second && - "Duplicate mapping from profile node index to binary inline tree"); - (void)Res; - } - - public: - /// Returns matched InlineTree * for a given profile inline_tree_id. - const MCDecodedPseudoProbeInlineTree * - getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const { - auto It = Map.find(ProfileInlineTreeNodeId); - if (It == Map.end()) - return nullptr; - return It->second; - } + using InlineTreeNodeMapTy = + std::vector; - // Match up \p YamlInlineTree with binary inline tree rooted at \p Root. - // Return the number of matched nodes. - // - // This function populates the mapping from profile inline tree node id to a - // corresponding binary MCDecodedPseudoProbeInlineTree node. - size_t matchInlineTrees( - const MCPseudoProbeDecoder &Decoder, - const std::vector &YamlInlineTree, - const MCDecodedPseudoProbeInlineTree *Root); - }; - - // Partial probe matching specification: matched inline tree and corresponding - // BinaryFunctionProfile + // Match up \p YamlInlineTree with binary inline tree rooted at \p Root. + // Return the number of matched nodes. + // + // This function populates the \p Map indexed by \p YamlInlineTree from + // profile inline tree node id to a corresponding binary + // MCDecodedPseudoProbeInlineTree node on a successful match or nullptr. + size_t matchInlineTreesImpl( + BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF, + const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx, + ArrayRef YamlInlineTree, + MutableArrayRef Map, float Scale); + + /// Support mapping the profile to the same binary function more than once. + /// The index is used to check and prevent recursive (backtracking) matches. + /// The value is a node map and scale factor, used to downscale outlined + /// profile when inlining it into the function. + using RootIdxToMapTy = + std::map>; + /// Probe matching specification: map from \p BinaryFunctionProfile to the + /// partial match specification \p RootIdxToMapTy. using ProbeMatchSpec = - std::pair>; + std::unordered_map; private: /// Adjustments for basic samples profiles (without LBR). @@ -183,18 +169,43 @@ class YAMLProfileReader : public ProfileReaderBase { /// Map a common LTO prefix to a set of binary functions. StringMap> LTOCommonNameFunctionMap; + /// For pseudo probe function matching. + /// Set of profile GUIDs. + std::unordered_set YamlGUIDs; + /// Map binary function GUID to binary function. + std::unordered_multimap GUIDToBF; + /// Function names in profile. StringSet<> ProfileFunctionNames; /// BinaryFunction pointers indexed by YamlBP functions. std::vector ProfileBFs; - // Pseudo probe function GUID to inline tree node - GUIDInlineTreeMap TopLevelGUIDToInlineTree; - // Mapping from a binary function to its partial match specification // (YAML profile and its inline tree mapping to binary). - DenseMap> BFToProbeMatchSpecs; + DenseMap BFToProbeMatchSpecs; + + /// Pseudo probe matching stats. + struct { + // Inline tree root mismatch causes. + uint64_t MismatchingRootGUID{0}; + uint64_t MismatchingRootHash{0}; + // Inline tree internal node mismatch causes. + uint64_t MismatchingNodeHash{0}; + uint64_t MissingProfileNode{0}; + // Incomplete profile causes. + uint64_t MissingCallProbe{0}; + uint64_t MissingCallee{0}; + uint64_t MissingInlineTree{0}; + uint64_t TotalCallSites{0}; + uint64_t MissingCallCount{0}; + uint64_t TotalCallCount{0}; + // Total matched stats. + uint64_t MatchedRoots{0}; + uint64_t MatchedNodes{0}; + uint64_t AttemptedNodes{0}; + uint64_t AttemptedRoots{0}; + } ProbeMatchingStats; /// Populate \p Function profile with the one supplied in YAML format. bool parseFunctionProfile(BinaryFunction &Function, @@ -207,7 +218,7 @@ class YAMLProfileReader : public ProfileReaderBase { /// Infer function profile from stale data (collected on older binaries). bool inferStaleProfile(BinaryFunction &Function, const yaml::bolt::BinaryFunctionProfile &YamlBF, - const ArrayRef ProbeMatchSpecs); + const ProbeMatchSpec &ProbeMatchSpecs); /// Initialize maps for profile matching. void buildNameMaps(BinaryContext &BC); @@ -242,6 +253,18 @@ class YAMLProfileReader : public ProfileReaderBase { ProfiledFunctions.emplace(&BF); } + /// Return a top-level binary inline tree node for a given \p BF + const MCDecodedPseudoProbeInlineTree * + lookupTopLevelNode(const BinaryFunction &BF); + + /// Match up \p BF binary inline trees starting at root or \p Node if set, and + /// \p YamlBF profile starting at \p NodeIdx and record the mapping in + /// BFToProbeMatchSpecs. + void matchInlineTrees(BinaryFunction &BF, + const MCDecodedPseudoProbeInlineTree *Node, + yaml::bolt::BinaryFunctionProfile &YamlBF, + uint32_t NodeIdx, float Scale); + /// Check if the profile uses an event with a given \p Name. bool usesEvent(StringRef Name) const; }; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 00680045882fc..e10d60f84affd 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1552,25 +1552,14 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( - "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " + "BOLT-INFO: inference found pseudo probe match for %.2f%% of " "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", - 100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / - BC.Stats.NumStaleBlocks, - BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, - 100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / + 100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks, + BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.PseudoProbeMatchedSampleCount / BC.Stats.StaleSampleCount, - BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); - BC.outs() << format( - "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " - "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" - " (%zu out of %zu stale)\n", - 100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / - BC.Stats.NumStaleBlocks, - BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, - 100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / - BC.Stats.StaleSampleCount, - BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount); BC.outs() << format( "BOLT-INFO: inference found a call match for %.2f%% of basic " "blocks" diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index f1cffde975520..3d3d9384acc54 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -217,47 +217,23 @@ class StaleMatcher { } } - /// Creates a mapping from a pseudo probe to a flow block. - void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { - BBPseudoProbeToBlock[Probe] = Block; - } - enum MatchMethod : char { MATCH_EXACT = 0, - MATCH_PROBE_EXACT, - MATCH_PROBE_LOOSE, MATCH_OPCODE, MATCH_CALL, NO_MATCH }; - /// Find the most similar flow block for a profile block given blended hash. + /// Find the most similar flow block for a profile block given its hashes. std::pair - matchBlockStrict(BlendedBlockHash BlendedHash) { + matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash) { const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash); if (Block && ExactHash) return {Block, MATCH_EXACT}; - return {nullptr, NO_MATCH}; - } - - /// Find the most similar flow block for a profile block given pseudo probes. - std::pair matchBlockProbe( - const ArrayRef PseudoProbes, - const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) { - const auto &[ProbeBlock, ExactProbe] = - matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap); - if (ProbeBlock) - return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE}; - return {nullptr, NO_MATCH}; - } - - /// Find the most similar flow block for a profile block given its hashes. - std::pair - matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) { if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash)) return {CallBlock, MATCH_CALL}; - if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first) - return {OpcodeBlock, MATCH_OPCODE}; + if (Block) + return {Block, MATCH_OPCODE}; return {nullptr, NO_MATCH}; } @@ -273,7 +249,6 @@ class StaleMatcher { using HashBlockPairType = std::pair; std::unordered_map> OpHashToBlocks; std::unordered_map> CallHashToBlocks; - DenseMap BBPseudoProbeToBlock; // Uses OpcodeHash to find the most similar block for a given hash. std::pair @@ -316,57 +291,6 @@ class StaleMatcher { } return BestBlock; } - - /// Matches a profile block with a binary block based on pseudo probes. - /// Returns the best matching block (or nullptr) and whether the match is - /// unambiguous. - std::pair matchWithPseudoProbes( - const ArrayRef BlockPseudoProbes, - const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const { - - if (!opts::StaleMatchingWithPseudoProbes) - return {nullptr, false}; - - DenseMap FlowBlockMatchCount; - - auto matchProfileProbeToBlock = [&](uint32_t NodeId, - uint64_t ProbeId) -> const FlowBlock * { - const MCDecodedPseudoProbeInlineTree *BinaryNode = - InlineTreeNodeMap.getInlineTreeNode(NodeId); - if (!BinaryNode) - return nullptr; - const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0, - nullptr); - ArrayRef BinaryProbes = BinaryNode->getProbes(); - auto BinaryProbeIt = llvm::lower_bound( - BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) { - return LHS.getIndex() < RHS.getIndex(); - }); - if (BinaryProbeIt == BinaryNode->getProbes().end() || - BinaryProbeIt->getIndex() != ProbeId) - return nullptr; - auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt); - if (It == BBPseudoProbeToBlock.end()) - return nullptr; - return It->second; - }; - - for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) - for (uint32_t Node : ProfileProbe.InlineTreeNodes) - for (uint64_t Probe : ProfileProbe.BlockProbes) - ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)]; - uint32_t BestMatchCount = 0; - uint32_t TotalMatchCount = 0; - const FlowBlock *BestMatchBlock = nullptr; - for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) { - TotalMatchCount += Count; - if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock)) - continue; - BestMatchBlock = FlowBlock; - BestMatchCount = Count; - } - return {BestMatchBlock, BestMatchCount == TotalMatchCount}; - } }; void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { @@ -549,15 +473,16 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -size_t matchWeights( - BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, - HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, - const BinaryFunction &BF, - const ArrayRef ProbeMatchSpecs); +size_t matchWeights(BinaryContext &BC, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, + FlowFunction &Func, HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs); std::pair> -initMatcher(BinaryContext &BC, const BinaryFunction &BF, +initMatcher(BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, FlowFunction &Func, HashFunction HashFunction) { @@ -589,25 +514,11 @@ initMatcher(BinaryContext &BC, const BinaryFunction &BF, << Twine::utohexstr(BB->getHash()) << "\n"); } StaleMatcher Matcher; - // Collects function pseudo probes for use in the StaleMatcher. - if (opts::StaleMatchingWithPseudoProbes) { - const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - assert(Decoder && - "If pseudo probes are in use, pseudo probe decoder should exist"); - const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); - const uint64_t FuncAddr = BF.getAddress(); - for (const MCDecodedPseudoProbe &Probe : - ProbeMap.find(FuncAddr, FuncAddr + BF.getSize())) - if (const BinaryBasicBlock *BB = - BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr)) - Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); - } Matcher.init(Blocks, BlendedHashes, CallHashes); return {Matcher, BlendedHashes}; } -using FlowBlockTy = - std::pair; +using FlowBlockTy = std::pair; using ProfileBlockMatchMap = DenseMap; // Match \p YamlBF blocks and return a mapping from block index to matched flow @@ -616,47 +527,15 @@ ProfileBlockMatchMap matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, - StaleMatcher &Matcher, ArrayRef BlendedHashes, - const ArrayRef ProbeMatchSpecs) { + StaleMatcher &Matcher, ArrayRef BlendedHashes) { ProfileBlockMatchMap MatchedBlocks; - // Map of FlowBlock and matching method. - DenseMap MatchedFlowBlocks; - - auto addMatchedBlock = - [&](std::pair BlockMethod, - const yaml::bolt::BinaryFunctionProfile &YamlBP, - const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { - const auto &[MatchedBlock, Method] = BlockMethod; - if (!MatchedBlock) - return; - // Don't override earlier matches - if (MatchedFlowBlocks.contains(MatchedBlock)) - return; - MatchedFlowBlocks.try_emplace(MatchedBlock, Method); - MatchedBlocks[YamlBB.Index] = {MatchedBlock, &YamlBB}; - }; - - // Match blocks from the profile to the blocks in CFG by strict hash. + + // Match blocks from the profile to the blocks in CFG. for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { // Update matching stats. ++BC.Stats.NumStaleBlocks; BC.Stats.StaleSampleCount += YamlBB.ExecCount; - assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); - BlendedBlockHash YamlHash(YamlBB.Hash); - addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB); - } - // Match blocks from the profile to the blocks in CFG by pseudo probes. - for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) { - if (&YamlBP.get() != &YamlBF) - continue; - for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks) - if (!BB.PseudoProbes.empty()) - addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap), - YamlBP, BB); - } - // Match blocks from the profile to the blocks in CFG with loose methods. - for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); BlendedBlockHash YamlHash(YamlBB.Hash); @@ -670,7 +549,7 @@ matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, else llvm_unreachable("Unhandled HashFunction"); } - auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash); + auto [MatchedBlock, Method] = Matcher.matchBlock(YamlHash, CallHash); if (MatchedBlock == nullptr && YamlBB.Index == 0) { MatchedBlock = Matcher.EntryBlock; // Report as loose match @@ -682,20 +561,15 @@ matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, << "\n"); continue; } - addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB); - } - - for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchedBlocks) { - const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; - StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); + MatchedBlocks[YamlBB.Index] = {MatchedBlock, 1}; BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; - LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" - << " with hash " << Twine::utohexstr(YamlBB->Hash) + LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")" + << " with hash " << Twine::utohexstr(YamlBB.Hash) << " to BB (index = " << MatchedBlock->Index - 1 << ")" << " with hash " << Twine::utohexstr(BinHash.combine()) << "\n"); (void)BinHash; - uint64_t ExecCount = YamlBB->ExecCount; + uint64_t ExecCount = YamlBB.ExecCount; // Update matching stats accounting for the matched block. switch (Method) { case StaleMatcher::MATCH_EXACT: @@ -703,16 +577,6 @@ matchBlocks(BinaryContext &BC, const yaml::bolt::BinaryFunctionProfile &YamlBF, BC.Stats.ExactMatchedSampleCount += ExecCount; LLVM_DEBUG(dbgs() << " exact match\n"); break; - case StaleMatcher::MATCH_PROBE_EXACT: - ++BC.Stats.NumPseudoProbeExactMatchedBlocks; - BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); - break; - case StaleMatcher::MATCH_PROBE_LOOSE: - ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; - BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; - LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); - break; case StaleMatcher::MATCH_CALL: ++BC.Stats.NumCallMatchedBlocks; BC.Stats.CallMatchedSampleCount += ExecCount; @@ -746,8 +610,8 @@ void transferEdgeWeights(ProfileBlockMatchMap &MatchedBlocks, const uint64_t SrcIndex = YamlBB.Index; const uint64_t DstIndex = YamlSI.Index; - const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex).first; - const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex).first; + const auto &[MatchedSrcBlock, SrcScale] = MatchedBlocks.lookup(SrcIndex); + const auto &[MatchedDstBlock, DstScale] = MatchedBlocks.lookup(DstIndex); if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { // Find a jump between the two blocks @@ -760,16 +624,18 @@ void transferEdgeWeights(ProfileBlockMatchMap &MatchedBlocks, } // Assign the weight, if the corresponding jump is found if (Jump != nullptr) { - Jump->Weight = YamlSI.Count; - Jump->HasUnknownWeight = false; + if (uint64_t Count = SrcScale * YamlSI.Count) { + Jump->Weight += Count; + Jump->HasUnknownWeight = false; + } } } // Assign the weight for the src block, if it is found if (MatchedSrcBlock != nullptr) - OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; + OutWeight[MatchedSrcBlock->Index] += SrcScale * YamlSI.Count; // Assign the weight for the dst block, if it is found if (MatchedDstBlock != nullptr) - InWeight[MatchedDstBlock->Index] += YamlSI.Count; + InWeight[MatchedDstBlock->Index] += DstScale * YamlSI.Count; } } } @@ -801,27 +667,79 @@ size_t setBlockWeights(FlowFunction &Func, ArrayRef OutWeight, /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -size_t matchWeights( - BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, - HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, - const BinaryFunction &BF, - const ArrayRef ProbeMatchSpecs) { +size_t matchWeights(BinaryContext &BC, + const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, + FlowFunction &Func, HashFunction HashFunction, + YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const YAMLProfileReader::ProbeMatchSpec &ProbeMatchSpecs) { using namespace yaml::bolt; assert(Func.Blocks.size() == BlockOrder.size() + 2); auto [Matcher, BlendedHashes] = - initMatcher(BC, BF, BlockOrder, Func, HashFunction); + initMatcher(BC, BlockOrder, Func, HashFunction); + + auto getBlock = [&](const MCDecodedPseudoProbeInlineTree *Node, + uint64_t Id) -> const FlowBlock * { + if (!Node) + return nullptr; + const uint64_t Addr = BF.getAddress(); + const MCDecodedPseudoProbe Dummy(0, Id, PseudoProbeType::Block, 0, 0, + nullptr); + ArrayRef Probes = Node->getProbes(); + auto Cmp = [](auto &L, auto &R) { return L.getIndex() < R.getIndex(); }; + const MCDecodedPseudoProbe *Probe = llvm::lower_bound(Probes, Dummy, Cmp); + if (Probe == Probes.end() || Probe->getIndex() != Id) + return nullptr; + if (auto *BB = BF.getBasicBlockContainingOffset(Probe->getAddress() - Addr)) + return &Func.Blocks[BB->getIndex() + 1]; + return nullptr; + }; // Match jumps from the profile to the jumps from CFG std::vector OutWeight(Func.Blocks.size(), 0); std::vector InWeight(Func.Blocks.size(), 0); - ProfileBlockMatchMap MatchedBlocks = - matchBlocks(BC, YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes, - ProbeMatchSpecs); - transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, YamlBF); + // Match blocks from the profile to the blocks in CFG by pseudo probes. + for (const auto &[YamlBF, StartIdxNodeMap] : ProbeMatchSpecs) { + // Block index => matched block + its block profile + ProfileBlockMatchMap MatchedBlocks; + for (const auto &[RootIdx, NodeMapScale] : StartIdxNodeMap) { + ArrayRef NodeMap = NodeMapScale.first; + const float Scale = NodeMapScale.second; + for (const BinaryBasicBlockProfile &BB : YamlBF->Blocks) { + auto matchProbe = [&](uint32_t Node, uint64_t Id) { + if (const FlowBlock *Block = getBlock(NodeMap[Node], Id)) { + MatchedBlocks[BB.Index] = {Block, Scale}; + ++BC.Stats.NumPseudoProbeMatchedBlocks; + BC.Stats.PseudoProbeMatchedSampleCount += BB.ExecCount; + } + }; + for (const PseudoProbeInfo &PI : BB.PseudoProbes) + for (uint32_t Node : PI.InlineTreeNodes) + for (uint64_t Id : PI.BlockProbes) + matchProbe(Node, Id); + for (const CallSiteInfo &CSI : BB.CallSites) + if (CSI.Probe) + matchProbe(CSI.InlineTreeNode, CSI.Probe); + } + } + ProfileBlockMatchMap StaleMatchedBlocks = matchBlocks( + BC, *YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes); + for (const auto &[Id, FlowBlockPair] : StaleMatchedBlocks) + // TBD: determine scaling factor + if (!MatchedBlocks.contains(Id)) + MatchedBlocks[Id] = FlowBlockPair; + + transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, *YamlBF); + } + if (ProbeMatchSpecs.empty()) { + ProfileBlockMatchMap MatchedBlocks = matchBlocks( + BC, YamlBF, HashFunction, IdToYamlBF, Matcher, BlendedHashes); + transferEdgeWeights(MatchedBlocks, OutWeight, InWeight, YamlBF); + } return setBlockWeights(Func, OutWeight, InWeight); } @@ -1042,7 +960,7 @@ void assignProfile(BinaryFunction &BF, bool YAMLProfileReader::inferStaleProfile( BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF, - const ArrayRef ProbeMatchSpecs) { + const ProbeMatchSpec &ProbeMatchSpecs) { NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite", "Rewrite passes", opts::TimeRewrite); diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 29388380ca216..78cea1f71a66d 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -9,16 +9,20 @@ #include "bolt/Profile/YAMLProfileReader.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" -#include "bolt/Passes/MCF.h" #include "bolt/Profile/ProfileYAMLMapping.h" #include "bolt/Utils/NameResolver.h" #include "bolt/Utils/Utils.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/edit_distance.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-prof" + using namespace llvm; namespace opts { @@ -352,11 +356,7 @@ bool YAMLProfileReader::parseFunctionProfile( if (!opts::InferStaleProfile) return false; - ArrayRef ProbeMatchSpecs; - auto BFIt = BFToProbeMatchSpecs.find(&BF); - if (BFIt != BFToProbeMatchSpecs.end()) - ProbeMatchSpecs = BFIt->second; - ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs); + ProfileMatched = inferStaleProfile(BF, YamlBF, BFToProbeMatchSpecs[&BF]); } if (ProfileMatched) BF.markProfiled(YamlBP.Header.Flags); @@ -364,6 +364,28 @@ bool YAMLProfileReader::parseFunctionProfile( return ProfileMatched; } +// Inline tree: decode index deltas and indirection through \p YamlPD and +// set decoded fields (GUID, Hash, ParentIndex). +// Probe inline tree: move InlineTreeIndex into InlineTreeNodes. +static void +decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, + yaml::bolt::BinaryFunctionProfile &YamlBF) { + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlBF.InlineTree) { + uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; + if (GUIDIdx != UINT32_MAX) + PrevGUIDIdx = GUIDIdx; + else + GUIDIdx = PrevGUIDIdx; + uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; + ParentId += InlineTreeNode.ParentIndexDelta; + InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; + InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; + InlineTreeNode.ParentIndexDelta = ParentId; + } +} + Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(Filename); @@ -396,8 +418,11 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { // Match profile to function based on a function name. buildNameMaps(BC); - // Preliminary assign function execution count. + // Preliminary assign function execution count and decode pseudo probe info. + const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; + YamlGUIDs.insert(YamlPD.GUID.begin(), YamlPD.GUID.end()); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { + decodeYamlInlineTree(YamlPD, YamlBF); if (!BF) continue; if (!BF->hasProfile()) { @@ -433,6 +458,22 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { return true; } } + if (opts::StaleMatchingWithPseudoProbes) { + SmallVector Suffixes( + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + for (const MCSymbol *Sym : BF.getSymbols()) { + StringRef SymName = Sym->getName(); + for (auto Name : {std::optional(NameResolver::restore(SymName)), + getCommonName(SymName, false, Suffixes)}) { + if (!Name) + continue; + SymName = *Name; + uint64_t GUID = Function::getGUIDAssumingExternalLinkage(SymName); + if (YamlGUIDs.count(GUID)) + return true; + } + } + } return false; } @@ -592,59 +633,260 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } -size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( - const MCPseudoProbeDecoder &Decoder, - const std::vector &DecodedInlineTree, - const MCDecodedPseudoProbeInlineTree *Root) { - // Match inline tree nodes by GUID, checksum, parent, and call site. - for (const auto &[InlineTreeNodeId, InlineTreeNode] : - llvm::enumerate(DecodedInlineTree)) { - uint64_t GUID = InlineTreeNode.GUID; - uint64_t Hash = InlineTreeNode.Hash; - uint32_t ParentId = InlineTreeNode.ParentIndexDelta; - uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; - const MCDecodedPseudoProbeInlineTree *Cur = nullptr; - if (!InlineTreeNodeId) { - Cur = Root; - } else if (const MCDecodedPseudoProbeInlineTree *Parent = - getInlineTreeNode(ParentId)) { - for (const MCDecodedPseudoProbeInlineTree &Child : - Parent->getChildren()) { - if (Child.Guid == GUID) { - if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) - Cur = &Child; - break; - } +const MCDecodedPseudoProbeInlineTree * +YAMLProfileReader::lookupTopLevelNode(const BinaryFunction &BF) { + const BinaryContext &BC = BF.getBinaryContext(); + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); + uint64_t Addr = BF.getAddress(); + uint64_t Size = BF.getSize(); + auto Probes = Decoder->getAddress2ProbesMap().find(Addr, Addr + Size); + if (Probes.empty()) + return nullptr; + const MCDecodedPseudoProbe &Probe = *Probes.begin(); + const MCDecodedPseudoProbeInlineTree *Root = Probe.getInlineTreeNode(); + while (Root->hasInlineSite()) + Root = (const MCDecodedPseudoProbeInlineTree *)Root->Parent; + return Root; +} + +size_t YAMLProfileReader::matchInlineTreesImpl( + BinaryFunction &BF, yaml::bolt::BinaryFunctionProfile &YamlBF, + const MCDecodedPseudoProbeInlineTree &Root, uint32_t RootIdx, + ArrayRef ProfileInlineTree, + MutableArrayRef Map, float Scale) { + using namespace yaml::bolt; + BinaryContext &BC = BF.getBinaryContext(); + const MCPseudoProbeDecoder &Decoder = *BC.getPseudoProbeDecoder(); + const InlineTreeNode &FuncNode = ProfileInlineTree[RootIdx]; + + using ChildMapTy = + std::unordered_map; + using CallSiteInfoTy = + std::unordered_map; + // Mapping from a parent node id to a map InlineSite -> Child node. + DenseMap ParentToChildren; + // Collect calls in the profile: map from a parent node id to a map + // InlineSite -> CallSiteInfo ptr. + DenseMap ParentToCSI; + for (const BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + // Collect callees for inlined profile matching, indexed by InlineSite. + for (const CallSiteInfo &CSI : YamlBB.CallSites) { + ProbeMatchingStats.TotalCallCount += CSI.Count; + ++ProbeMatchingStats.TotalCallSites; + if (CSI.Probe == 0) { + LLVM_DEBUG(dbgs() << "no probe for " << CSI.DestId << " " << CSI.Count + << '\n'); + ++ProbeMatchingStats.MissingCallProbe; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + const BinaryFunctionProfile *Callee = IdToYamLBF.lookup(CSI.DestId); + if (!Callee) { + LLVM_DEBUG(dbgs() << "no callee for " << CSI.DestId << " " << CSI.Count + << '\n'); + ++ProbeMatchingStats.MissingCallee; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + // Get callee GUID + if (Callee->InlineTree.empty()) { + LLVM_DEBUG(dbgs() << "no inline tree for " << Callee->Name << '\n'); + ++ProbeMatchingStats.MissingInlineTree; + ProbeMatchingStats.MissingCallCount += CSI.Count; + continue; + } + uint64_t CalleeGUID = Callee->InlineTree.front().GUID; + ParentToCSI[CSI.InlineTreeNode][InlineSite(CalleeGUID, CSI.Probe)] = &CSI; + } + } + LLVM_DEBUG({ + for (auto &[ParentId, InlineSiteCSI] : ParentToCSI) { + for (auto &[InlineSite, CSI] : InlineSiteCSI) { + auto [CalleeGUID, CallSite] = InlineSite; + errs() << ParentId << "@" << CallSite << "->" + << Twine::utohexstr(CalleeGUID) << ": " << CSI->Count << ", " + << Twine::utohexstr(CSI->Offset) << '\n'; + } + } + }); + + assert(!Root.isRoot()); + LLVM_DEBUG(dbgs() << "matchInlineTreesImpl for " << BF << "@" + << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name + << "@" << Twine::utohexstr(FuncNode.GUID) << '\n'); + ++ProbeMatchingStats.AttemptedNodes; + ++ProbeMatchingStats.AttemptedRoots; + + // Match profile function with a lead node (top-level function or inlinee) + if (Root.Guid != FuncNode.GUID) { + LLVM_DEBUG(dbgs() << "Mismatching root GUID\n"); + ++ProbeMatchingStats.MismatchingRootGUID; + return 0; + } + { + uint64_t BinaryHash = Decoder.getFuncDescForGUID(Root.Guid)->FuncHash; + uint64_t ProfileHash = FuncNode.Hash; + if (BinaryHash != ProfileHash) { + LLVM_DEBUG(dbgs() << "Mismatching hashes: " + << Twine::utohexstr(BinaryHash) << " " + << Twine::utohexstr(ProfileHash) << '\n'); + ++ProbeMatchingStats.MismatchingRootHash; + return 0; + } + } + assert(!Map[RootIdx]); + Map[RootIdx] = &Root; + ++ProbeMatchingStats.MatchedRoots; + + uint64_t Matched = 1; + for (const auto &[Idx, ProfileNode] : + llvm::drop_begin(llvm::enumerate(ProfileInlineTree), RootIdx + 1)) { + // Can't match children if parent is not matched. + uint32_t ParentIdx = ProfileNode.ParentIndexDelta; + const MCDecodedPseudoProbeInlineTree *Parent = Map[ParentIdx]; + // Exclude nodes with unmatched parent from attempted stats. + if (!Parent) + continue; + ++ProbeMatchingStats.AttemptedNodes; + if (!ParentToChildren.contains(ParentIdx)) + for (const MCDecodedPseudoProbeInlineTree &Child : Parent->getChildren()) + ParentToChildren[ParentIdx].emplace(InlineSite(Child.getInlineSite()), + &Child); + ChildMapTy &ChildMap = ParentToChildren[ParentIdx]; + InlineSite Site(ProfileNode.GUID, ProfileNode.CallSiteProbe); + auto ChildIt = ChildMap.find(Site); + if (ChildIt == ChildMap.end()) { + // No binary inline tree node for a given profile inline tree node => + // the function is inlined in the profile and is outlined in the binary. + // Look up the callee binary function and attach profile inline subtree + // to it. The profile will be additively included into the function. + LLVM_DEBUG({ + dbgs() << "No binary inline tree for " + << Twine::utohexstr(ProfileNode.GUID); + auto G2FDMap = Decoder.getGUID2FuncDescMap(); + auto FDIt = G2FDMap.find(ProfileNode.GUID); + if (FDIt != G2FDMap.end()) + dbgs() << " " << FDIt->FuncName; + dbgs() << '\n'; + }); + for (auto It : llvm::make_range(GUIDToBF.equal_range(ProfileNode.GUID))) { + BinaryFunction *CalleeBF = It.second; + LLVM_DEBUG(dbgs() << "Found outlined function " + << CalleeBF->getPrintName() << '\n'); + matchInlineTrees(*CalleeBF, nullptr, YamlBF, Idx, 1); + } + continue; + } + const MCDecodedPseudoProbeInlineTree *Child = ChildIt->second; + uint64_t BinaryHash = Decoder.getFuncDescForGUID(Child->Guid)->FuncHash; + if (BinaryHash == ProfileNode.Hash) { + // Match inline trees. + assert(!Map[Idx]); + Map[Idx] = Child; + ++Matched; + } else { + LLVM_DEBUG(dbgs() << "Mismatching hashes: " + << Twine::utohexstr(BinaryHash) << " " + << Twine::utohexstr(ProfileNode.Hash) << '\n'); + ++ProbeMatchingStats.MismatchingNodeHash; + } + ChildMap.erase(ChildIt); + } + + LLVM_DEBUG(dbgs() << "Matching outlined binary inline tree nodes\n"); + // Binary inline trees without correspondence in the profile => nodes are + // inlined in the binary and outlined in the profile. Match function-level + // profiles to these nodes. The profile will be scaled down based on call + // site count in the parent profile. + for (const auto &[ParentIdx, ChildMap] : ParentToChildren) { + const auto &CSI = ParentToCSI[ParentIdx]; + for (const auto &[CallSite, Node] : ChildMap) { + uint64_t GUID = Node->Guid; + assert(GUID == std::get<0>(CallSite)); + auto ChildCSIIt = CSI.find(CallSite); + if (ChildCSIIt == CSI.end()) { + LLVM_DEBUG({ + dbgs() << "Can't find profile call site info for " + << Twine::utohexstr(GUID); + auto G2FDMap = Decoder.getGUID2FuncDescMap(); + auto FDIt = G2FDMap.find(GUID); + if (FDIt != G2FDMap.end()) + dbgs() << " " << FDIt->FuncName << " H " + << Twine::utohexstr(FDIt->FuncHash); + dbgs() << '\n'; + }); + ++ProbeMatchingStats.MissingProfileNode; + continue; } + const CallSiteInfo *ChildCSI = ChildCSIIt->second; + assert(ChildCSI); + BinaryFunctionProfile *ChildYamlBF = IdToYamLBF.lookup(ChildCSI->DestId); + assert(ChildYamlBF); + uint64_t ChildExecCount = ChildYamlBF->ExecCount; + uint64_t Freq = ChildCSI->Count; + float ChildScale = ChildExecCount ? 1.f * Freq / ChildExecCount : 1; + LLVM_DEBUG({ + dbgs() << "Match inlined node " << Twine::utohexstr(GUID) + << " at call site " << std::get<1>(CallSite) << " in parent id " + << ParentIdx << " " << Twine::utohexstr(Map[ParentIdx]->Guid) + << " with inline exec count " << Freq + << " and outlined exec count " << ChildExecCount << '\n'; + }); + matchInlineTrees(BF, Node, *ChildYamlBF, 0, /*Scale * */ ChildScale); } - // Don't match nodes if the profile is stale (mismatching binary FuncHash - // and YAML Hash) - if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash) - mapInlineTreeNode(InlineTreeNodeId, Cur); } - return Map.size(); + LLVM_DEBUG(dbgs() << "matchInlineTreesImpl done for " << BF << "@" + << Twine::utohexstr(Root.Guid) << " and " << YamlBF.Name + << "@" << Twine::utohexstr(FuncNode.GUID) << ": " << Matched + << "/" << Map.size() << '\n'); + ProbeMatchingStats.MatchedNodes += Matched; + return Matched; } -// Decode index deltas and indirection through \p YamlPD. Return modified copy -// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex). -static std::vector -decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, - std::vector YamlInlineTree) { - uint32_t ParentId = 0; - uint32_t PrevGUIDIdx = 0; - for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { - uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; - if (GUIDIdx != UINT32_MAX) - PrevGUIDIdx = GUIDIdx; - else - GUIDIdx = PrevGUIDIdx; - uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; - ParentId += InlineTreeNode.ParentIndexDelta; - InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; - InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; - InlineTreeNode.ParentIndexDelta = ParentId; +void YAMLProfileReader::matchInlineTrees( + BinaryFunction &BF, const MCDecodedPseudoProbeInlineTree *Node, + yaml::bolt::BinaryFunctionProfile &YamlBF, uint32_t RootIdx, float Scale) { + ArrayRef Tree = YamlBF.InlineTree; + size_t Size = Tree.size(); + if (!Size) + return; + BinaryContext &BC = BF.getBinaryContext(); + LLVM_DEBUG(dbgs() << "Match inline trees for " << BF << " @" + << (Node ? Twine::utohexstr(Node->Guid) : "root") << " and " + << YamlBF.Name << "@" << RootIdx << " with scale " << Scale + << '\n'); + + if (!Node) + Node = lookupTopLevelNode(BF); + if (!Node) + return; + RootIdxToMapTy &FuncMatchSpecs = BFToProbeMatchSpecs[&BF][&YamlBF]; + auto MatchIt = FuncMatchSpecs.find(RootIdx); + if (MatchIt != FuncMatchSpecs.end()) { + LLVM_DEBUG(dbgs() << "Duplicate match attempt, skip\n"); + return; } - return YamlInlineTree; + const auto &[It, _] = FuncMatchSpecs.emplace( + RootIdx, std::pair(InlineTreeNodeMapTy(Size), Scale)); + MutableArrayRef Map = It->second.first; + size_t N = matchInlineTreesImpl(BF, YamlBF, *Node, RootIdx, Tree, Map, Scale); + if (N) + YamlBF.Used = true; + LLVM_DEBUG({ + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + dbgs() << N << "/" << Size << " match with " << BF << " at "; + // Print the inline context of head probe + ListSeparator LS(" inlined into "); + do { + dbgs() << LS << Decoder->getFuncDescForGUID(Node->Guid)->FuncName << ":" + << std::get<1>(Node->getInlineSite()); + Node = (const MCDecodedPseudoProbeInlineTree *)Node->Parent; + } while (!Node->isRoot()); + dbgs() << '\n'; + }); } size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { @@ -652,12 +894,15 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { return 0; const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe // matching. assert(Decoder && "If pseudo probes are in use, pseudo probe decoder should exist"); + for (BinaryFunction *BF : BC.getAllBinaryFunctions()) + if (uint64_t GUID = BF->getGUID()) + GUIDToBF.emplace(GUID, BF); + for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { // BF is preliminary name-matched function to YamlBF // MatchedBF is final matched function @@ -669,22 +914,75 @@ size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { uint64_t GUID = BF->getGUID(); if (!GUID) continue; - auto It = TopLevelGUIDToInlineTree.find(GUID); - if (It == TopLevelGUIDToInlineTree.end()) - continue; - const MCDecodedPseudoProbeInlineTree *Node = It->second; - assert(Node && "Malformed TopLevelGUIDToInlineTree"); - auto &MatchSpecs = BFToProbeMatchSpecs[BF]; - auto &InlineTreeMap = - MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first; - std::vector ProfileInlineTree = - decodeYamlInlineTree(YamlPD, YamlBF.InlineTree); - // Erase unsuccessful match - if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node)) - MatchSpecs.pop_back(); - } - - return 0; + // Match YamlBF with BF at top-level inline tree level. + matchInlineTrees(*BF, nullptr, YamlBF, 0, 1); + } + // Remove empty match specs + for (auto &[BF, ProbeMatchSpecs] : BFToProbeMatchSpecs) { + for (auto PMII = ProbeMatchSpecs.begin(); PMII != ProbeMatchSpecs.end();) { + RootIdxToMapTy &RootIdxToMap = PMII->second; + for (auto II = RootIdxToMap.begin(); II != RootIdxToMap.end();) { + if (llvm::any_of(II->second.first, [](auto *P) { return P; })) + ++II; + else + II = RootIdxToMap.erase(II); + } + if (RootIdxToMap.empty()) + PMII = ProbeMatchSpecs.erase(PMII); + else + ++PMII; + } + // Don't drop empty BFToProbeMatchSpecs - used during matching + } + outs() << "BOLT-INFO: pseudo probe profile matching matched " + << formatv("{0:p} ({1}/{2}) ", + 1.0 * ProbeMatchingStats.MatchedRoots / + ProbeMatchingStats.AttemptedRoots, + ProbeMatchingStats.MatchedRoots, + ProbeMatchingStats.AttemptedRoots) + << "inline tree root(s) corresponding to profile or binary functions. " + + << "Couldn't match " << ProbeMatchingStats.MismatchingRootGUID + << " roots with mismatching GUIDs, " + << ProbeMatchingStats.MismatchingRootHash + << " with mismatching hash.\n" + + << "BOLT-INFO: pseudo probe profile matching matched " + << formatv("{0:p} ({1}/{2}) ", + 1.0 * ProbeMatchingStats.MatchedNodes / + ProbeMatchingStats.AttemptedNodes, + ProbeMatchingStats.MatchedNodes, + ProbeMatchingStats.AttemptedNodes) + << "inline tree node(s) corresponding to inlined source functions. " + + << "Couldn't match " << ProbeMatchingStats.MismatchingNodeHash + << " nodes with mismatching hash, " + << ProbeMatchingStats.MissingProfileNode + << " with missing profile node.\n" + + << "BOLT-INFO: the profile has " + << formatv("{0:p} ({1}/{2}) calls missing probe, ", + 1.0 * ProbeMatchingStats.MissingCallProbe / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingCallProbe, + ProbeMatchingStats.TotalCallSites) + << formatv("{0:p} ({1}/{2}) calls missing callee, ", + 1.0 * ProbeMatchingStats.MissingCallee / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingCallee, + ProbeMatchingStats.TotalCallSites) + << formatv("{0:p} ({1}/{2}) calls with callees missing inline trees, ", + 1.0 * ProbeMatchingStats.MissingInlineTree / + ProbeMatchingStats.TotalCallSites, + ProbeMatchingStats.MissingInlineTree, + ProbeMatchingStats.TotalCallSites) + << formatv("covering a total of {0:p} ({1}/{2}) call counts\n", + 1.0 * ProbeMatchingStats.MissingCallCount / + ProbeMatchingStats.TotalCallCount, + ProbeMatchingStats.MissingCallCount, + ProbeMatchingStats.TotalCallCount); + + return BFToProbeMatchSpecs.size(); } size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { @@ -818,15 +1116,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } } - if (opts::StaleMatchingWithPseudoProbes) { - const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); - assert(Decoder && - "If pseudo probes are in use, pseudo probe decoder should exist"); - for (const MCDecodedPseudoProbeInlineTree &TopLev : - Decoder->getDummyInlineRoot().getChildren()) - TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; - } - // Map profiled function ids to names. for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) IdToYamLBF[YamlBF.Id] = &YamlBF; @@ -836,8 +1125,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); - [[maybe_unused]] const size_t MatchedWithPseudoProbes = - matchWithPseudoProbes(BC); + const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) @@ -858,6 +1146,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { << " functions with matching LTO common names\n"; outs() << "BOLT-INFO: matched " << MatchedWithCallGraph << " functions with call graph\n"; + outs() << "BOLT-INFO: matched " << MatchedWithPseudoProbes + << " functions with pseudo probes\n"; outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity << " functions with similar names\n"; } @@ -870,9 +1160,17 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id)) parseFunctionProfile(*BF, YamlBF); else - ++NumUnused; + NumUnused += !YamlBF.Used; } + // Partial matching: pass phony profile for binary functions without + // corresponding real (non-partial) profile. + yaml::bolt::BinaryFunctionProfile Phony; + Phony.ExecCount = 1; + for (BinaryFunction *BF : llvm::make_first_range(BFToProbeMatchSpecs)) + if (!ProfiledFunctions.count(BF)) + parseFunctionProfile(*BF, Phony); + BC.setNumUnusedProfiledObjects(NumUnused); if (opts::Lite && diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test index 9224cf163dbcc..beb67277db243 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test @@ -5,8 +5,10 @@ # RUN: -o %t.bolt -data %t/yaml -infer-stale-profile -v=2 \ # RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (3/3) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash. +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (6/6) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node. # CHECK: BOLT-WARNING: 3 (100.0% of all profiled) functions have invalid (possibly stale) profile -# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (3 out of 3 stale) +# CHECK: BOLT-INFO: inference found pseudo probe match for 233.33% of basic blocks (7 out of 3 stale) responsible for 233.33% samples (7 out of 3 stale) #--- yaml --- diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test index 7be327d698b17..a28fadff4e763 100644 --- a/bolt/test/X86/match-blocks-with-pseudo-probes.test +++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test @@ -8,7 +8,9 @@ # RUN: --print-cfg --funcs=main --infer-stale-profile \ # RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s -# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree root(s) corresponding to profile or binary functions. Couldn't match 0 roots with mismatching GUIDs, 0 with mismatching hash. +# CHECK: BOLT-INFO: pseudo probe profile matching matched 100.00% (1/1) inline tree node(s) corresponding to inlined source functions. Couldn't match 0 nodes with mismatching hash, 0 with missing profile node. +# CHECK: BOLT-INFO: inference found pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) #--- main.s .text diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test index 4c6e9c25551f1..23338a885d280 100644 --- a/bolt/test/X86/reader-stale-yaml.test +++ b/bolt/test/X86/reader-stale-yaml.test @@ -77,10 +77,10 @@ CHECK2: pre-processing profile using YAML profile reader CHECK2: applying profile inference for "SolveCubic" CHECK2: Matched yaml block (bid = 0) with hash 4600940a609c0000 to BB (index = 0) with hash 4600940a609c0000 CHECK2-NEXT: exact match -CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7 -CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 1) with hash 167a1f084f130088 to BB (index = 1) with hash 167a1f084f130088 CHECK2-NEXT: exact match +CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7 +CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 3) with hash c516000073dc00a0 to BB (index = 3) with hash c516b1c973dc00a0 CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 5) with hash 6446e1ea500111 to BB (index = 5) with hash 6446e1ea500111