diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 08ce892054874..c9b0e103ed514 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -723,12 +723,28 @@ class BinaryContext { /// Stats for stale profile matching: /// the total number of basic blocks in the profile uint32_t NumStaleBlocks{0}; - /// the number of matched basic blocks - uint32_t NumMatchedBlocks{0}; + /// the number of exactly matched basic blocks + uint32_t NumExactMatchedBlocks{0}; + /// the number of loosely matched basic blocks + uint32_t NumLooseMatchedBlocks{0}; + /// the number of exactly pseudo probe matched basic blocks + uint32_t NumPseudoProbeExactMatchedBlocks{0}; + /// the number of loosely pseudo probe matched basic blocks + uint32_t NumPseudoProbeLooseMatchedBlocks{0}; + /// the number of call matched basic blocks + uint32_t NumCallMatchedBlocks{0}; /// the total count of samples in the profile uint64_t StaleSampleCount{0}; - /// the count of matched samples - uint64_t MatchedSampleCount{0}; + /// the count of exactly matched samples + uint64_t ExactMatchedSampleCount{0}; + /// the count of loosely matched samples + uint64_t LooseMatchedSampleCount{0}; + /// the count of exactly pseudo probe matched samples + uint64_t PseudoProbeExactMatchedSampleCount{0}; + /// the count of loosely pseudo probe matched samples + uint64_t PseudoProbeLooseMatchedSampleCount{0}; + /// the count of call matched samples + uint64_t CallMatchedSampleCount{0}; /// the number of stale functions that have matching number of blocks in /// the profile uint64_t NumStaleFuncsWithEqualBlockCount{0}; diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 9865118f2696d..a8c9f7a10bbb9 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -174,6 +174,9 @@ struct InlineTreeNode { uint32_t CallSiteProbe; // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted) uint32_t GUIDIndex; + // Decoded contents, ParentIndexDelta becomes absolute value. + uint64_t GUID; + uint64_t Hash; bool operator==(const InlineTreeNode &) const { return false; } }; } // end namespace bolt diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index a6f0fd6f3251f..91f51f30e3ca4 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -14,6 +14,8 @@ #include namespace llvm { +class MCDecodedPseudoProbeInlineTree; + namespace bolt { class YAMLProfileReader : public ProfileReaderBase { @@ -43,6 +45,9 @@ class YAMLProfileReader : public ProfileReaderBase { using ProfileLookupMap = DenseMap; + using GUIDInlineTreeMap = + std::unordered_map; + /// A class for matching binary functions in functions in the YAML profile. /// First, a call graph is constructed for both profiled and binary functions. /// Then functions are hashed based on the names of their callee/caller @@ -96,6 +101,61 @@ class YAMLProfileReader : public ProfileReaderBase { YamlBFAdjacencyMap; }; + // A class for matching inline tree nodes between profile and binary. + // Provides the mapping from profile inline tree node id to a + // corresponding binary MCDecodedPseudoProbeInlineTree node. + // + // The whole mapping process is the following: + // + // (profile) (binary) + // | blocks ^ + // v | + // yaml::bolt::BinaryBasicBlockProfile ~= FlowBlock + // ||| probes ^ (majority vote) + // v ||| BBPseudoProbeToBlock + // yaml::bolt::PseudoProbeInfo MCDecodedPseudoProbe + // | InlineTreeIndex ^ + // v | probe id + // [ profile node id (uint32_t) -> MCDecodedPseudoProbeInlineTree *] + // InlineTreeNodeMapTy + class InlineTreeNodeMapTy { + DenseMap Map; + + void mapInlineTreeNode(uint32_t ProfileNodeIdx, + const MCDecodedPseudoProbeInlineTree *BinaryNode) { + auto Res = Map.try_emplace(ProfileNodeIdx, BinaryNode); + assert(Res.second && + "Duplicate mapping from profile node index to binary inline tree"); + (void)Res; + } + + public: + /// Returns matched InlineTree * for a given profile inline_tree_id. + const MCDecodedPseudoProbeInlineTree * + getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const { + auto It = Map.find(ProfileInlineTreeNodeId); + if (It == Map.end()) + return nullptr; + return It->second; + } + + // Match up \p YamlInlineTree with binary inline tree rooted at \p Root. + // Return the number of matched nodes. + // + // This function populates the mapping from profile inline tree node id to a + // corresponding binary MCDecodedPseudoProbeInlineTree node. + size_t matchInlineTrees( + const MCPseudoProbeDecoder &Decoder, + const std::vector &YamlInlineTree, + const MCDecodedPseudoProbeInlineTree *Root); + }; + + // Partial probe matching specification: matched inline tree and corresponding + // BinaryFunctionProfile + using ProbeMatchSpec = + std::pair>; + private: /// Adjustments for basic samples profiles (without LBR). bool NormalizeByInsnCount{false}; @@ -129,6 +189,13 @@ class YAMLProfileReader : public ProfileReaderBase { /// BinaryFunction pointers indexed by YamlBP functions. std::vector ProfileBFs; + // Pseudo probe function GUID to inline tree node + GUIDInlineTreeMap TopLevelGUIDToInlineTree; + + // Mapping from a binary function to its partial match specification + // (YAML profile and its inline tree mapping to binary). + DenseMap> BFToProbeMatchSpecs; + /// Populate \p Function profile with the one supplied in YAML format. bool parseFunctionProfile(BinaryFunction &Function, const yaml::bolt::BinaryFunctionProfile &YamlBF); @@ -139,7 +206,8 @@ class YAMLProfileReader : public ProfileReaderBase { /// Infer function profile from stale data (collected on older binaries). bool inferStaleProfile(BinaryFunction &Function, - const yaml::bolt::BinaryFunctionProfile &YamlBF); + const yaml::bolt::BinaryFunctionProfile &YamlBF, + const ArrayRef ProbeMatchSpecs); /// Initialize maps for profile matching. void buildNameMaps(BinaryContext &BC); @@ -156,6 +224,10 @@ class YAMLProfileReader : public ProfileReaderBase { /// Matches functions using the call graph. size_t matchWithCallGraph(BinaryContext &BC); + /// Matches functions using the call graph. + /// Populates BF->partial probe match spec map. + size_t matchWithPseudoProbes(BinaryContext &BC); + /// Matches functions with similarly named profiled functions. size_t matchWithNameSimilarity(BinaryContext &BC); diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 5a676185227ec..8e46c22a4e024 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1549,10 +1549,48 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { "BOLT-INFO: inference found an exact match for %.2f%% of basic blocks" " (%zu out of %zu stale) responsible for %.2f%% samples" " (%zu out of %zu stale)\n", - 100.0 * BC.Stats.NumMatchedBlocks / BC.Stats.NumStaleBlocks, - BC.Stats.NumMatchedBlocks, BC.Stats.NumStaleBlocks, - 100.0 * BC.Stats.MatchedSampleCount / BC.Stats.StaleSampleCount, - BC.Stats.MatchedSampleCount, BC.Stats.StaleSampleCount); + 100.0 * BC.Stats.NumExactMatchedBlocks / BC.Stats.NumStaleBlocks, + BC.Stats.NumExactMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount, + BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.outs() << format( + "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of " + "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" + " (%zu out of %zu stale)\n", + 100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks / + BC.Stats.NumStaleBlocks, + BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount / + BC.Stats.StaleSampleCount, + BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.outs() << format( + "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of " + "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples" + " (%zu out of %zu stale)\n", + 100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks / + BC.Stats.NumStaleBlocks, + BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount / + BC.Stats.StaleSampleCount, + BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.outs() << format( + "BOLT-INFO: inference found a call match for %.2f%% of basic " + "blocks" + " (%zu out of %zu stale) responsible for %.2f%% samples" + " (%zu out of %zu stale)\n", + 100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks, + BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount, + BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount); + BC.outs() << format( + "BOLT-INFO: inference found a loose match for %.2f%% of basic " + "blocks" + " (%zu out of %zu stale) responsible for %.2f%% samples" + " (%zu out of %zu stale)\n", + 100.0 * BC.Stats.NumLooseMatchedBlocks / BC.Stats.NumStaleBlocks, + BC.Stats.NumLooseMatchedBlocks, BC.Stats.NumStaleBlocks, + 100.0 * BC.Stats.LooseMatchedSampleCount / BC.Stats.StaleSampleCount, + BC.Stats.LooseMatchedSampleCount, BC.Stats.StaleSampleCount); } if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) { diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index cd6e96f7e2cf4..a520c9a02a907 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -29,6 +29,7 @@ #include "bolt/Profile/YAMLProfileReader.h" #include "llvm/ADT/Bitfields.h" #include "llvm/ADT/Hashing.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Timer.h" #include "llvm/Support/xxhash.h" @@ -116,6 +117,11 @@ cl::opt StaleMatchingCostJumpUnknownFTInc( "The cost of increasing an unknown fall-through jump count by one."), cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory)); +cl::opt StaleMatchingWithPseudoProbes( + "stale-matching-with-pseudo-probes", + cl::desc("Turns on stale matching with block pseudo probes."), + cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory)); + } // namespace opts namespace llvm { @@ -208,11 +214,48 @@ class StaleMatcher { } } - /// Find the most similar block for a given hash. - const FlowBlock *matchBlock(BlendedBlockHash BlendedHash, - uint64_t CallHash) const { - const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash); - return BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash); + /// Creates a mapping from a pseudo probe to a flow block. + void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) { + BBPseudoProbeToBlock[Probe] = Block; + } + + enum MatchMethod : char { + MATCH_EXACT = 0, + MATCH_PROBE_EXACT, + MATCH_PROBE_LOOSE, + MATCH_OPCODE, + MATCH_CALL, + NO_MATCH + }; + + /// Find the most similar flow block for a profile block given blended hash. + std::pair + matchBlockStrict(BlendedBlockHash BlendedHash) { + const auto &[Block, ExactHash] = matchWithOpcodes(BlendedHash); + if (Block && ExactHash) + return {Block, MATCH_EXACT}; + return {nullptr, NO_MATCH}; + } + + /// Find the most similar flow block for a profile block given pseudo probes. + std::pair matchBlockProbe( + const ArrayRef PseudoProbes, + const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) { + const auto &[ProbeBlock, ExactProbe] = + matchWithPseudoProbes(PseudoProbes, InlineTreeNodeMap); + if (ProbeBlock) + return {ProbeBlock, ExactProbe ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE}; + return {nullptr, NO_MATCH}; + } + + /// Find the most similar flow block for a profile block given its hashes. + std::pair + matchBlockLoose(BlendedBlockHash BlendedHash, uint64_t CallHash) { + if (const FlowBlock *CallBlock = matchWithCalls(BlendedHash, CallHash)) + return {CallBlock, MATCH_CALL}; + if (const FlowBlock *OpcodeBlock = matchWithOpcodes(BlendedHash).first) + return {OpcodeBlock, MATCH_OPCODE}; + return {nullptr, NO_MATCH}; } /// Returns true if the two basic blocks (in the binary and in the profile) @@ -227,22 +270,26 @@ class StaleMatcher { using HashBlockPairType = std::pair; std::unordered_map> OpHashToBlocks; std::unordered_map> CallHashToBlocks; + DenseMap BBPseudoProbeToBlock; // Uses OpcodeHash to find the most similar block for a given hash. - const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const { + std::pair + matchWithOpcodes(BlendedBlockHash BlendedHash) const { auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash); if (BlockIt == OpHashToBlocks.end()) - return nullptr; + return {nullptr, false}; FlowBlock *BestBlock = nullptr; uint64_t BestDist = std::numeric_limits::max(); + BlendedBlockHash BestHash; for (const auto &[Hash, Block] : BlockIt->second) { uint64_t Dist = Hash.distance(BlendedHash); if (BestBlock == nullptr || Dist < BestDist) { BestDist = Dist; BestBlock = Block; + BestHash = Hash; } } - return BestBlock; + return {BestBlock, isHighConfidenceMatch(BestHash, BlendedHash)}; } // Uses CallHash to find the most similar block for a given hash. @@ -266,6 +313,73 @@ class StaleMatcher { } return BestBlock; } + + /// Matches a profile block with a binary block based on pseudo probes. + /// Returns the best matching block (or nullptr) and whether the match is + /// unambiguous. + std::pair matchWithPseudoProbes( + const ArrayRef BlockPseudoProbes, + const YAMLProfileReader::InlineTreeNodeMapTy &InlineTreeNodeMap) const { + + if (!opts::StaleMatchingWithPseudoProbes) + return {nullptr, false}; + + DenseMap FlowBlockMatchCount; + + auto matchProfileProbeToBlock = [&](uint32_t NodeId, + uint64_t ProbeId) -> const FlowBlock * { + const MCDecodedPseudoProbeInlineTree *BinaryNode = + InlineTreeNodeMap.getInlineTreeNode(NodeId); + if (!BinaryNode) + return nullptr; + const MCDecodedPseudoProbe Dummy(0, ProbeId, PseudoProbeType::Block, 0, 0, + nullptr); + ArrayRef BinaryProbes = BinaryNode->getProbes(); + auto BinaryProbeIt = llvm::lower_bound( + BinaryProbes, Dummy, [](const auto &LHS, const auto &RHS) { + return LHS.getIndex() < RHS.getIndex(); + }); + if (BinaryProbeIt == BinaryNode->getProbes().end() || + BinaryProbeIt->getIndex() != ProbeId) + return nullptr; + auto It = BBPseudoProbeToBlock.find(&*BinaryProbeIt); + if (It == BBPseudoProbeToBlock.end()) + return nullptr; + return It->second; + }; + + auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo + &ProfileProbe, + uint32_t NodeId) { + for (uint64_t Index = 0; Index < 64; ++Index) + if (ProfileProbe.BlockMask & 1ull << Index) + ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)]; + for (const auto &ProfileProbes : + {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes, + ProfileProbe.CallProbes}) + for (uint64_t ProfileProbe : ProfileProbes) + ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)]; + }; + + for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) { + if (!ProfileProbe.InlineTreeNodes.empty()) + for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes) + matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode); + else + matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex); + } + uint32_t BestMatchCount = 0; + uint32_t TotalMatchCount = 0; + const FlowBlock *BestMatchBlock = nullptr; + for (const auto &[FlowBlock, Count] : FlowBlockMatchCount) { + TotalMatchCount += Count; + if (Count < BestMatchCount || (Count == BestMatchCount && BestMatchBlock)) + continue; + BestMatchBlock = FlowBlock; + BestMatchCount = Count; + } + return {BestMatchBlock, BestMatchCount == TotalMatchCount}; + } }; void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { @@ -442,17 +556,18 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) { /// is to extract as much information from the stale profile as possible. Here /// we assume that each basic block is specified via a hash value computed from /// its content and the hashes of the unchanged basic blocks stay the same -/// across different revisions of the binary. +/// across different revisions of the binary. Blocks may also have pseudo probe +/// information in the profile and the binary which is used for matching. /// Whenever there is a count in the profile with the hash corresponding to one /// of the basic blocks in the binary, the count is "matched" to the block. /// Similarly, if both the source and the target of a count in the profile are /// matched to a jump in the binary, the count is recorded in CFG. -size_t -matchWeightsByHashes(BinaryContext &BC, - const BinaryFunction::BasicBlockOrderType &BlockOrder, - const yaml::bolt::BinaryFunctionProfile &YamlBF, - FlowFunction &Func, HashFunction HashFunction, - YAMLProfileReader::ProfileLookupMap &IdToYamlBF) { +size_t matchWeights( + BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder, + const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func, + HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF, + const BinaryFunction &BF, + const ArrayRef ProbeMatchSpecs) { assert(Func.Blocks.size() == BlockOrder.size() + 2); @@ -482,16 +597,67 @@ matchWeightsByHashes(BinaryContext &BC, << Twine::utohexstr(BB->getHash()) << "\n"); } StaleMatcher Matcher; + // Collects function pseudo probes for use in the StaleMatcher. + if (opts::StaleMatchingWithPseudoProbes) { + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); + const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap(); + const uint64_t FuncAddr = BF.getAddress(); + for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(FuncAddr, FuncAddr + BF.getSize())) + if (const BinaryBasicBlock *BB = + BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr)) + Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]); + } Matcher.init(Blocks, BlendedHashes, CallHashes); - // Index in yaml profile => corresponding (matched) block - DenseMap MatchedBlocks; - // Match blocks from the profile to the blocks in CFG + using FlowBlockTy = + std::pair; + using ProfileBlockMatchMap = DenseMap; + // Binary profile => block index => matched block + its block profile + DenseMap + MatchedBlocks; + + // Map of FlowBlock and matching method. + DenseMap MatchedFlowBlocks; + + auto addMatchedBlock = + [&](std::pair BlockMethod, + const yaml::bolt::BinaryFunctionProfile &YamlBP, + const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { + const auto &[MatchedBlock, Method] = BlockMethod; + if (!MatchedBlock) + return; + // Don't override earlier matches + if (MatchedFlowBlocks.contains(MatchedBlock)) + return; + MatchedFlowBlocks.try_emplace(MatchedBlock, Method); + MatchedBlocks[&YamlBP][YamlBB.Index] = {MatchedBlock, &YamlBB}; + }; + + // Match blocks from the profile to the blocks in CFG by strict hash. + for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { + // Update matching stats. + ++BC.Stats.NumStaleBlocks; + BC.Stats.StaleSampleCount += YamlBB.ExecCount; + + assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); + BlendedBlockHash YamlHash(YamlBB.Hash); + addMatchedBlock(Matcher.matchBlockStrict(YamlHash), YamlBF, YamlBB); + } + // Match blocks from the profile to the blocks in CFG by pseudo probes. + for (const auto &[InlineNodeMap, YamlBP] : ProbeMatchSpecs) { + for (const yaml::bolt::BinaryBasicBlockProfile &BB : YamlBP.get().Blocks) + if (!BB.PseudoProbes.empty()) + addMatchedBlock(Matcher.matchBlockProbe(BB.PseudoProbes, InlineNodeMap), + YamlBP, BB); + } + // Match blocks from the profile to the blocks in CFG with loose methods. for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile"); BlendedBlockHash YamlHash(YamlBB.Hash); - const FlowBlock *MatchedBlock = nullptr; std::string CallHashStr = hashBlockCalls(IdToYamlBF, YamlBB); uint64_t CallHash = 0; if (!CallHashStr.empty()) { @@ -502,76 +668,103 @@ matchWeightsByHashes(BinaryContext &BC, else llvm_unreachable("Unhandled HashFunction"); } - MatchedBlock = Matcher.matchBlock(YamlHash, CallHash); - if (MatchedBlock == nullptr && YamlBB.Index == 0) + auto [MatchedBlock, Method] = Matcher.matchBlockLoose(YamlHash, CallHash); + if (MatchedBlock == nullptr && YamlBB.Index == 0) { MatchedBlock = Blocks[0]; - if (MatchedBlock != nullptr) { - const BinaryBasicBlock *BB = BlockOrder[MatchedBlock->Index - 1]; - MatchedBlocks[YamlBB.Index] = MatchedBlock; + // Report as loose match + Method = StaleMatcher::MATCH_OPCODE; + } + if (!MatchedBlock) { + LLVM_DEBUG(dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index + << ")" << " with hash " << Twine::utohexstr(YamlBB.Hash) + << "\n"); + continue; + } + addMatchedBlock({MatchedBlock, Method}, YamlBF, YamlBB); + } + + // Match jumps from the profile to the jumps from CFG + std::vector OutWeight(Func.Blocks.size(), 0); + std::vector InWeight(Func.Blocks.size(), 0); + + for (const auto &[YamlBF, MatchMap] : MatchedBlocks) { + for (const auto &[YamlBBIdx, FlowBlockProfile] : MatchMap) { + const auto &[MatchedBlock, YamlBB] = FlowBlockProfile; + StaleMatcher::MatchMethod Method = MatchedFlowBlocks.lookup(MatchedBlock); BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1]; - LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")" - << " with hash " << Twine::utohexstr(YamlBB.Hash) + LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBBIdx << ")" + << " with hash " << Twine::utohexstr(YamlBB->Hash) << " to BB (index = " << MatchedBlock->Index - 1 << ")" << " with hash " << Twine::utohexstr(BinHash.combine()) << "\n"); + uint64_t ExecCount = YamlBB->ExecCount; // Update matching stats accounting for the matched block. - if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) { - ++BC.Stats.NumMatchedBlocks; - BC.Stats.MatchedSampleCount += YamlBB.ExecCount; + switch (Method) { + case StaleMatcher::MATCH_EXACT: + ++BC.Stats.NumExactMatchedBlocks; + BC.Stats.ExactMatchedSampleCount += ExecCount; LLVM_DEBUG(dbgs() << " exact match\n"); - } else { + break; + case StaleMatcher::MATCH_PROBE_EXACT: + ++BC.Stats.NumPseudoProbeExactMatchedBlocks; + BC.Stats.PseudoProbeExactMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " exact pseudo probe match\n"); + break; + case StaleMatcher::MATCH_PROBE_LOOSE: + ++BC.Stats.NumPseudoProbeLooseMatchedBlocks; + BC.Stats.PseudoProbeLooseMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " loose pseudo probe match\n"); + break; + case StaleMatcher::MATCH_CALL: + ++BC.Stats.NumCallMatchedBlocks; + BC.Stats.CallMatchedSampleCount += ExecCount; + LLVM_DEBUG(dbgs() << " call match\n"); + break; + case StaleMatcher::MATCH_OPCODE: + ++BC.Stats.NumLooseMatchedBlocks; + BC.Stats.LooseMatchedSampleCount += ExecCount; LLVM_DEBUG(dbgs() << " loose match\n"); + break; + case StaleMatcher::NO_MATCH: + LLVM_DEBUG(dbgs() << " no match\n"); } - if (YamlBB.NumInstructions == BB->size()) - ++BC.Stats.NumStaleBlocksWithEqualIcount; - } else { - LLVM_DEBUG( - dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")" - << " with hash " << Twine::utohexstr(YamlBB.Hash) << "\n"); } - // Update matching stats. - ++BC.Stats.NumStaleBlocks; - BC.Stats.StaleSampleCount += YamlBB.ExecCount; - } - - // Match jumps from the profile to the jumps from CFG - std::vector OutWeight(Func.Blocks.size(), 0); - std::vector InWeight(Func.Blocks.size(), 0); - for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { - for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { - if (YamlSI.Count == 0) - continue; - - // Try to find the jump for a given (src, dst) pair from the profile and - // assign the jump weight based on the profile count - const uint64_t SrcIndex = YamlBB.Index; - const uint64_t DstIndex = YamlSI.Index; - - const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex); - const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex); - - if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { - // Find a jump between the two blocks - FlowJump *Jump = nullptr; - for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { - if (SuccJump->Target == MatchedDstBlock->Index) { - Jump = SuccJump; - break; + for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF->Blocks) { + for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { + if (YamlSI.Count == 0) + continue; + + // Try to find the jump for a given (src, dst) pair from the profile and + // assign the jump weight based on the profile count + const uint64_t SrcIndex = YamlBB.Index; + const uint64_t DstIndex = YamlSI.Index; + + const FlowBlock *MatchedSrcBlock = MatchMap.lookup(SrcIndex).first; + const FlowBlock *MatchedDstBlock = MatchMap.lookup(DstIndex).first; + + if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) { + // Find a jump between the two blocks + FlowJump *Jump = nullptr; + for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) { + if (SuccJump->Target == MatchedDstBlock->Index) { + Jump = SuccJump; + break; + } + } + // Assign the weight, if the corresponding jump is found + if (Jump != nullptr) { + Jump->Weight = YamlSI.Count; + Jump->HasUnknownWeight = false; } } - // Assign the weight, if the corresponding jump is found - if (Jump != nullptr) { - Jump->Weight = YamlSI.Count; - Jump->HasUnknownWeight = false; - } + // Assign the weight for the src block, if it is found + if (MatchedSrcBlock != nullptr) + OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; + // Assign the weight for the dst block, if it is found + if (MatchedDstBlock != nullptr) + InWeight[MatchedDstBlock->Index] += YamlSI.Count; } - // Assign the weight for the src block, if it is found - if (MatchedSrcBlock != nullptr) - OutWeight[MatchedSrcBlock->Index] += YamlSI.Count; - // Assign the weight for the dst block, if it is found - if (MatchedDstBlock != nullptr) - InWeight[MatchedDstBlock->Index] += YamlSI.Count; } } @@ -585,7 +778,7 @@ matchWeightsByHashes(BinaryContext &BC, Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]); } - return MatchedBlocks.size(); + return MatchedBlocks[&YamlBF].size(); } /// The function finds all blocks that are (i) reachable from the Entry block @@ -803,7 +996,8 @@ void assignProfile(BinaryFunction &BF, } bool YAMLProfileReader::inferStaleProfile( - BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) { + BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF, + const ArrayRef ProbeMatchSpecs) { NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite", "Rewrite passes", opts::TimeRewrite); @@ -827,8 +1021,8 @@ bool YAMLProfileReader::inferStaleProfile( // Match as many block/jump counts from the stale profile as possible size_t MatchedBlocks = - matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, - YamlBP.Header.HashFunction, IdToYamLBF); + matchWeights(BF.getBinaryContext(), BlockOrder, YamlBF, Func, + YamlBP.Header.HashFunction, IdToYamLBF, BF, ProbeMatchSpecs); // Adjust the flow function by marking unreachable blocks Unlikely so that // they don't get any counts assigned. diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index a5dc8492b5900..ae39804823817 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/edit_distance.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -49,6 +50,8 @@ llvm::cl::opt llvm::cl::opt ProfileUseDFS("profile-use-dfs", cl::desc("use DFS order for YAML profile"), cl::Hidden, cl::cat(BoltOptCategory)); + +extern llvm::cl::opt StaleMatchingWithPseudoProbes; } // namespace opts namespace llvm { @@ -349,8 +352,13 @@ bool YAMLProfileReader::parseFunctionProfile( if (YamlBF.NumBasicBlocks != BF.size()) ++BC.Stats.NumStaleFuncsWithEqualBlockCount; - if (opts::InferStaleProfile && inferStaleProfile(BF, YamlBF)) - ProfileMatched = true; + if (!opts::InferStaleProfile) + return false; + ArrayRef ProbeMatchSpecs; + auto BFIt = BFToProbeMatchSpecs.find(&BF); + if (BFIt != BFToProbeMatchSpecs.end()) + ProbeMatchSpecs = BFIt->second; + ProfileMatched = inferStaleProfile(BF, YamlBF, ProbeMatchSpecs); } if (ProfileMatched) BF.markProfiled(YamlBP.Header.Flags); @@ -585,6 +593,101 @@ size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) { return MatchedWithCallGraph; } +size_t YAMLProfileReader::InlineTreeNodeMapTy::matchInlineTrees( + const MCPseudoProbeDecoder &Decoder, + const std::vector &DecodedInlineTree, + const MCDecodedPseudoProbeInlineTree *Root) { + // Match inline tree nodes by GUID, checksum, parent, and call site. + for (const auto &[InlineTreeNodeId, InlineTreeNode] : + llvm::enumerate(DecodedInlineTree)) { + uint64_t GUID = InlineTreeNode.GUID; + uint64_t Hash = InlineTreeNode.Hash; + uint32_t ParentId = InlineTreeNode.ParentIndexDelta; + uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe; + const MCDecodedPseudoProbeInlineTree *Cur = nullptr; + if (!InlineTreeNodeId) { + Cur = Root; + } else if (const MCDecodedPseudoProbeInlineTree *Parent = + getInlineTreeNode(ParentId)) { + for (const MCDecodedPseudoProbeInlineTree &Child : + Parent->getChildren()) { + if (Child.Guid == GUID) { + if (std::get<1>(Child.getInlineSite()) == CallSiteProbe) + Cur = &Child; + break; + } + } + } + // Don't match nodes if the profile is stale (mismatching binary FuncHash + // and YAML Hash) + if (Cur && Decoder.getFuncDescForGUID(Cur->Guid)->FuncHash == Hash) + mapInlineTreeNode(InlineTreeNodeId, Cur); + } + return Map.size(); +} + +// Decode index deltas and indirection through \p YamlPD. Return modified copy +// of \p YamlInlineTree with populated decoded fields (GUID, Hash, ParentIndex). +static std::vector +decodeYamlInlineTree(const yaml::bolt::ProfilePseudoProbeDesc &YamlPD, + std::vector YamlInlineTree) { + uint32_t ParentId = 0; + uint32_t PrevGUIDIdx = 0; + for (yaml::bolt::InlineTreeNode &InlineTreeNode : YamlInlineTree) { + uint32_t GUIDIdx = InlineTreeNode.GUIDIndex; + if (GUIDIdx != UINT32_MAX) + PrevGUIDIdx = GUIDIdx; + else + GUIDIdx = PrevGUIDIdx; + uint32_t HashIdx = YamlPD.GUIDHashIdx[GUIDIdx]; + ParentId += InlineTreeNode.ParentIndexDelta; + InlineTreeNode.GUID = YamlPD.GUID[GUIDIdx]; + InlineTreeNode.Hash = YamlPD.Hash[HashIdx]; + InlineTreeNode.ParentIndexDelta = ParentId; + } + return YamlInlineTree; +} + +size_t YAMLProfileReader::matchWithPseudoProbes(BinaryContext &BC) { + if (!opts::StaleMatchingWithPseudoProbes) + return 0; + + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + const yaml::bolt::ProfilePseudoProbeDesc &YamlPD = YamlBP.PseudoProbeDesc; + + // Set existing BF->YamlBF match into ProbeMatchSpecs for (local) probe + // matching. + assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); + for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) { + // BF is preliminary name-matched function to YamlBF + // MatchedBF is final matched function + BinaryFunction *MatchedBF = YamlProfileToFunction.lookup(YamlBF.Id); + if (!BF) + BF = MatchedBF; + if (!BF) + continue; + uint64_t GUID = BF->getGUID(); + if (!GUID) + continue; + auto It = TopLevelGUIDToInlineTree.find(GUID); + if (It == TopLevelGUIDToInlineTree.end()) + continue; + const MCDecodedPseudoProbeInlineTree *Node = It->second; + assert(Node && "Malformed TopLevelGUIDToInlineTree"); + auto &MatchSpecs = BFToProbeMatchSpecs[BF]; + auto &InlineTreeMap = + MatchSpecs.emplace_back(InlineTreeNodeMapTy(), YamlBF).first; + std::vector ProfileInlineTree = + decodeYamlInlineTree(YamlPD, YamlBF.InlineTree); + // Erase unsuccessful match + if (!InlineTreeMap.matchInlineTrees(*Decoder, ProfileInlineTree, Node)) + MatchSpecs.pop_back(); + } + + return 0; +} + size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) { if (opts::NameSimilarityFunctionMatchingThreshold == 0) return 0; @@ -716,6 +819,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { } } + if (opts::StaleMatchingWithPseudoProbes) { + const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder(); + assert(Decoder && + "If pseudo probes are in use, pseudo probe decoder should exist"); + for (const MCDecodedPseudoProbeInlineTree &TopLev : + Decoder->getDummyInlineRoot().getChildren()) + TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; + } + // Map profiled function ids to names. for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) IdToYamLBF[YamlBF.Id] = &YamlBF; @@ -725,6 +837,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { const size_t MatchedWithLTOCommonName = matchWithLTOCommonName(); const size_t MatchedWithCallGraph = matchWithCallGraph(BC); const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC); + const size_t MatchedWithPseudoProbes = matchWithPseudoProbes(BC); for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF)) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 8647df4b0edf8..0324b8f5794c3 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -51,6 +51,7 @@ static cl::opt PrintPseudoProbes( cl::Hidden, cl::cat(BoltCategory)); extern cl::opt ProfileWritePseudoProbes; +extern cl::opt StaleMatchingWithPseudoProbes; } // namespace opts namespace { @@ -92,14 +93,14 @@ class PseudoProbeRewriter final : public MetadataRewriter { }; Error PseudoProbeRewriter::preCFGInitializer() { - if (opts::ProfileWritePseudoProbes) - parsePseudoProbe(true); + if (opts::ProfileWritePseudoProbes || opts::StaleMatchingWithPseudoProbes) + parsePseudoProbe(opts::ProfileWritePseudoProbes); return Error::success(); } Error PseudoProbeRewriter::postEmitFinalizer() { - if (!opts::ProfileWritePseudoProbes) + if (!opts::StaleMatchingWithPseudoProbes) parsePseudoProbe(); updatePseudoProbes(); diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test new file mode 100644 index 0000000000000..accb4742851ea --- /dev/null +++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test @@ -0,0 +1,65 @@ +## Test stale block matching with pseudo probes including inline tree matching. +# RUN: split-file %s %t +# RUN: llvm-bolt \ +# RUN: %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin \ +# RUN: -o %t.bolt -data %t/yaml -infer-stale-profile -v=2 \ +# RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s + +# CHECK: BOLT-WARNING: 3 (100.0% of all profiled) functions have invalid (possibly stale) profile +# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (3 out of 3 stale) + +#--- yaml +--- +header: + profile-version: 1 + binary-name: 'inline-cs-pseudoprobe.perfbin' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: perf data aggregator + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: bar + fid: 9 + hash: 0x1 + exec: 1 + nblocks: 1 + blocks: + - bid: 0 + insns: 11 + hash: 0x1 + exec: 1 + probes: [ { blx: 9 } ] + inline_tree: [ { } ] + - name: foo + fid: 10 + hash: 0x2 + exec: 1 + nblocks: 6 + blocks: + - bid: 0 + insns: 3 + hash: 0x2 + exec: 1 + succ: [ { bid: 3, cnt: 0 } ] + probes: [ { blx: 3 } ] + inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ] + - name: main + fid: 11 + hash: 0x3 + exec: 1 + nblocks: 6 + blocks: + - bid: 0 + insns: 3 + hash: 0x3 + exec: 1 + succ: [ { bid: 3, cnt: 0 } ] + probes: [ { blx: 3, id: 1 }, { blx: 1 } ] + inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ] +pseudo_probe_desc: + gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ] + gh: [ 2, 0, 1 ] + hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ] +... diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test new file mode 100644 index 0000000000000..40cb64ee82919 --- /dev/null +++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test @@ -0,0 +1,63 @@ +## Tests stale block matching with pseudo probes. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --funcs=main --infer-stale-profile \ +# RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s + +# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) + +#--- main.s + .text + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +# %bb.0: + pushq %rbp + movq %rsp, %rbp + movl $0, -4(%rbp) + .pseudoprobe 15822663052811949562 1 0 0 main + xorl %eax, %eax + popq %rbp + retq +.Lfunc_end0: + .size main, .Lfunc_end0-main + # -- End function + .section .pseudo_probe_desc,"",@progbits + .quad -2624081020897602054 + .quad 4294967295 + .byte 4 + .ascii "main" + +#--- yaml +--- +header: + profile-version: 1 + binary-name: 'match-blocks-with-pseudo-probes.s.tmp.exe' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: main + fid: 0 + hash: 0x0000000000000001 + exec: 1 + nblocks: 6 + blocks: + - bid: 1 + hash: 0xFFFFFFFFFFFFFFF1 + insns: 1 + succ: [ { bid: 3, cnt: 1} ] + probes: [ { blx: 1 } ] + inline_tree: [ { g: 0 } ] +pseudo_probe_desc: + gs: [ 0xDB956436E78DD5FA ] + gh: [ 0 ] + hs: [ 0xFFFFFFFF ] diff --git a/bolt/test/X86/match-functions-with-calls-as-anchors.test b/bolt/test/X86/match-functions-with-calls-as-anchors.test index 984d614fbf85f..f8ef2880a6ce9 100644 --- a/bolt/test/X86/match-functions-with-calls-as-anchors.test +++ b/bolt/test/X86/match-functions-with-calls-as-anchors.test @@ -5,15 +5,16 @@ # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml --profile-ignore-hash -v=1 \ -# RUN: --dyno-stats --print-cfg --infer-stale-profile=1 --debug 2>&1 | FileCheck %s +# RUN: --dyno-stats --print-cfg --infer-stale-profile=1 --debug-only=bolt-prof \ +# RUN: 2>&1 | FileCheck %s # CHECK: BOLT-INFO: applying profile inference for "qux" # CHECK: Matched yaml block (bid = 1) with hash 4 to BB (index = 0) with hash 314e1bc10000 -# CHECK: loose match +# CHECK: call match # CHECK: BOLT-INFO: applying profile inference for "fred" # CHECK: Matched yaml block (bid = 1) with hash 5 to BB (index = 0) with hash 7541bc10000 -# CHECK: loose match +# CHECK: call match #--- main.s .globl foo # -- Begin function foo diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test index 378abc3825246..7a9540c48f80d 100644 --- a/bolt/test/X86/reader-stale-yaml.test +++ b/bolt/test/X86/reader-stale-yaml.test @@ -77,10 +77,10 @@ CHECK2: pre-processing profile using YAML profile reader CHECK2: applying profile inference for "SolveCubic" CHECK2: Matched yaml block (bid = 0) with hash 4600940a609c0000 to BB (index = 0) with hash 4600940a609c0000 CHECK2-NEXT: exact match -CHECK2: Matched yaml block (bid = 1) with hash 167a1f084f130088 to BB (index = 1) with hash 167a1f084f130088 -CHECK2-NEXT: exact match CHECK2: Matched yaml block (bid = 13) with hash a8d50000f81902a7 to BB (index = 13) with hash a8d5aa43f81902a7 CHECK2-NEXT: loose match +CHECK2: Matched yaml block (bid = 1) with hash 167a1f084f130088 to BB (index = 1) with hash 167a1f084f130088 +CHECK2-NEXT: exact match CHECK2: Matched yaml block (bid = 3) with hash c516000073dc00a0 to BB (index = 3) with hash c516b1c973dc00a0 CHECK2-NEXT: loose match CHECK2: Matched yaml block (bid = 5) with hash 6446e1ea500111 to BB (index = 5) with hash 6446e1ea500111