@@ -117,8 +117,8 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
117117 " The cost of increasing an unknown fall-through jump count by one." ),
118118 cl::init(3 ), cl::ReallyHidden, cl::cat(BoltOptCategory));
119119
120- cl::opt<bool > StaleMatchingWithBlockPseudoProbes (
121- " stale-matching-with-block- pseudo-probes" ,
120+ cl::opt<bool > StaleMatchingWithPseudoProbes (
121+ " stale-matching-with-pseudo-probes" ,
122122 cl::desc (" Turns on stale matching with block pseudo probes." ),
123123 cl::init(false ), cl::ReallyHidden, cl::cat(BoltOptCategory));
124124
@@ -328,7 +328,7 @@ class StaleMatcher {
   std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
       const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
       const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) const {
-    if (!opts::StaleMatchingWithBlockPseudoProbes)
+    if (!opts::StaleMatchingWithPseudoProbes)
       return {nullptr, false};
 
     DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
@@ -574,7 +574,8 @@ size_t matchWeightsByHashes(
     BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
     const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
     HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
-    const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD) {
+    const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD,
+    const YAMLProfileReader::GUIDInlineTreeMap &TopLevelGUIDToInlineTree) {
 
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
@@ -605,21 +606,19 @@ size_t matchWeightsByHashes(
   }
   StaleMatcher Matcher;
   // Collects function pseudo probes for use in the StaleMatcher.
-  if (opts::StaleMatchingWithBlockPseudoProbes) {
-    const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
-    assert(PseudoProbeDecoder &&
+  if (opts::StaleMatchingWithPseudoProbes) {
+    const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
+    assert(Decoder &&
            "If pseudo probes are in use, pseudo probe decoder should exist");
-    const AddressProbesMap &ProbeMap =
-        PseudoProbeDecoder->getAddress2ProbesMap();
+    const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
     const uint64_t FuncAddr = BF.getAddress();
     for (const MCDecodedPseudoProbe &Probe :
          ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
       if (const BinaryBasicBlock *BB =
               BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
         Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
+
     // Match inline tree nodes by GUID, checksum, parent, and call site.
-    const MCDecodedPseudoProbeInlineTree *DummyInlineRoot =
-        &PseudoProbeDecoder->getDummyInlineRoot();
     uint32_t ParentId = 0;
     uint32_t PrevGUIDIdx = 0;
     uint32_t Index = 0;
@@ -638,23 +637,24 @@ size_t matchWeightsByHashes(
       uint32_t InlineTreeNodeId = Index++;
       ParentId += InlineTreeNode.ParentIndexDelta;
       uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
-      const MCDecodedPseudoProbeInlineTree *ParentNode =
-          InlineTreeNodeId ? Matcher.getInlineTreeNode(ParentId)
-                           : DummyInlineRoot;
-      if (!ParentNode)
-        continue;
-      for (const MCDecodedPseudoProbeInlineTree &Child :
-           ParentNode->getChildren()) {
-        if (Child.Guid != GUID ||
-            PseudoProbeDecoder->getFuncDescForGUID(GUID)->FuncHash != Hash)
-          continue;
-        // Check inline site for non-toplev inline tree nodes.
-        if (ParentNode != DummyInlineRoot &&
-            std::get<1>(Child.getInlineSite()) != CallSiteProbe)
-          continue;
-        Matcher.mapInlineTreeNode(InlineTreeNodeId, &Child);
-        break;
+      const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
+      if (!InlineTreeNodeId) {
+        auto It = TopLevelGUIDToInlineTree.find(GUID);
+        if (It != TopLevelGUIDToInlineTree.end())
+          Cur = It->second;
+      } else if (const MCDecodedPseudoProbeInlineTree *Parent =
+                     Matcher.getInlineTreeNode(ParentId)) {
+        for (const MCDecodedPseudoProbeInlineTree &Child :
+             Parent->getChildren()) {
+          if (Child.Guid == GUID) {
+            if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
+              Cur = &Child;
+            break;
+          }
+        }
       }
+      if (Cur && Decoder->getFuncDescForGUID(GUID)->FuncHash == Hash)
+        Matcher.mapInlineTreeNode(InlineTreeNodeId, Cur);
     }
   }
   Matcher.init(Blocks, BlendedHashes, CallHashes);
@@ -1028,9 +1028,10 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  size_t MatchedBlocks = matchWeightsByHashes(
-      BF.getBinaryContext(), BlockOrder, YamlBF, Func,
-      YamlBP.Header.HashFunction, IdToYamLBF, BF, YamlBP.PseudoProbeDesc);
+  size_t MatchedBlocks =
+      matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
+                           YamlBP.Header.HashFunction, IdToYamLBF, BF,
+                           YamlBP.PseudoProbeDesc, TopLevelGUIDToInlineTree);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
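
Context for the new TopLevelGUIDToInlineTree parameter: the hunks above only show the consumer side in matchWeightsByHashes; the map itself is built elsewhere in YAMLProfileReader and is not part of this diff. Below is a minimal sketch of how such a lookup table could be populated, assuming GUIDInlineTreeMap maps a function GUID to its top-level decoded inline tree node; the standalone alias and the helper name buildTopLevelGUIDMap are hypothetical and used only for illustration.

// Sketch only -- not part of the patch. Assumes GUIDInlineTreeMap is a
// DenseMap from function GUID to its top-level MCDecodedPseudoProbeInlineTree;
// buildTopLevelGUIDMap is a hypothetical helper name.
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCPseudoProbe.h"
#include <cstdint>

using GUIDInlineTreeMap =
    llvm::DenseMap<uint64_t, const llvm::MCDecodedPseudoProbeInlineTree *>;

static GUIDInlineTreeMap
buildTopLevelGUIDMap(const llvm::MCPseudoProbeDecoder &Decoder) {
  GUIDInlineTreeMap Map;
  // Top-level (non-inlined) functions are the direct children of the decoder's
  // dummy inline root; indexing them by GUID lets matchWeightsByHashes resolve
  // a top-level inline tree node with one hash lookup instead of walking the
  // dummy root's children for every profiled function.
  for (const llvm::MCDecodedPseudoProbeInlineTree &Child :
       Decoder.getDummyInlineRoot().getChildren())
    Map.try_emplace(Child.Guid, &Child);
  return Map;
}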