@@ -77,6 +77,11 @@ FilterPID("pid",
   cl::Optional,
   cl::cat(AggregatorCategory));
 
+static cl::opt<bool> ImputeTraceFallthrough(
+    "impute-trace-fall-through",
+    cl::desc("impute missing fall-throughs for branch-only traces"),
+    cl::Optional, cl::cat(AggregatorCategory));
+
 static cl::opt<bool>
 IgnoreBuildID("ignore-build-id",
   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
@@ -513,6 +518,69 @@ void DataAggregator::parsePerfData(BinaryContext &BC) {
   deleteTempFiles();
 }
 
+void DataAggregator::imputeFallThroughs() {
+  if (Traces.empty())
+    return;
+
+  std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
+  uint64_t AggregateCount = 0;
+  uint64_t AggregateFallthroughSize = 0;
+  uint64_t InferredTraces = 0;
+
+  // Helper map caching whether the instruction at a given address is a
+  // call/ret/unconditional branch.
+  std::unordered_map<uint64_t, bool> IsUncondCTMap;
+  auto checkUnconditionalControlTransfer = [&](const uint64_t Addr) {
+    auto isUncondCT = [&](const MCInst &MI) -> bool {
+      return BC->MIB->isUnconditionalControlTransfer(MI);
+    };
+    return testAndSet<bool>(Addr, isUncondCT, IsUncondCTMap).value_or(true);
+  };
+
+  // Traces are sorted by their component addresses (Branch, From, To).
+  // assert(is_sorted(Traces));
+
+  // Traces corresponding to the top-of-stack branch entry with a missing
+  // fall-through have BR_ONLY (-1ULL/UINT64_MAX) in the To field, meaning
+  // that for fixed values of Branch and From, branch-only traces are stored
+  // after all traces with a valid fall-through.
+  //
+  // Group traces by (Branch, From) and compute the weighted average
+  // fall-through length for the top-of-stack trace (closing the group) by
+  // accumulating the fall-through lengths of traces with valid fall-throughs
+  // earlier in the group.
+  for (auto &[Trace, Info] : Traces) {
+    // Skip fall-throughs in external code.
+    if (Trace.From == Trace::EXTERNAL)
+      continue;
+    std::pair CurrentBranch(Trace.Branch, Trace.From);
+    // BR_ONLY must be the last trace in the group.
+    if (Trace.To == Trace::BR_ONLY) {
+      // If the group is not empty, use the aggregate values; otherwise,
+      // impute a 0-byte fall-through for unconditional control transfers
+      // (call/ret/unconditional branch) and a 1-byte one for others.
+      uint64_t InferredBytes =
+          PrevBranch == CurrentBranch
+              ? AggregateFallthroughSize / AggregateCount
+              : !checkUnconditionalControlTransfer(Trace.From);
+      Trace.To = Trace.From + InferredBytes;
+      LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
+                        << " bytes)\n");
+      ++InferredTraces;
+    } else {
+      // Trace with a valid fall-through.
+      // New group: reset the aggregates.
+      if (CurrentBranch != PrevBranch)
+        AggregateCount = AggregateFallthroughSize = 0;
+      // Only use valid fall-through lengths.
+      if (Trace.To != Trace::EXTERNAL)
+        AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
+      AggregateCount += Info.TakenCount;
+    }
+    PrevBranch = CurrentBranch;
+  }
+  if (opts::Verbosity >= 1)
+    outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
+}
+
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
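For intuition, here is a minimal standalone model of the grouping arithmetic above (the numbers and the flattened `Group` representation are hypothetical, not BOLT code). It computes the same truncated weighted average that the group's branch-only trace receives when the group is non-empty:

```cpp
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Simplified model of one (Branch, From) group: each entry is a
// (fall-through length in bytes, taken count) pair observed before the
// group-closing branch-only trace.
int main() {
  std::vector<std::pair<uint64_t, uint64_t>> Group = {{4, 3}, {10, 1}};

  uint64_t AggregateFallthroughSize = 0, AggregateCount = 0;
  for (auto [Bytes, TakenCount] : Group) {
    AggregateFallthroughSize += Bytes * TakenCount;
    AggregateCount += TakenCount;
  }

  // Truncated weighted average: (4*3 + 10*1) / (3 + 1) = 22 / 4 = 5 bytes,
  // mirroring AggregateFallthroughSize / AggregateCount in the diff above.
  uint64_t InferredBytes = AggregateFallthroughSize / AggregateCount;
  std::cout << "imputed " << InferredBytes << " bytes\n"; // imputed 5 bytes
}
```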
@@ -525,6 +593,9 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   // Sort parsed traces for faster processing.
   llvm::sort(Traces, llvm::less_first());
 
+  if (opts::ImputeTraceFallthrough)
+    imputeFallThroughs();
+
   if (opts::HeatmapMode) {
     if (std::error_code EC = printLBRHeatMap())
       return errorCodeToError(EC);
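Worth noting: the imputation pass depends on this sort. With lexicographic ordering over (Branch, From, To), the BR_ONLY sentinel (-1ULL) is the largest possible To value, so each (Branch, From) group ends with its branch-only trace. A standalone sketch of that property (hypothetical `Key` type and addresses, not BOLT's actual `Trace`):

```cpp
#include <algorithm>
#include <cstdint>
#include <tuple>
#include <vector>

// Stand-in for the (Branch, From, To) ordering key.
struct Key {
  uint64_t Branch, From, To;
  bool operator<(const Key &O) const {
    return std::tie(Branch, From, To) < std::tie(O.Branch, O.From, O.To);
  }
};

int main() {
  const uint64_t BR_ONLY = UINT64_MAX; // the -1ULL sentinel from the diff
  std::vector<Key> Traces = {{5, 10, BR_ONLY}, {5, 10, 20}, {5, 10, 14}};
  std::sort(Traces.begin(), Traces.end());
  // Sorted order: {5,10,14}, {5,10,20}, {5,10,BR_ONLY} -- the branch-only
  // trace closes its (Branch, From) group, so a single forward pass can
  // accumulate valid fall-throughs before reaching the trace to impute.
}
```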
@@ -726,22 +797,10 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
 }
 
 bool DataAggregator::checkReturn(uint64_t Addr) {
-  auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
-  if (llvm::is_contained(Returns, Addr))
-    return true;
-
-  BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
-  if (!Func)
-    return false;
-
-  const uint64_t Offset = Addr - Func->getAddress();
-  if (Func->hasInstructions()
-          ? isReturn(Func->getInstructionAtOffset(Offset))
-          : isReturn(Func->disassembleInstructionAtOffset(Offset))) {
-    Returns.emplace(Addr);
-    return true;
-  }
-  return false;
+  auto isReturn = [&](const MCInst &MI) -> bool {
+    return BC->MIB->isReturn(MI);
+  };
+  return testAndSet<bool>(Addr, isReturn, Returns).value_or(false);
 }
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
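The rewritten `checkReturn` delegates to a `testAndSet` helper that this diff does not define. Judging from the call sites here and in `imputeFallThroughs`, and from the two-argument `Returns.emplace(..., true)` calls below, `Returns` has presumably become an address-to-bool map, and `testAndSet` memoizes a per-instruction predicate keyed by address. A hypothetical reconstruction, modeled on the removed `checkReturn` body (an assumption, not the committed helper):

```cpp
// Hypothetical sketch only -- the signature and details are inferred from
// the call sites; the real definition lives elsewhere in this change.
template <typename T, typename CallbackTy>
std::optional<T> testAndSet(uint64_t Addr, CallbackTy Callback,
                            std::unordered_map<uint64_t, T> &Map) {
  if (auto It = Map.find(Addr); It != Map.end())
    return It->second; // cached from an earlier query
  BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
  if (!Func)
    return std::nullopt; // address resolves to no known code
  const uint64_t Offset = Addr - Func->getAddress();
  // Mirror the removed checkReturn body: the instruction may come from an
  // already-disassembled function or be disassembled on demand.
  auto apply = [&](auto MI) -> std::optional<T> {
    if (!MI)
      return std::nullopt;
    return Map.emplace(Addr, Callback(*MI)).first->second;
  };
  return Func->hasInstructions()
             ? apply(Func->getInstructionAtOffset(Offset))
             : apply(Func->disassembleInstructionAtOffset(Offset));
}
```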
@@ -1331,7 +1390,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
     if (!Addr[0]->Offset)
       Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
     else
-      Returns.emplace(Addr[0]->Offset);
+      Returns.emplace(Addr[0]->Offset, true);
   }
 
 /// Record a trace.
@@ -1592,7 +1651,7 @@ void DataAggregator::processBranchEvents() {
   NamedRegionTimer T("processBranch", "Processing branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  Returns.emplace(Trace::FT_EXTERNAL_RETURN);
+  Returns.emplace(Trace::FT_EXTERNAL_RETURN, true);
   for (const auto &[Trace, Info] : Traces) {
     bool IsReturn = checkReturn(Trace.Branch);
     // Ignore returns.