@@ -59,6 +59,7 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
5959 // pro/epi tracker(Dwarf CFI) for the precise check.
6060 uint64_t Source = State.getCurrentLBRSource ();
6161 auto *ParentFrame = State.getParentFrame ();
62+
6263 if (ParentFrame == State.getDummyRootPtr () ||
6364 ParentFrame->Address != Source) {
6465 State.switchToFrame (Source);
@@ -121,7 +122,7 @@ void VirtualUnwinder::unwindReturn(UnwindState &State) {
121122 State.InstPtr .update (LBR.Source );
122123}
123124
124- void VirtualUnwinder::unwindBranchWithinFrame (UnwindState &State) {
125+ void VirtualUnwinder::unwindBranch (UnwindState &State) {
125126 // TODO: Tolerate tail call for now, as we may see tail call from libraries.
126127 // This is only for intra function branches, excluding tail calls.
127128 uint64_t Source = State.getCurrentLBRSource ();
@@ -219,7 +220,7 @@ void VirtualUnwinder::collectSamplesFromFrameTrie(
219220
220221void VirtualUnwinder::recordBranchCount (const LBREntry &Branch,
221222 UnwindState &State, uint64_t Repeat) {
222- if (Branch.IsArtificial )
223+ if (Branch.IsArtificial || Branch. Target == ExternalAddr )
223224 return ;
224225
225226 if (Binary->usePseudoProbes ()) {
@@ -242,21 +243,18 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
242243 if (!State.validateInitialState ())
243244 return false ;
244245
245- // Also do not attempt linear unwind for the leaf range as it's incomplete.
246- bool IsLeaf = true ;
247-
248246 // Now process the LBR samples in parrallel with stack sample
249247 // Note that we do not reverse the LBR entry order so we can
250248 // unwind the sample stack as we walk through LBR entries.
251249 while (State.hasNextLBR ()) {
252250 State.checkStateConsistency ();
253251
254- // Unwind implicit calls/returns from inlining, along the linear path,
255- // break into smaller sub section each with its own calling context.
256- if (!IsLeaf) {
252+ // Do not attempt linear unwind for the leaf range as it's incomplete.
253+ if (!State.IsLastLBR ()) {
254+ // Unwind implicit calls/returns from inlining, along the linear path,
255+ // break into smaller sub section each with its own calling context.
257256 unwindLinear (State, Repeat);
258257 }
259- IsLeaf = false ;
260258
261259 // Save the LBR branch before it gets unwound.
262260 const LBREntry &Branch = State.getCurrentLBR ();
@@ -271,9 +269,15 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
271269 // Unwind returns - check whether the IP is indeed at a return instruction
272270 unwindReturn (State);
273271 } else {
274- // Unwind branches - for regular intra function branches, we only
275- // need to record branch with context.
276- unwindBranchWithinFrame (State);
272+ // Unwind branches
273+ // For regular intra function branches, we only need to record branch with
274+ // context. For an artificial branch cross function boundaries, we got an
275+ // issue with returning to external code. Take the two LBR enties for
276+ // example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader,
277+ // we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo
278+ // return to bar. Here we detect and treat this case as BRANCH instead of
279+ // RETURN which only update the source address.
280+ unwindBranch (State);
277281 }
278282 State.advanceLBR ();
279283 // Record `branch` with calling context after unwinding.
@@ -432,9 +436,9 @@ void HybridPerfReader::unwindSamples() {
432436 if (Binary->useFSDiscriminator ())
433437 exitWithError (" FS discriminator is not supported in CS profile." );
434438 std::set<uint64_t > AllUntrackedCallsites;
439+ VirtualUnwinder Unwinder (&SampleCounters, Binary);
435440 for (const auto &Item : AggregatedSamples) {
436441 const PerfSample *Sample = Item.first .getPtr ();
437- VirtualUnwinder Unwinder (&SampleCounters, Binary);
438442 Unwinder.unwind (Sample, Item.second );
439443 auto &CurrUntrackedCallsites = Unwinder.getUntrackedCallsites ();
440444 AllUntrackedCallsites.insert (CurrUntrackedCallsites.begin (),
@@ -508,26 +512,32 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
508512 bool IsOutgoing = SrcIsInternal && !DstIsInternal;
509513 bool IsArtificial = false ;
510514
511- // Ignore branches outside the current binary. Ignore all remaining branches
512- // if there's no incoming branch before the external branch in reverse
513- // order.
515+ // Ignore branches outside the current binary.
514516 if (IsExternal) {
515- if (PrevTrDst)
516- continue ;
517- if (!LBRStack.empty ()) {
517+ if (!PrevTrDst && !LBRStack.empty ()) {
518518 WithColor::warning ()
519519 << " Invalid transfer to external code in LBR record at line "
520520 << TraceIt.getLineNumber () << " : " << TraceIt.getCurrentLine ()
521521 << " \n " ;
522522 }
523- break ;
523+ // Do not ignore the entire samples, the remaining LBR can still be
524+ // unwound using a context-less stack.
525+ continue ;
524526 }
525527
526528 if (IsOutgoing) {
527529 if (!PrevTrDst) {
528- // This is unpaired outgoing jump which is likely due to interrupt or
529- // incomplete LBR trace. Ignore current and subsequent entries since
530- // they are likely in different contexts.
530+ // This is a leading outgoing LBR, we should keep processing the LBRs.
531+ if (LBRStack.empty ()) {
532+ NumLeadingOutgoingLBR++;
533+ // Record this LBR since current source and next LBR' target is still
534+ // a valid range.
535+ LBRStack.emplace_back (LBREntry (Src, ExternalAddr, false ));
536+ continue ;
537+ }
538+ // This is middle unpaired outgoing jump which is likely due to
539+ // interrupt or incomplete LBR trace. Ignore current and subsequent
540+ // entries since they are likely in different contexts.
531541 break ;
532542 }
533543
@@ -593,9 +603,17 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
593603 }
594604 TraceIt.advance ();
595605 // Currently intermixed frame from different binaries is not supported.
596- // Ignore caller frames not from binary of interest.
597- if (!Binary->addressIsCode (FrameAddr))
598- break ;
606+ if (!Binary->addressIsCode (FrameAddr)) {
607+ if (CallStack.empty ())
608+ NumLeafExternalFrame++;
609+ // Push a special value(ExternalAddr) for the external frames so that
610+ // unwinder can still work on this with artificial Call/Return branch.
611+ // After unwinding, the context will be truncated for external frame.
612+ // Also deduplicate the consecutive external addresses.
613+ if (CallStack.empty () || CallStack.back () != ExternalAddr)
614+ CallStack.emplace_back (ExternalAddr);
615+ continue ;
616+ }
599617
600618 // We need to translate return address to call address for non-leaf frames.
601619 if (!CallStack.empty ()) {
@@ -613,6 +631,10 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
613631 CallStack.emplace_back (FrameAddr);
614632 }
615633
634+ // Strip out the bottom external addr.
635+ if (CallStack.size () > 1 && CallStack.back () == ExternalAddr)
636+ CallStack.pop_back ();
637+
616638 // Skip other unrelated line, find the next valid LBR line
617639 // Note that even for empty call stack, we should skip the address at the
618640 // bottom, otherwise the following pass may generate a truncated callstack
@@ -885,6 +907,7 @@ uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
885907}
886908
887909void PerfScriptReader::parseSample (TraceStream &TraceIt) {
910+ NumTotalSample++;
888911 uint64_t Count = parseAggregatedCount (TraceIt);
889912 assert (Count >= 1 && " Aggregated count should be >= 1!" );
890913 parseSample (TraceIt, Count);
@@ -1131,6 +1154,11 @@ void PerfScriptReader::parsePerfTraces() {
11311154 // Parse perf traces and do aggregation.
11321155 parseAndAggregateTrace ();
11331156
1157+ emitWarningSummary (NumLeafExternalFrame, NumTotalSample,
1158+ " of samples have leaf external frame in call stack." );
1159+ emitWarningSummary (NumLeadingOutgoingLBR, NumTotalSample,
1160+ " of samples have leading external LBR." );
1161+
11341162 // Generate unsymbolized profile.
11351163 warnTruncatedStack ();
11361164 warnInvalidRange ();
0 commit comments