diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 96969cf53baca..f70771f01dfff 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -99,16 +99,17 @@ class DataAggregator : public DataReader { uint64_t Addr; }; - /// Container for the unit of branch data. - /// Backwards compatible with legacy use for branches and fall-throughs: - /// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only - /// contains fall-through data, - /// - if \p To is BR_ONLY, the trace only contains branch data. + /// Container for the unit of branch data, matching pre-aggregated trace type. + /// Backwards compatible with branch and fall-through types: + /// - if \p To is < 0, the trace only contains branch data (BR_ONLY), + /// - if \p Branch is < 0, the trace only contains fall-through data + /// (FT_ONLY, FT_EXTERNAL_ORIGIN, or FT_EXTERNAL_RETURN). struct Trace { static constexpr const uint64_t EXTERNAL = 0ULL; static constexpr const uint64_t BR_ONLY = -1ULL; static constexpr const uint64_t FT_ONLY = -1ULL; static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL; + static constexpr const uint64_t FT_EXTERNAL_RETURN = -3ULL; uint64_t Branch; uint64_t From; @@ -388,9 +389,9 @@ class DataAggregator : public DataReader { /// File format syntax: /// E /// S - /// T + /// [TR] /// B - /// [Ff] + /// [Ffr] /// /// where , , have the format [:] /// @@ -401,8 +402,11 @@ class DataAggregator : public DataReader { /// f - an aggregated fall-through with external origin - used to disambiguate /// between a return hitting a basic block head and a regular internal /// jump to the block + /// r - an aggregated fall-through originating at an external return, no + /// checks are performed for a fallthrough start /// T - an aggregated trace: branch from to with a fall-through /// to + /// R - an aggregated trace originating at a return /// /// - build id of the object containing the address. We can skip it for /// the main binary and use "X" for an unknown object. This will save some @@ -530,7 +534,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DataAggregator::Trace &T) { switch (T.Branch) { case DataAggregator::Trace::FT_ONLY: + break; case DataAggregator::Trace::FT_EXTERNAL_ORIGIN: + OS << "X:0 -> "; + break; + case DataAggregator::Trace::FT_EXTERNAL_RETURN: + OS << "X:R -> "; break; default: OS << Twine::utohexstr(T.Branch) << " -> "; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 178c9d3a63730..1c94a2569ea08 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -524,8 +524,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { heatmap: // Sort parsed traces for faster processing. - if (!opts::BasicAggregation) - llvm::sort(Traces, llvm::less_first()); + llvm::sort(Traces, llvm::less_first()); if (!opts::HeatmapMode) return Error::success(); @@ -870,13 +869,9 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace, // Adjust FromBB if the first LBR is a return from the last instruction in // the previous block (that instruction should be a call). - if (IsReturn) { - if (From) - FromBB = BF.getBasicBlockContainingOffset(From - 1); - else - LLVM_DEBUG(dbgs() << "return to the function start: " << Trace << '\n'); - } else if (Trace.Branch == Trace::EXTERNAL && From == FromBB->getOffset() && - !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { + if (Trace.Branch != Trace::FT_ONLY && !BF.containsAddress(Trace.Branch) && + From == FromBB->getOffset() && + (IsReturn ? From : !(FromBB->isEntryPoint() || FromBB->isLandingPad()))) { const BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1); if (PrevBB->getSuccessor(FromBB->getLabel())) { @@ -1202,12 +1197,14 @@ ErrorOr DataAggregator::parseLocationOrOffset() { std::error_code DataAggregator::parseAggregatedLBREntry() { enum AggregatedLBREntry : char { INVALID = 0, - EVENT_NAME, // E - TRACE, // T - SAMPLE, // S - BRANCH, // B - FT, // F - FT_EXTERNAL_ORIGIN // f + EVENT_NAME, // E + TRACE, // T + RETURN, // R + SAMPLE, // S + BRANCH, // B + FT, // F + FT_EXTERNAL_ORIGIN, // f + FT_EXTERNAL_RETURN // r } Type = INVALID; /// The number of fields to parse, set based on \p Type. @@ -1235,20 +1232,22 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { Type = StringSwitch(Str) .Case("T", TRACE) + .Case("R", RETURN) .Case("S", SAMPLE) .Case("E", EVENT_NAME) .Case("B", BRANCH) .Case("F", FT) .Case("f", FT_EXTERNAL_ORIGIN) + .Case("r", FT_EXTERNAL_RETURN) .Default(INVALID); if (Type == INVALID) { - reportError("expected T, S, E, B, F or f"); + reportError("expected T, R, S, E, B, F, f or r"); return make_error_code(llvm::errc::io_error); } using SSI = StringSwitch; - AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2); + AddrNum = SSI(Str).Cases("T", "R", 3).Case("S", 1).Case("E", 0).Default(2); CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1); } @@ -1305,17 +1304,30 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { if (ToFunc) ToFunc->setHasProfileAvailable(); - /// For legacy fall-through types, adjust locations to match Trace container. - if (Type == FT || Type == FT_EXTERNAL_ORIGIN) { + /// For fall-through types, adjust locations to match Trace container. + if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) { Addr[2] = Location(Addr[1]->Offset); // Trace To Addr[1] = Location(Addr[0]->Offset); // Trace From - // Put a magic value into Trace Branch to differentiate from a full trace. - Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN); + // Put a magic value into Trace Branch to differentiate from a full trace: + if (Type == FT) + Addr[0] = Location(Trace::FT_ONLY); + else if (Type == FT_EXTERNAL_ORIGIN) + Addr[0] = Location(Trace::FT_EXTERNAL_ORIGIN); + else if (Type == FT_EXTERNAL_RETURN) + Addr[0] = Location(Trace::FT_EXTERNAL_RETURN); + else + llvm_unreachable("Unexpected fall-through type"); } - /// For legacy branch type, mark Trace To to differentite from a full trace. - if (Type == BRANCH) { + /// For branch type, mark Trace To to differentiate from a full trace. + if (Type == BRANCH) Addr[2] = Location(Trace::BR_ONLY); + + if (Type == RETURN) { + if (!Addr[0]->Offset) + Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN; + else + Returns.emplace(Addr[0]->Offset); } /// Record a trace. @@ -1565,6 +1577,7 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); + Returns.emplace(Trace::FT_EXTERNAL_RETURN); for (const auto &[Trace, Info] : Traces) { bool IsReturn = checkReturn(Trace.Branch); // Ignore returns. diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index c2ef024db9475..8c05491e7bca0 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -10,6 +10,10 @@ # RUN: link_fdata %s %t %t.pa-ret PREAGG-RET # Trace from an external location to a landing pad/entry point call continuation # RUN: link_fdata %s %t %t.pa-ext PREAGG-EXT +# Return trace to a landing pad/entry point call continuation +# RUN: link_fdata %s %t %t.pa-pret PREAGG-PRET +# External return to a landing pad/entry point call continuation +# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET # RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT # RUN: llvm-strip --strip-unneeded %t -o %t.strip @@ -38,6 +42,21 @@ # RUN: llvm-bolt %t.strip --pa -p %t.pa-ext -o %t.out \ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP +## Check pre-aggregated return traces from external location attach call +## continuation fallthrough count to secondary entry point (unstripped) +# RUN: llvm-bolt %t --pa -p %t.pa-pret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH +## Check pre-aggregated return traces from external location attach call +## continuation fallthrough count to landing pad (stripped, landing pad) +# RUN: llvm-bolt %t.strip --pa -p %t.pa-pret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH + +## Same for external return type +# RUN: llvm-bolt %t --pa -p %t.pa-eret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH +# RUN: llvm-bolt %t.strip --pa -p %t.pa-eret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH + ## Check pre-aggregated traces don't report zero-sized PLT fall-through as ## invalid trace # RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.pa-plt -o %t.out | FileCheck %s \ @@ -92,6 +111,10 @@ Ltmp4_br: # PREAGG-RET: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1 ## Target is a secondary entry point (unstripped) or a landing pad (stripped) # PREAGG-EXT: T X:0 #Ltmp3# #Ltmp3_br# 1 +## Pre-aggregated return trace +# PREAGG-PRET: R X:0 #Ltmp3# #Ltmp3_br# 1 +## External return +# PREAGG-ERET: r #Ltmp3# #Ltmp3_br# 1 # CHECK-ATTACH: callq foo # CHECK-ATTACH-NEXT: count: 1 diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index 5a9752068bb9f..898dce8e3fb5f 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -36,9 +36,9 @@ fdata_pat = re.compile(r"([01].*) (?P\d+) (?P\d+)") # Pre-aggregated profile: -# {T|S|E|B|F|f} [] [] [] +# {T|R|S|E|B|F|f|r} [] [] [] # : [:] -preagg_pat = re.compile(r"(?P[TSBFf]) (?P.*)") +preagg_pat = re.compile(r"(?P[TRSBFfr]) (?P.*)") # No-LBR profile: #