Skip to content

Commit cf11ce0

Browse files
author
joaosaffran
committed
Merge branch 'main' into metadata/static-samplers
2 parents 2507ea3 + b5d5708 commit cf11ce0

File tree

439 files changed

+14804
-7069
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

439 files changed

+14804
-7069
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class DataAggregator : public DataReader {
8585
};
8686
friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &);
8787

88+
friend struct PerfSpeEventsTestHelper;
89+
8890
struct PerfBranchSample {
8991
SmallVector<LBREntry, 32> LBR;
9092
};
@@ -99,16 +101,17 @@ class DataAggregator : public DataReader {
99101
uint64_t Addr;
100102
};
101103

102-
/// Container for the unit of branch data.
103-
/// Backwards compatible with legacy use for branches and fall-throughs:
104-
/// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only
105-
/// contains fall-through data,
106-
/// - if \p To is BR_ONLY, the trace only contains branch data.
104+
/// Container for the unit of branch data, matching pre-aggregated trace type.
105+
/// Backwards compatible with branch and fall-through types:
106+
/// - if \p To is < 0, the trace only contains branch data (BR_ONLY),
107+
/// - if \p Branch is < 0, the trace only contains fall-through data
108+
/// (FT_ONLY, FT_EXTERNAL_ORIGIN, or FT_EXTERNAL_RETURN).
107109
struct Trace {
108110
static constexpr const uint64_t EXTERNAL = 0ULL;
109111
static constexpr const uint64_t BR_ONLY = -1ULL;
110112
static constexpr const uint64_t FT_ONLY = -1ULL;
111113
static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL;
114+
static constexpr const uint64_t FT_EXTERNAL_RETURN = -3ULL;
112115

113116
uint64_t Branch;
114117
uint64_t From;
@@ -388,9 +391,9 @@ class DataAggregator : public DataReader {
388391
/// File format syntax:
389392
/// E <event>
390393
/// S <start> <count>
391-
/// T <start> <end> <ft_end> <count>
394+
/// [TR] <start> <end> <ft_end> <count>
392395
/// B <start> <end> <count> <mispred_count>
393-
/// [Ff] <start> <end> <count>
396+
/// [Ffr] <start> <end> <count>
394397
///
395398
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
396399
///
@@ -401,8 +404,11 @@ class DataAggregator : public DataReader {
401404
/// f - an aggregated fall-through with external origin - used to disambiguate
402405
/// between a return hitting a basic block head and a regular internal
403406
/// jump to the block
407+
/// r - an aggregated fall-through originating at an external return, no
408+
/// checks are performed for the fall-through start
404409
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
405410
/// to <ft_end>
411+
/// R - an aggregated trace originating at a return
406412
///
407413
/// <id> - build id of the object containing the address. We can skip it for
408414
/// the main binary and use "X" for an unknown object. This will save some
@@ -530,7 +536,12 @@ inline raw_ostream &operator<<(raw_ostream &OS,
530536
const DataAggregator::Trace &T) {
531537
switch (T.Branch) {
532538
case DataAggregator::Trace::FT_ONLY:
539+
break;
533540
case DataAggregator::Trace::FT_EXTERNAL_ORIGIN:
541+
OS << "X:0 -> ";
542+
break;
543+
case DataAggregator::Trace::FT_EXTERNAL_RETURN:
544+
OS << "X:R -> ";
534545
break;
535546
default:
536547
OS << Twine::utohexstr(T.Branch) << " -> ";

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory;
4848
extern llvm::cl::opt<unsigned> AlignText;
4949
extern llvm::cl::opt<unsigned> AlignFunctions;
5050
extern llvm::cl::opt<bool> AggregateOnly;
51+
extern llvm::cl::opt<bool> ArmSPE;
5152
extern llvm::cl::opt<unsigned> BucketsPerLine;
5253
extern llvm::cl::opt<bool> CompactCodeModel;
5354
extern llvm::cl::opt<bool> DiffOnly;

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 85 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ static cl::opt<bool>
4949
cl::desc("aggregate basic samples (without LBR info)"),
5050
cl::cat(AggregatorCategory));
5151

52+
cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
53+
cl::cat(AggregatorCategory));
54+
5255
static cl::opt<std::string>
5356
ITraceAggregation("itrace",
5457
cl::desc("Generate LBR info with perf itrace argument"),
@@ -181,11 +184,21 @@ void DataAggregator::start() {
181184

182185
findPerfExecutable();
183186

187+
if (opts::ArmSPE) {
188+
// pid from_ip to_ip flags
189+
// where flags could be:
190+
// P/M: whether branch was Predicted or Mispredicted.
191+
// N: optionally appears when the branch was Not-Taken (i.e. fall-through)
192+
// 12345 0x123/0x456/PN/-/-/8/RET/-
193+
opts::ITraceAggregation = "bl";
194+
opts::ParseMemProfile = true;
195+
opts::BasicAggregation = false;
196+
}
197+
184198
if (opts::BasicAggregation) {
185-
launchPerfProcess("events without LBR",
186-
MainEventsPPI,
199+
launchPerfProcess("events without LBR", MainEventsPPI,
187200
"script -F pid,event,ip",
188-
/*Wait = */false);
201+
/*Wait = */ false);
189202
} else if (!opts::ITraceAggregation.empty()) {
190203
// Disable parsing memory profile from trace data, unless requested by user.
191204
if (!opts::ParseMemProfile.getNumOccurrences())
@@ -524,8 +537,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
524537

525538
heatmap:
526539
// Sort parsed traces for faster processing.
527-
if (!opts::BasicAggregation)
528-
llvm::sort(Traces, llvm::less_first());
540+
llvm::sort(Traces, llvm::less_first());
529541

530542
if (!opts::HeatmapMode)
531543
return Error::success();
@@ -870,13 +882,9 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace,
870882

871883
// Adjust FromBB if the first LBR is a return from the last instruction in
872884
// the previous block (that instruction should be a call).
873-
if (IsReturn) {
874-
if (From)
875-
FromBB = BF.getBasicBlockContainingOffset(From - 1);
876-
else
877-
LLVM_DEBUG(dbgs() << "return to the function start: " << Trace << '\n');
878-
} else if (Trace.Branch == Trace::EXTERNAL && From == FromBB->getOffset() &&
879-
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
885+
if (Trace.Branch != Trace::FT_ONLY && !BF.containsAddress(Trace.Branch) &&
886+
From == FromBB->getOffset() &&
887+
(IsReturn ? From : !(FromBB->isEntryPoint() || FromBB->isLandingPad()))) {
880888
const BinaryBasicBlock *PrevBB =
881889
BF.getLayout().getBlock(FromBB->getIndex() - 1);
882890
if (PrevBB->getSuccessor(FromBB->getLabel())) {
@@ -994,9 +1002,22 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() {
9941002
if (std::error_code EC = MispredStrRes.getError())
9951003
return EC;
9961004
StringRef MispredStr = MispredStrRes.get();
997-
if (MispredStr.size() != 1 ||
998-
(MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
999-
reportError("expected single char for mispred bit");
1005+
// SPE brstack mispredicted flags might be up to two characters long:
1006+
// 'PN' or 'MN', where the trailing 'N' is optional.
1007+
bool ValidStrSize = opts::ArmSPE
1008+
? MispredStr.size() >= 1 && MispredStr.size() <= 2
1009+
: MispredStr.size() == 1;
1010+
bool SpeTakenBitErr =
1011+
(opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N');
1012+
bool PredictionBitErr =
1013+
!ValidStrSize ||
1014+
(MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-');
1015+
if (SpeTakenBitErr)
1016+
reportError("expected 'N' as SPE prediction bit for a not-taken branch");
1017+
if (PredictionBitErr)
1018+
reportError("expected 'P', 'M' or '-' char as a prediction bit");
1019+
1020+
if (SpeTakenBitErr || PredictionBitErr) {
10001021
Diag << "Found: " << MispredStr << "\n";
10011022
return make_error_code(llvm::errc::io_error);
10021023
}
@@ -1202,12 +1223,14 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
12021223
std::error_code DataAggregator::parseAggregatedLBREntry() {
12031224
enum AggregatedLBREntry : char {
12041225
INVALID = 0,
1205-
EVENT_NAME, // E
1206-
TRACE, // T
1207-
SAMPLE, // S
1208-
BRANCH, // B
1209-
FT, // F
1210-
FT_EXTERNAL_ORIGIN // f
1226+
EVENT_NAME, // E
1227+
TRACE, // T
1228+
RETURN, // R
1229+
SAMPLE, // S
1230+
BRANCH, // B
1231+
FT, // F
1232+
FT_EXTERNAL_ORIGIN, // f
1233+
FT_EXTERNAL_RETURN // r
12111234
} Type = INVALID;
12121235

12131236
/// The number of fields to parse, set based on \p Type.
@@ -1235,20 +1258,22 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12351258

12361259
Type = StringSwitch<AggregatedLBREntry>(Str)
12371260
.Case("T", TRACE)
1261+
.Case("R", RETURN)
12381262
.Case("S", SAMPLE)
12391263
.Case("E", EVENT_NAME)
12401264
.Case("B", BRANCH)
12411265
.Case("F", FT)
12421266
.Case("f", FT_EXTERNAL_ORIGIN)
1267+
.Case("r", FT_EXTERNAL_RETURN)
12431268
.Default(INVALID);
12441269

12451270
if (Type == INVALID) {
1246-
reportError("expected T, S, E, B, F or f");
1271+
reportError("expected T, R, S, E, B, F, f or r");
12471272
return make_error_code(llvm::errc::io_error);
12481273
}
12491274

12501275
using SSI = StringSwitch<int>;
1251-
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
1276+
AddrNum = SSI(Str).Cases("T", "R", 3).Case("S", 1).Case("E", 0).Default(2);
12521277
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
12531278
}
12541279

@@ -1305,17 +1330,30 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
13051330
if (ToFunc)
13061331
ToFunc->setHasProfileAvailable();
13071332

1308-
/// For legacy fall-through types, adjust locations to match Trace container.
1309-
if (Type == FT || Type == FT_EXTERNAL_ORIGIN) {
1333+
/// For fall-through types, adjust locations to match Trace container.
1334+
if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) {
13101335
Addr[2] = Location(Addr[1]->Offset); // Trace To
13111336
Addr[1] = Location(Addr[0]->Offset); // Trace From
1312-
// Put a magic value into Trace Branch to differentiate from a full trace.
1313-
Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN);
1337+
// Put a magic value into Trace Branch to differentiate from a full trace:
1338+
if (Type == FT)
1339+
Addr[0] = Location(Trace::FT_ONLY);
1340+
else if (Type == FT_EXTERNAL_ORIGIN)
1341+
Addr[0] = Location(Trace::FT_EXTERNAL_ORIGIN);
1342+
else if (Type == FT_EXTERNAL_RETURN)
1343+
Addr[0] = Location(Trace::FT_EXTERNAL_RETURN);
1344+
else
1345+
llvm_unreachable("Unexpected fall-through type");
13141346
}
13151347

1316-
/// For legacy branch type, mark Trace To to differentite from a full trace.
1317-
if (Type == BRANCH) {
1348+
/// For branch type, mark Trace To to differentiate from a full trace.
1349+
if (Type == BRANCH)
13181350
Addr[2] = Location(Trace::BR_ONLY);
1351+
1352+
if (Type == RETURN) {
1353+
if (!Addr[0]->Offset)
1354+
Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
1355+
else
1356+
Returns.emplace(Addr[0]->Offset);
13191357
}
13201358

13211359
/// Record a trace.
@@ -1497,7 +1535,9 @@ void DataAggregator::printBranchStacksDiagnostics(
14971535
}
14981536

14991537
std::error_code DataAggregator::parseBranchEvents() {
1500-
outs() << "PERF2BOLT: parse branch events...\n";
1538+
std::string BranchEventTypeStr =
1539+
opts::ArmSPE ? "SPE branch events in LBR-format" : "branch events";
1540+
outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n";
15011541
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
15021542
TimerGroupDesc, opts::TimeAggregator);
15031543

@@ -1525,7 +1565,8 @@ std::error_code DataAggregator::parseBranchEvents() {
15251565
}
15261566

15271567
NumEntries += Sample.LBR.size();
1528-
if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1568+
if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 &&
1569+
!NeedsSkylakeFix) {
15291570
errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
15301571
NeedsSkylakeFix = true;
15311572
}
@@ -1548,10 +1589,18 @@ std::error_code DataAggregator::parseBranchEvents() {
15481589
if (NumSamples && NumSamplesNoLBR == NumSamples) {
15491590
// Note: we don't know if perf2bolt is being used to parse memory samples
15501591
// at this point. In this case, it is OK to parse zero LBRs.
1551-
errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1552-
"LBR. Record profile with perf record -j any or run perf2bolt "
1553-
"in no-LBR mode with -nl (the performance improvement in -nl "
1554-
"mode may be limited)\n";
1592+
if (!opts::ArmSPE)
1593+
errs()
1594+
<< "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1595+
"LBR. Record profile with perf record -j any or run perf2bolt "
1596+
"in no-LBR mode with -nl (the performance improvement in -nl "
1597+
"mode may be limited)\n";
1598+
else
1599+
errs()
1600+
<< "PERF2BOLT-WARNING: All recorded samples for this binary lack "
1601+
"SPE brstack entries. Make sure you are running Linux perf 6.14 "
1602+
"or later, otherwise you get zero samples. Record the profile "
1603+
"with: perf record -e 'arm_spe_0/branch_filter=1/'.";
15551604
} else {
15561605
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
15571606
}
@@ -1565,6 +1614,7 @@ void DataAggregator::processBranchEvents() {
15651614
NamedRegionTimer T("processBranch", "Processing branch events",
15661615
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
15671616

1617+
Returns.emplace(Trace::FT_EXTERNAL_RETURN);
15681618
for (const auto &[Trace, Info] : Traces) {
15691619
bool IsReturn = checkReturn(Trace.Branch);
15701620
// Ignore returns.

bolt/lib/Rewrite/LinuxKernelRewriter.cpp

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -432,25 +432,33 @@ class LinuxKernelRewriter final : public MetadataRewriter {
432432
};
433433

434434
Error LinuxKernelRewriter::detectLinuxKernelVersion() {
435-
if (BinaryData *BD = BC.getBinaryDataByName("linux_banner")) {
436-
const BinarySection &Section = BD->getSection();
437-
const std::string S =
438-
Section.getContents().substr(BD->getOffset(), BD->getSize()).str();
439-
440-
const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---");
441-
std::smatch Match;
442-
if (std::regex_search(S, Match, Re)) {
443-
const unsigned Major = std::stoi(Match[2].str());
444-
const unsigned Minor = std::stoi(Match[3].str());
445-
const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0;
446-
LinuxKernelVersion = LKVersion(Major, Minor, Rev);
447-
BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str()
448-
<< "\n";
449-
return Error::success();
450-
}
435+
// Check for global and local linux_banner symbol.
436+
BinaryData *BD = BC.getBinaryDataByName("linux_banner");
437+
if (!BD)
438+
BD = BC.getBinaryDataByName("linux_banner/1");
439+
440+
if (!BD)
441+
return createStringError(errc::executable_format_error,
442+
"unable to locate linux_banner");
443+
444+
const BinarySection &Section = BD->getSection();
445+
const std::string S =
446+
Section.getContents().substr(BD->getOffset(), BD->getSize()).str();
447+
448+
const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---");
449+
std::smatch Match;
450+
if (std::regex_search(S, Match, Re)) {
451+
const unsigned Major = std::stoi(Match[2].str());
452+
const unsigned Minor = std::stoi(Match[3].str());
453+
const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0;
454+
LinuxKernelVersion = LKVersion(Major, Minor, Rev);
455+
BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str()
456+
<< "\n";
457+
return Error::success();
451458
}
459+
452460
return createStringError(errc::executable_format_error,
453-
"Linux kernel version is unknown");
461+
"Linux kernel version is unknown: " + S);
454462
}
455463

456464
void LinuxKernelRewriter::processLKSections() {

bolt/test/X86/callcont-fallthru.s

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
# RUN: link_fdata %s %t %t.pa-ret PREAGG-RET
1111
# Trace from an external location to a landing pad/entry point call continuation
1212
# RUN: link_fdata %s %t %t.pa-ext PREAGG-EXT
13+
# Return trace to a landing pad/entry point call continuation
14+
# RUN: link_fdata %s %t %t.pa-pret PREAGG-PRET
15+
# External return to a landing pad/entry point call continuation
16+
# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET
1317
# RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT
1418

1519
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
@@ -38,6 +42,21 @@
3842
# RUN: llvm-bolt %t.strip --pa -p %t.pa-ext -o %t.out \
3943
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP
4044

45+
## Check pre-aggregated return traces from external location attach call
46+
## continuation fallthrough count to secondary entry point (unstripped)
47+
# RUN: llvm-bolt %t --pa -p %t.pa-pret -o %t.out \
48+
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
49+
## Check pre-aggregated return traces from external location attach call
50+
## continuation fallthrough count to landing pad (stripped, landing pad)
51+
# RUN: llvm-bolt %t.strip --pa -p %t.pa-pret -o %t.out \
52+
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
53+
54+
## Same for external return type
55+
# RUN: llvm-bolt %t --pa -p %t.pa-eret -o %t.out \
56+
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
57+
# RUN: llvm-bolt %t.strip --pa -p %t.pa-eret -o %t.out \
58+
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH
59+
4160
## Check pre-aggregated traces don't report zero-sized PLT fall-through as
4261
## invalid trace
4362
# RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.pa-plt -o %t.out | FileCheck %s \
@@ -92,6 +111,10 @@ Ltmp4_br:
92111
# PREAGG-RET: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1
93112
## Target is a secondary entry point (unstripped) or a landing pad (stripped)
94113
# PREAGG-EXT: T X:0 #Ltmp3# #Ltmp3_br# 1
114+
## Pre-aggregated return trace
115+
# PREAGG-PRET: R X:0 #Ltmp3# #Ltmp3_br# 1
116+
## External return
117+
# PREAGG-ERET: r #Ltmp3# #Ltmp3_br# 1
95118

96119
# CHECK-ATTACH: callq foo
97120
# CHECK-ATTACH-NEXT: count: 1

0 commit comments

Comments
 (0)