Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,16 @@ class DataAggregator : public DataReader {
bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
uint64_t Count = 1) const;

/// Aggregation statistics
/// Branch stacks aggregation statistics
uint64_t NumTraces{0};
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
/// with profiling data collected in a bolted binary. For LBRs, incremented
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};
uint64_t NumTotalSamples{0};

/// Looks into the system PATH for Linux Perf and sets up the aggregator to use it
void findPerfExecutable();
Expand Down Expand Up @@ -327,8 +329,8 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();

/// Parse LBR sample, returns the number of traces.
uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
/// Parse LBR sample.
void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);

/// Parse and pre-aggregate branch events.
std::error_code parseBranchEvents();
Expand Down Expand Up @@ -487,6 +489,13 @@ class DataAggregator : public DataReader {
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;

/// Profile diagnostics print methods

/// Reports NumColdSamples (with its share of NumTotalSamples) when any cold
/// samples were counted, and warns when that share exceeds 5%.
void printColdSamplesDiagnostic() const;

/// Reports NumLongRangeTraces and, when NumTraces is non-zero, its share of
/// NumTraces.
void printLongRangeTracesDiagnostic() const;

/// Reports NumInvalidTraces as a share of NumTraces, warns when that share
/// exceeds 10%, then prints the long-range-trace and cold-sample diagnostics.
void printBranchSamplesDiagnostics() const;

/// Reports \p OutOfRangeSamples as a share of NumTotalSamples, warns when
/// that share exceeds 80%, then prints the cold-sample diagnostic.
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;

/// Reports \p IgnoredSamples as a share of NumTotalSamples and emits a
/// warning on the error stream when that share exceeds 50%.
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;

public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
Expand Down
218 changes: 92 additions & 126 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc)
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
NumColdSamples += Count;

auto I = NamesToSamples.find(Func.getOneName());
Expand Down Expand Up @@ -756,12 +756,13 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

BinaryFunction *ParentFunc = getBATParentFunction(*Func);
if (IsFrom &&
(ParentFunc || (BAT && !BAT->isBATFunction(Func->getAddress()))))
NumColdSamples += Count;

if (!ParentFunc)
return std::pair{Func, IsRetOrCallCont};

if (IsFrom)
NumColdSamples += Count;

return std::pair{ParentFunc, IsRetOrCallCont};
};

Expand Down Expand Up @@ -1422,9 +1423,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
return std::error_code();
}

uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
bool NeedsSkylakeFix) {
uint64_t NumTraces{0};
void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
bool NeedsSkylakeFix) {
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
Expand Down Expand Up @@ -1487,19 +1487,93 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
return NumTraces;
}

/// Report how many samples were recorded in cold regions of split functions
/// and warn when they make up more than 5% of all samples.
///
/// Nothing is printed when no cold samples were counted. The percentage, and
/// the warning derived from it, is only produced when NumTotalSamples is
/// non-zero, mirroring the denominator guard used by the other diagnostics;
/// this avoids printing an "inf%" figure on a zero total.
void DataAggregator::printColdSamplesDiagnostic() const {
  if (NumColdSamples == 0)
    return;

  outs() << "PERF2BOLT: " << NumColdSamples;

  // Stays at zero (no warning) when there is no total to compare against.
  float ColdSamples = 0.0f;
  if (NumTotalSamples > 0) {
    ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
    outs() << format(" (%.1f%%)", ColdSamples);
  }
  outs() << " samples recorded in cold regions of split functions.\n";

  if (ColdSamples > 5.0f)
    outs()
        << "WARNING: The BOLT-processed binary where samples were collected "
           "likely used bad data or your service observed a large shift in "
           "profile. You may want to audit this\n";
}

/// Print the number of out-of-range traces that involve unknown regions.
/// The share of NumTraces is appended only when at least one trace exists.
void DataAggregator::printLongRangeTracesDiagnostic() const {
  outs() << "PERF2BOLT: out of range traces involving unknown regions: "
         << NumLongRangeTraces;

  if (NumTraces != 0) {
    const float LongRangePct = NumLongRangeTraces * 100.0f / NumTraces;
    outs() << format(" (%.1f%%)", LongRangePct);
  }

  outs() << "\n";
}

/// Print " (<pct>%)" followed by a newline on the standard output stream and
/// return the percentage Numerator / Denominator * 100.
///
/// When the stream supports colors, the figure is red above \p T2, yellow
/// above \p T1 and green otherwise. With a zero \p Denominator only the
/// newline is printed and 0 is returned.
static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
                             float T2) {
  if (Denominator == 0) {
    outs() << "\n";
    return 0;
  }

  const float Pct = Numerator * 100.0f / Denominator;

  outs() << " (";
  const bool Colored = outs().has_colors();
  if (Colored) {
    // Pick the severity color from the two thresholds, highest first.
    const auto Severity = Pct > T2   ? raw_ostream::RED
                          : Pct > T1 ? raw_ostream::YELLOW
                                     : raw_ostream::GREEN;
    outs().changeColor(Severity);
  }
  outs() << format("%.1f%%", Pct);
  if (Colored)
    outs().resetColor();
  outs() << ")\n";

  return Pct;
}

/// Print the branch-profile diagnostics: the count of traces that mismatch
/// the disassembled function contents (with a warning when they exceed 10% of
/// all traces), followed by the long-range-trace and cold-sample reports.
void DataAggregator::printBranchSamplesDiagnostics() const {
  outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
         << NumInvalidTraces;

  const float InvalidPct = printColoredPct(NumInvalidTraces, NumTraces, 5, 10);
  if (InvalidPct > 10) {
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance\n\n";
  }

  printLongRangeTracesDiagnostic();
  printColdSamplesDiagnostic();
}

/// Print the basic-sample diagnostics: how many samples fell in unknown
/// regions, as a share of NumTotalSamples (colored at the 40%/60% thresholds),
/// a warning when that share exceeds 80%, and then the cold-sample report.
///
/// \param OutOfRangeSamples number of samples recorded outside known regions.
void DataAggregator::printBasicSamplesDiagnostics(
    uint64_t OutOfRangeSamples) const {
  outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
         << OutOfRangeSamples;

  const float OutOfRangePct =
      printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60);
  if (OutOfRangePct > 80) {
    outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
              "binary is probably not the same binary used during profiling "
              "collection. The generated data may be ineffective for improving "
              "performance\n\n";
  }

  printColdSamplesDiagnostic();
}

/// Print how many samples were ignored, as a share of NumTotalSamples
/// (colored at the 20%/50% thresholds), and warn on the error stream when
/// more than half of the recorded samples were ignored.
///
/// \param IgnoredSamples number of recorded samples that were not used.
void DataAggregator::printBranchStacksDiagnostics(
    uint64_t IgnoredSamples) const {
  outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;

  const float IgnoredPct =
      printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50);
  if (IgnoredPct > 50) {
    errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
              "were attributed to the input binary\n";
  }
}

std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);

uint64_t NumTotalSamples = 0;
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
uint64_t NumTraces = 0;
bool NeedsSkylakeFix = false;

while (hasData() && NumTotalSamples < opts::MaxSamples) {
Expand All @@ -1526,30 +1600,14 @@ std::error_code DataAggregator::parseBranchEvents() {
NeedsSkylakeFix = true;
}

NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
parseLBRSample(Sample, NeedsSkylakeFix);
}

for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
for (const uint64_t Addr : {Trace.From, Trace.To})
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
BF->setHasProfileAvailable();

auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
OS << " (";
if (OS.has_colors()) {
if (Percent > T2)
OS.changeColor(raw_ostream::RED);
else if (Percent > T1)
OS.changeColor(raw_ostream::YELLOW);
else
OS.changeColor(raw_ostream::GREEN);
}
OS << format("%.1f%%", Percent);
if (OS.has_colors())
OS.resetColor();
OS << ")";
};

outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
<< " LBR entries\n";
if (NumTotalSamples) {
Expand All @@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
} else {
const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
printColored(outs(), PercentIgnored, 20, 50);
outs() << " were ignored\n";
if (PercentIgnored > 50.0f)
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
"were attributed to the input binary\n";
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
<< NumInvalidTraces;
float Perc = 0.0f;
if (NumTraces > 0) {
Perc = NumInvalidTraces * 100.0f / NumTraces;
printColored(outs(), Perc, 5, 10);
}
outs() << "\n";
if (Perc > 10.0f)
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";

outs() << "PERF2BOLT: out of range traces involving unknown regions: "
<< NumLongRangeTraces;
if (NumTraces > 0)
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
outs() << "\n";

if (NumColdSamples > 0) {
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
outs() << "PERF2BOLT: " << NumColdSamples
<< format(" (%.1f%%)", ColdSamples)
<< " samples recorded in cold regions of split functions.\n";
if (ColdSamples > 5.0f)
outs()
<< "WARNING: The BOLT-processed binary where samples were collected "
"likely used bad data or your service observed a large shift in "
"profile. You may want to audit this.\n";
}
printBranchSamplesDiagnostics();

return std::error_code();
}
Expand Down Expand Up @@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples = 0;
uint64_t NumSamples = 0;
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
NumSamples += HitCount;
NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
Expand All @@ -1671,33 +1691,9 @@ void DataAggregator::processBasicEvents() {

doSample(*Func, PC, HitCount);
}
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";

outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
<< OutOfRangeSamples;
float Perc = 0.0f;
if (NumSamples > 0) {
outs() << " (";
Perc = OutOfRangeSamples * 100.0f / NumSamples;
if (outs().has_colors()) {
if (Perc > 60.0f)
outs().changeColor(raw_ostream::RED);
else if (Perc > 40.0f)
outs().changeColor(raw_ostream::YELLOW);
else
outs().changeColor(raw_ostream::GREEN);
}
outs() << format("%.1f%%", Perc);
if (outs().has_colors())
outs().resetColor();
outs() << ")";
}
outs() << "\n";
if (Perc > 80.0f)
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";
printBasicSamplesDiagnostics(OutOfRangeSamples);
}

std::error_code DataAggregator::parseMemEvents() {
Expand Down Expand Up @@ -1775,13 +1771,13 @@ void DataAggregator::processPreAggregated() {
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

uint64_t NumTraces = 0;
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
case AggregatedLBREntry::TRACE:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
NumTotalSamples += AggrEntry.Count;
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
Expand All @@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {

outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
<< NumInvalidTraces;
float Perc = 0.0f;
if (NumTraces > 0) {
outs() << " (";
Perc = NumInvalidTraces * 100.0f / NumTraces;
if (outs().has_colors()) {
if (Perc > 10.0f)
outs().changeColor(raw_ostream::RED);
else if (Perc > 5.0f)
outs().changeColor(raw_ostream::YELLOW);
else
outs().changeColor(raw_ostream::GREEN);
}
outs() << format("%.1f%%", Perc);
if (outs().has_colors())
outs().resetColor();
outs() << ")";
}
outs() << "\n";
if (Perc > 10.0f)
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
"binary is probably not the same binary used during profiling "
"collection. The generated data may be ineffective for improving "
"performance.\n\n";

outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
<< NumLongRangeTraces;
if (NumTraces > 0)
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
outs() << "\n";
printBranchSamplesDiagnostics();
}

std::optional<int32_t> DataAggregator::parseCommExecEvent() {
Expand Down
Loading