-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[BOLT][NFCI] Emit uniform diagnostics in DataAggregator #136530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT][NFCI] Emit uniform diagnostics in DataAggregator #136530
Conversation
Created using spr 1.3.4
|
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) ChangesDataAggregator supports reading different kinds of profile data:
Make profile quality reporting uniform across all kinds of input:
Test Plan: NFCI Full diff: https://github.com/llvm/llvm-project/pull/136530.diff 2 Files Affected:
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 56eb463fc98fc..79a91861554df 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -223,7 +223,8 @@ class DataAggregator : public DataReader {
bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
uint64_t Count = 1) const;
- /// Aggregation statistics
+ /// Branch stacks aggregation statistics
+ uint64_t NumTraces{0};
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
@@ -231,6 +232,7 @@ class DataAggregator : public DataReader {
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};
+ uint64_t NumTotalSamples{0};
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
void findPerfExecutable();
@@ -327,8 +329,8 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();
- /// Parse LBR sample, returns the number of traces.
- uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
+ /// Parse LBR sample.
+ void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
/// Parse and pre-aggregate branch events.
std::error_code parseBranchEvents();
@@ -487,6 +489,13 @@ class DataAggregator : public DataReader {
void dump(const PerfBranchSample &Sample) const;
void dump(const PerfMemSample &Sample) const;
+ /// Profile diagnostics print methods
+ void printColdSamplesDiagnostic() const;
+ void printLongRangeTracesDiagnostic() const;
+ void printBranchSamplesDiagnostics() const;
+ void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
+ void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+
public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f016576ff61af..1127cc1f9331a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1422,9 +1422,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
return std::error_code();
}
-uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
- bool NeedsSkylakeFix) {
- uint64_t NumTraces{0};
+void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
+ bool NeedsSkylakeFix) {
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR = nullptr;
@@ -1487,7 +1486,83 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
- return NumTraces;
+}
+
+void DataAggregator::printColdSamplesDiagnostic() const {
+ if (NumColdSamples > 0) {
+ const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
+ outs() << "PERF2BOLT: " << NumColdSamples
+ << format(" (%.1f%%)", ColdSamples)
+ << " samples recorded in cold regions of split functions.\n";
+ if (ColdSamples > 5.0f)
+ outs()
+ << "WARNING: The BOLT-processed binary where samples were collected "
+ "likely used bad data or your service observed a large shift in "
+ "profile. You may want to audit this.\n";
+ }
+}
+
+void DataAggregator::printLongRangeTracesDiagnostic() const {
+ outs() << "PERF2BOLT: out of range traces involving unknown regions: "
+ << NumLongRangeTraces;
+ if (NumTraces > 0)
+ outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
+ outs() << "\n";
+}
+
+static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
+ float T2) {
+ if (Denominator == 0) {
+ outs() << "\n";
+ return 0;
+ }
+ float Percent = Numerator * 100.0f / Denominator;
+ outs() << " (";
+ if (outs().has_colors()) {
+ if (Percent > T2)
+ outs().changeColor(raw_ostream::RED);
+ else if (Percent > T1)
+ outs().changeColor(raw_ostream::YELLOW);
+ else
+ outs().changeColor(raw_ostream::GREEN);
+ }
+ outs() << format("%.1f%%", Percent);
+ if (outs().has_colors())
+ outs().resetColor();
+ outs() << ")\n";
+ return Percent;
+}
+
+void DataAggregator::printBranchSamplesDiagnostics() const {
+ outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
+ << NumInvalidTraces;
+ if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printLongRangeTracesDiagnostic();
+ printColdSamplesDiagnostic();
+}
+
+void DataAggregator::printBasicSamplesDiagnostics(
+ uint64_t OutOfRangeSamples) const {
+ outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
+ << OutOfRangeSamples;
+ if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
+ outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
+ "binary is probably not the same binary used during profiling "
+ "collection. The generated data may be ineffective for improving "
+ "performance.\n\n";
+ printColdSamplesDiagnostic();
+}
+
+void DataAggregator::printBranchStacksDiagnostics(
+ uint64_t IgnoredSamples) const {
+ outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
+ if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
+ errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
+ "were attributed to the input binary\n";
}
std::error_code DataAggregator::parseBranchEvents() {
@@ -1495,11 +1570,9 @@ std::error_code DataAggregator::parseBranchEvents() {
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTotalSamples = 0;
uint64_t NumEntries = 0;
uint64_t NumSamples = 0;
uint64_t NumSamplesNoLBR = 0;
- uint64_t NumTraces = 0;
bool NeedsSkylakeFix = false;
while (hasData() && NumTotalSamples < opts::MaxSamples) {
@@ -1526,7 +1599,7 @@ std::error_code DataAggregator::parseBranchEvents() {
NeedsSkylakeFix = true;
}
- NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
+ parseLBRSample(Sample, NeedsSkylakeFix);
}
for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
@@ -1534,22 +1607,6 @@ std::error_code DataAggregator::parseBranchEvents() {
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
BF->setHasProfileAvailable();
- auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
- OS << " (";
- if (OS.has_colors()) {
- if (Percent > T2)
- OS.changeColor(raw_ostream::RED);
- else if (Percent > T1)
- OS.changeColor(raw_ostream::YELLOW);
- else
- OS.changeColor(raw_ostream::GREEN);
- }
- OS << format("%.1f%%", Percent);
- if (OS.has_colors())
- OS.resetColor();
- OS << ")";
- };
-
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
<< " LBR entries\n";
if (NumTotalSamples) {
@@ -1561,47 +1618,10 @@ std::error_code DataAggregator::parseBranchEvents() {
"in no-LBR mode with -nl (the performance improvement in -nl "
"mode may be limited)\n";
} else {
- const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
- const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
- outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
- printColored(outs(), PercentIgnored, 20, 50);
- outs() << " were ignored\n";
- if (PercentIgnored > 50.0f)
- errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
- "were attributed to the input binary\n";
+ printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- printColored(outs(), Perc, 5, 10);
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
-
- if (NumColdSamples > 0) {
- const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
- outs() << "PERF2BOLT: " << NumColdSamples
- << format(" (%.1f%%)", ColdSamples)
- << " samples recorded in cold regions of split functions.\n";
- if (ColdSamples > 5.0f)
- outs()
- << "WARNING: The BOLT-processed binary where samples were collected "
- "likely used bad data or your service observed a large shift in "
- "profile. You may want to audit this.\n";
- }
+ printBranchSamplesDiagnostics();
return std::error_code();
}
@@ -1658,11 +1678,10 @@ void DataAggregator::processBasicEvents() {
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
TimerGroupDesc, opts::TimeAggregator);
uint64_t OutOfRangeSamples = 0;
- uint64_t NumSamples = 0;
for (auto &Sample : BasicSamples) {
const uint64_t PC = Sample.first;
const uint64_t HitCount = Sample.second;
- NumSamples += HitCount;
+ NumTotalSamples += HitCount;
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
if (!Func) {
OutOfRangeSamples += HitCount;
@@ -1671,33 +1690,9 @@ void DataAggregator::processBasicEvents() {
doSample(*Func, PC, HitCount);
}
- outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
+ outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
- outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
- << OutOfRangeSamples;
- float Perc = 0.0f;
- if (NumSamples > 0) {
- outs() << " (";
- Perc = OutOfRangeSamples * 100.0f / NumSamples;
- if (outs().has_colors()) {
- if (Perc > 60.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 40.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 80.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
+ printBasicSamplesDiagnostics(OutOfRangeSamples);
}
std::error_code DataAggregator::parseMemEvents() {
@@ -1775,7 +1770,6 @@ void DataAggregator::processPreAggregated() {
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- uint64_t NumTraces = 0;
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
@@ -1799,37 +1793,7 @@ void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
- outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
- << NumInvalidTraces;
- float Perc = 0.0f;
- if (NumTraces > 0) {
- outs() << " (";
- Perc = NumInvalidTraces * 100.0f / NumTraces;
- if (outs().has_colors()) {
- if (Perc > 10.0f)
- outs().changeColor(raw_ostream::RED);
- else if (Perc > 5.0f)
- outs().changeColor(raw_ostream::YELLOW);
- else
- outs().changeColor(raw_ostream::GREEN);
- }
- outs() << format("%.1f%%", Perc);
- if (outs().has_colors())
- outs().resetColor();
- outs() << ")";
- }
- outs() << "\n";
- if (Perc > 10.0f)
- outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
- "binary is probably not the same binary used during profiling "
- "collection. The generated data may be ineffective for improving "
- "performance.\n\n";
-
- outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
- << NumLongRangeTraces;
- if (NumTraces > 0)
- outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
- outs() << "\n";
+ printBranchSamplesDiagnostics();
}
std::optional<int32_t> DataAggregator::parseCommExecEvent() {
|
Created using spr 1.3.4
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.4
maksfb
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe drop trailing periods from messages while at it? I'm fine either way since it's NFCI.
Created using spr 1.3.4
DataAggregator supports reading different kinds of profile data: - perf data: branch records or IP samples, - pre-aggregated branch data. Make profile quality reporting uniform across all kinds of input: - out-of-range and mismatching samples, - samples in cold code in BAT mode (profiled BOLTed binary). Test Plan: NFCI
DataAggregator supports reading different kinds of profile data:
Make profile quality reporting uniform across all kinds of input:
Test Plan: NFCI