Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class BinaryFunction {
enum {
PF_NONE = 0, /// No profile.
PF_LBR = 1, /// Profile is based on last branch records.
PF_SAMPLE = 2, /// Non-LBR sample-based profile.
PF_IP = 2, /// Non-LBR sample-based profile.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not PF_BASIC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought of a concise counterpart to PF_LBR. PF_BASIC would be best matched by PF_BRANCH imo. But I don't feel strongly about this.

PF_MEMEVENT = 4, /// Profile has mem events.
};

Expand Down Expand Up @@ -392,7 +392,7 @@ class BinaryFunction {
float ProfileMatchRatio{0.0f};

/// Raw sample count for this function in the profile (the number of executed
/// branches when the profile is LBR-based).
uint64_t RawBranchCount{0};
uint64_t RawSampleCount{0};

/// Dynamically executed function bytes, used for density computation.
uint64_t SampleCountInBytes{0};
Expand Down Expand Up @@ -1893,11 +1893,11 @@ class BinaryFunction {

/// Return the raw profile information about the number of samples (branch
/// executions for an LBR profile) corresponding to this function.
uint64_t getRawBranchCount() const { return RawBranchCount; }
uint64_t getRawSampleCount() const { return RawSampleCount; }

/// Set the profile data about the number of samples (branch executions for an
/// LBR profile) corresponding to this function.
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }

/// Return the number of dynamically executed bytes, from raw perf data.
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
Expand Down
3 changes: 2 additions & 1 deletion bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ class DataAggregator : public DataReader {

/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
bool doSample(BinaryFunction &Func, const uint64_t Address, uint64_t Count);
bool doBasicSample(BinaryFunction &Func, const uint64_t Address,
uint64_t Count);

/// Register an intraprocedural branch \p Branch.
bool doIntraBranch(BinaryFunction &Func, uint64_t From, uint64_t To,
Expand Down
31 changes: 18 additions & 13 deletions bolt/include/bolt/Profile/DataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,16 @@ struct FuncMemData {
/// Similar to BranchInfo, but instead of recording from-to address (an edge),
/// it records the address of a perf event and the number of times samples hit
/// this address.
struct SampleInfo {
struct BasicSampleInfo {
Location Loc;
int64_t Hits;

SampleInfo(Location Loc, int64_t Hits) : Loc(std::move(Loc)), Hits(Hits) {}
BasicSampleInfo(Location Loc, int64_t Hits)
: Loc(std::move(Loc)), Hits(Hits) {}

bool operator==(const SampleInfo &RHS) const { return Loc == RHS.Loc; }
bool operator==(const BasicSampleInfo &RHS) const { return Loc == RHS.Loc; }

bool operator<(const SampleInfo &RHS) const {
bool operator<(const BasicSampleInfo &RHS) const {
if (Loc < RHS.Loc)
return true;

Expand All @@ -229,23 +230,26 @@ struct SampleInfo {

void print(raw_ostream &OS) const;

void mergeWith(const SampleInfo &SI);
void mergeWith(const BasicSampleInfo &SI);
};

/// Helper class to store samples recorded in the address space of a given
/// function, analogous to FuncBranchData but for samples instead of branches.
struct FuncSampleData {
typedef std::vector<SampleInfo> ContainerTy;
struct FuncBasicSampleData {
typedef std::vector<BasicSampleInfo> ContainerTy;

StringRef Name;
ContainerTy Data;

FuncSampleData(StringRef Name, ContainerTy Data)
FuncBasicSampleData(StringRef Name, ContainerTy Data)
: Name(Name), Data(std::move(Data)) {}

/// Get the number of samples recorded in [Start, End)
uint64_t getSamples(uint64_t Start, uint64_t End) const;

/// Returns the total number of samples recorded in this function.
uint64_t getSamples() const;

/// Aggregation helper
DenseMap<uint64_t, size_t> Index;

Expand Down Expand Up @@ -305,7 +309,7 @@ class DataReader : public ProfileReaderBase {
/// The last step is to infer edge counts based on BB execution count. Note
/// this is the opposite of the LBR way, where we infer BB execution count
/// based on edge counts.
void readSampleData(BinaryFunction &BF);
void readBasicSampleData(BinaryFunction &BF);

/// Convert function-level branch data into instruction annotations.
void convertBranchData(BinaryFunction &BF) const;
Expand Down Expand Up @@ -379,7 +383,8 @@ class DataReader : public ProfileReaderBase {
/// Return mem data matching one of the names in \p FuncNames.
FuncMemData *getMemDataForNames(const std::vector<StringRef> &FuncNames);

FuncSampleData *getFuncSampleData(const std::vector<StringRef> &FuncNames);
FuncBasicSampleData *
getFuncBasicSampleData(const std::vector<StringRef> &FuncNames);

/// Return a vector of all FuncBranchData matching the list of names.
/// Internally use fuzzy matching to match special names like LTO-generated
Expand Down Expand Up @@ -422,7 +427,7 @@ class DataReader : public ProfileReaderBase {
}

using NamesToBranchesMapTy = std::map<StringRef, FuncBranchData>;
using NamesToSamplesMapTy = std::map<StringRef, FuncSampleData>;
using NamesToBasicSamplesMapTy = std::map<StringRef, FuncBasicSampleData>;
using NamesToMemEventsMapTy = std::map<StringRef, FuncMemData>;
using FuncsToBranchesMapTy =
std::unordered_map<const BinaryFunction *, FuncBranchData *>;
Expand Down Expand Up @@ -471,7 +476,7 @@ class DataReader : public ProfileReaderBase {
return parseLocation(EndChar, EndNl, true);
}
ErrorOr<BranchInfo> parseBranchInfo();
ErrorOr<SampleInfo> parseSampleInfo();
ErrorOr<BasicSampleInfo> parseSampleInfo();
ErrorOr<MemInfo> parseMemInfo();
ErrorOr<bool> maybeParseNoLBRFlag();
ErrorOr<bool> maybeParseBATFlag();
Expand All @@ -485,7 +490,7 @@ class DataReader : public ProfileReaderBase {
unsigned Line{0};
unsigned Col{0};
NamesToBranchesMapTy NamesToBranches;
NamesToSamplesMapTy NamesToSamples;
NamesToBasicSamplesMapTy NamesToBasicSamples;
NamesToMemEventsMapTy NamesToMemEvents;
FuncsToBranchesMapTy FuncsToBranches;
FuncsToMemDataMapTy FuncsToMemData;
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ LLVM_YAML_STRONG_TYPEDEF(uint16_t, PROFILE_PF)
template <> struct ScalarBitSetTraits<PROFILE_PF> {
static void bitset(IO &io, PROFILE_PF &value) {
io.bitSetCase(value, "lbr", BinaryFunction::PF_LBR);
io.bitSetCase(value, "sample", BinaryFunction::PF_SAMPLE);
io.bitSetCase(value, "sample", BinaryFunction::PF_IP);
io.bitSetCase(value, "memevent", BinaryFunction::PF_MEMEVENT);
}
};
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress());
if (ExecutionCount != COUNT_NO_PROFILE) {
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n Branch Count: " << RawBranchCount;
OS << "\n Sample Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1445,7 +1445,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
if (!Function.hasProfile())
continue;

uint64_t SampleCount = Function.getRawBranchCount();
uint64_t SampleCount = Function.getRawSampleCount();
TotalSampleCount += SampleCount;

if (Function.hasValidProfile()) {
Expand Down
4 changes: 2 additions & 2 deletions bolt/lib/Passes/MCF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,15 +458,15 @@ void EstimateEdgeCounts::runOnFunction(BinaryFunction &BF) {
Error EstimateEdgeCounts::runOnFunctions(BinaryContext &BC) {
if (llvm::none_of(llvm::make_second_range(BC.getBinaryFunctions()),
[](const BinaryFunction &BF) {
return BF.getProfileFlags() == BinaryFunction::PF_SAMPLE;
return BF.getProfileFlags() == BinaryFunction::PF_IP;
}))
return Error::success();

ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
runOnFunction(BF);
};
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
return BF.getProfileFlags() != BinaryFunction::PF_SAMPLE;
return BF.getProfileFlags() != BinaryFunction::PF_IP;
};

ParallelUtilities::runOnEachFunction(
Expand Down
50 changes: 29 additions & 21 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,15 +565,15 @@ void DataAggregator::processProfile(BinaryContext &BC) {
processMemEvents();

// Mark all functions with registered events as having a valid profile.
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
if (FuncBranchData *FBD = getBranchData(BF)) {
BF.markProfiled(BinaryFunction::PF_LBR);
BF.RawSampleCount = FBD->getNumExecutedBranches();
} else if (FuncBasicSampleData *FSD =
getFuncBasicSampleData(BF.getNames())) {
BF.markProfiled(BinaryFunction::PF_IP);
BF.RawSampleCount = FSD->getSamples();
}
}

Expand Down Expand Up @@ -628,20 +628,28 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
return OrigFunc->getOneName();
}

bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
// To record executed bytes, use basic block size as is regardless of BAT.
uint64_t BlockSize = 0;
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
Address - OrigFunc.getAddress()))
BlockSize = BB->getOriginalSize();

BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
NumColdSamples += Count;
// Attach executed bytes to parent function in case of cold fragment.
Func.SampleCountInBytes += Count * BlockSize;

auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
auto I = NamesToBasicSamples.find(Func.getOneName());
if (I == NamesToBasicSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func, BAT);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
Func.getOneName(),
FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
}

Address -= Func.getAddress();
Expand Down Expand Up @@ -1654,7 +1662,7 @@ void DataAggregator::processBasicEvents() {
continue;
}

doSample(*Func, PC, HitCount);
doBasicSample(*Func, PC, HitCount);
}
outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";

Expand Down Expand Up @@ -2185,9 +2193,9 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
OutFile << " " << Entry.getKey();
OutFile << "\n";

for (const auto &KV : NamesToSamples) {
const FuncSampleData &FSD = KV.second;
for (const SampleInfo &SI : FSD.Data) {
for (const auto &KV : NamesToBasicSamples) {
const FuncBasicSampleData &FSD = KV.second;
for (const BasicSampleInfo &SI : FSD.Data) {
writeLocation(SI.Loc);
OutFile << SI.Hits << "\n";
++BranchValues;
Expand Down Expand Up @@ -2260,8 +2268,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
EventNamesOS << LS << EventEntry.first().str();

BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
BP.Header.Flags =
opts::BasicAggregation ? BinaryFunction::PF_IP : BinaryFunction::PF_LBR;

// Add probe inline tree nodes.
YAMLProfileWriter::InlineTreeDesc InlineTree;
Expand Down
Loading
Loading