Skip to content

Commit b69c993

Browse files
resolve review feedback
1 parent 80249bc commit b69c993

File tree

6 files changed

+245
-217
lines changed

6 files changed

+245
-217
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,11 @@ struct DataAccessProfRecord {
7070
/// This class provides profile look-up, serialization and deserialization.
7171
class DataAccessProfData {
7272
public:
73-
// SymbolID is either a string representing symbol name, or a uint64_t
74-
// representing the content hash of a string literal.
75-
using SymbolID = std::variant<StringRef, uint64_t>;
73+
// SymbolID is either a string representing symbol name if the symbol has
74+
// stable mangled name relative to source code, or a uint64_t representing the
75+
// content hash of a string literal (with unstable name patterns like
76+
// `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
77+
using SymbolHandle = std::variant<StringRef, uint64_t>;
7678
using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
7779

7880
DataAccessProfData() : Saver(Allocator) {}
@@ -90,38 +92,32 @@ class DataAccessProfData {
9092
/// Returns a pointer of profile record for \p SymbolID, or nullptr if there
9193
/// isn't a record. Internally, this function will canonicalize the symbol
9294
/// name before the lookup.
93-
const DataAccessProfRecord *getProfileRecord(const SymbolID SymID) const;
95+
const DataAccessProfRecord *getProfileRecord(const SymbolHandle SymID) const;
9496

9597
/// Returns true if \p SymID is seen in profiled binaries and cold.
96-
bool isKnownColdSymbol(const SymbolID SymID) const;
98+
bool isKnownColdSymbol(const SymbolHandle SymID) const;
9799

98-
/// Methods to add symbolized data access profile. Returns error if duplicated
100+
/// Methods to set symbolized data access profile. Returns error if duplicated
99101
/// symbol names or content hashes are seen. The user of this class should
100102
/// aggregate counters that correspond to the same symbol name or with the
101-
/// same string literal hash before calling 'add*' methods.
102-
Error setDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount);
103+
/// same string literal hash before calling 'set*' methods.
104+
Error setDataAccessProfile(SymbolHandle SymbolID, uint64_t AccessCount);
103105
/// Similar to the method above, for records with \p Locations representing
104106
/// the `filename:line` where this symbol shows up. Note because of linker's
105107
/// merge of identical symbols (e.g., unnamed_addr string literals), one
106108
/// symbol is likely to have multiple locations.
107-
Error setDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount,
108-
const llvm::SmallVector<DataLocation> &Locations);
109-
Error addKnownSymbolWithoutSamples(SymbolID SymbolID);
109+
Error setDataAccessProfile(SymbolHandle SymbolID, uint64_t AccessCount,
110+
ArrayRef<DataLocation> Locations);
111+
Error addKnownSymbolWithoutSamples(SymbolHandle SymbolID);
110112

111113
/// Returns an iterable StringRef for strings in the order they are added.
112114
/// Each string may be a symbol name or a file name.
113115
auto getStrings() const {
114-
ArrayRef<std::pair<StringRef, uint64_t>> RefSymbolNames(
115-
StrToIndexMap.begin(), StrToIndexMap.end());
116-
return llvm::make_first_range(RefSymbolNames);
116+
return llvm::make_first_range(StrToIndexMap.getArrayRef());
117117
}
118118

119119
/// Returns array reference for various internal data structures.
120-
ArrayRef<
121-
std::pair<std::variant<StringRef, uint64_t>, DataAccessProfRecord>>
122-
getRecords() const {
123-
return Records.getArrayRef();
124-
}
120+
auto getRecords() const { return Records.getArrayRef(); }
125121
ArrayRef<StringRef> getKnownColdSymbols() const {
126122
return KnownColdSymbols.getArrayRef();
127123
}
@@ -137,23 +133,22 @@ class DataAccessProfData {
137133
/// start of the next payload.
138134
Error deserializeSymbolsAndFilenames(const unsigned char *&Ptr,
139135
const uint64_t NumSampledSymbols,
140-
uint64_t NumColdKnownSymbols);
136+
const uint64_t NumColdKnownSymbols);
141137

142138
/// Decode the records and increment \p Ptr to the start of the next payload.
143139
Error deserializeRecords(const unsigned char *&Ptr);
144140

145141
/// A helper function to compute a storage index for \p SymbolID.
146-
uint64_t getEncodedIndex(const SymbolID SymbolID) const;
142+
uint64_t getEncodedIndex(const SymbolHandle SymbolID) const;
147143

148144
// Keeps owned copies of the input strings.
149145
// NOTE: Keep `Saver` initialized before other class members that reference
150146
// its string copies and destructed after they are destructed.
151147
llvm::BumpPtrAllocator Allocator;
152148
llvm::UniqueStringSaver Saver;
153149

154-
// `Records` stores the records and `SymbolToRecordIndex` maps a symbol ID to
155-
// its record index.
156-
MapVector<SymbolID, DataAccessProfRecord> Records;
150+
// `Records` stores the records.
151+
MapVector<SymbolHandle, DataAccessProfRecord> Records;
157152

158153
// Use MapVector to keep input order of strings for serialization and
159154
// deserialization.

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,8 +358,9 @@ void createPGONameMetadata(GlobalObject &GO, StringRef PGOName);
358358
bool needsComdatForCounter(const GlobalObject &GV, const Module &M);
359359

360360
/// \c NameStrings is a string composed of one or more possibly encoded
361-
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
362-
/// method decodes the string and calls `NameCallback` for each substring.
361+
/// sub-strings. The substrings are separated by `\01` (returned by
362+
/// InstrProf.h:getInstrProfNameSeparator). This method decodes the string and
363+
/// calls `NameCallback` for each substring.
363364
Error readAndDecodeStrings(StringRef NameStrings,
364365
std::function<Error(StringRef)> NameCallback);
365366

llvm/lib/ProfileData/DataAccessProf.cpp

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,22 @@ saveStringToMap(MapVector<StringRef, uint64_t> &Map,
2323
return Iter;
2424
}
2525

26+
// Returns the canonical name or error.
27+
static Expected<StringRef> getCanonicalName(StringRef Name) {
28+
if (Name.empty())
29+
return make_error<StringError>("Empty symbol name",
30+
llvm::errc::invalid_argument);
31+
return InstrProfSymtab::getCanonicalName(Name);
32+
}
33+
2634
const DataAccessProfRecord *
27-
DataAccessProfData::getProfileRecord(const SymbolID SymbolID) const {
35+
DataAccessProfData::getProfileRecord(const SymbolHandle SymbolID) const {
2836
auto Key = SymbolID;
29-
if (std::holds_alternative<StringRef>(SymbolID))
30-
Key = InstrProfSymtab::getCanonicalName(std::get<StringRef>(SymbolID));
37+
if (std::holds_alternative<StringRef>(SymbolID)) {
38+
StringRef Name = std::get<StringRef>(SymbolID);
39+
assert(!Name.empty() && "Empty symbol name");
40+
Key = InstrProfSymtab::getCanonicalName(Name);
41+
}
3142

3243
auto It = Records.find(Key);
3344
if (It != Records.end())
@@ -36,30 +47,27 @@ DataAccessProfData::getProfileRecord(const SymbolID SymbolID) const {
3647
return nullptr;
3748
}
3849

39-
bool DataAccessProfData::isKnownColdSymbol(const SymbolID SymID) const {
50+
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandle SymID) const {
4051
if (std::holds_alternative<uint64_t>(SymID))
4152
return KnownColdHashes.contains(std::get<uint64_t>(SymID));
4253
return KnownColdSymbols.contains(std::get<StringRef>(SymID));
4354
}
4455

45-
Error DataAccessProfData::setDataAccessProfile(SymbolID Symbol,
56+
Error DataAccessProfData::setDataAccessProfile(SymbolHandle Symbol,
4657
uint64_t AccessCount) {
4758
uint64_t RecordID = -1;
4859
bool IsStringLiteral = false;
49-
SymbolID Key;
60+
SymbolHandle Key;
5061
if (std::holds_alternative<uint64_t>(Symbol)) {
5162
RecordID = std::get<uint64_t>(Symbol);
5263
Key = RecordID;
5364
IsStringLiteral = true;
5465
} else {
55-
StringRef SymbolName = std::get<StringRef>(Symbol);
56-
if (SymbolName.empty())
57-
return make_error<StringError>("Empty symbol name",
58-
llvm::errc::invalid_argument);
59-
60-
StringRef CanonicalName = InstrProfSymtab::getCanonicalName(SymbolName);
61-
Key = CanonicalName;
62-
RecordID = saveStringToMap(StrToIndexMap, Saver, CanonicalName)->second;
66+
auto CanonicalName = getCanonicalName(std::get<StringRef>(Symbol));
67+
if (!CanonicalName)
68+
return CanonicalName.takeError();
69+
std::tie(Key, RecordID) =
70+
*saveStringToMap(StrToIndexMap, Saver, *CanonicalName);
6371
IsStringLiteral = false;
6472
}
6573

@@ -75,8 +83,8 @@ Error DataAccessProfData::setDataAccessProfile(SymbolID Symbol,
7583
}
7684

7785
Error DataAccessProfData::setDataAccessProfile(
78-
SymbolID SymbolID, uint64_t AccessCount,
79-
const llvm::SmallVector<DataLocation> &Locations) {
86+
SymbolHandle SymbolID, uint64_t AccessCount,
87+
ArrayRef<DataLocation> Locations) {
8088
if (Error E = setDataAccessProfile(SymbolID, AccessCount))
8189
return E;
8290

@@ -89,17 +97,15 @@ Error DataAccessProfData::setDataAccessProfile(
8997
return Error::success();
9098
}
9199

92-
Error DataAccessProfData::addKnownSymbolWithoutSamples(SymbolID SymbolID) {
100+
Error DataAccessProfData::addKnownSymbolWithoutSamples(SymbolHandle SymbolID) {
93101
if (std::holds_alternative<uint64_t>(SymbolID)) {
94102
KnownColdHashes.insert(std::get<uint64_t>(SymbolID));
95103
return Error::success();
96104
}
97-
StringRef SymbolName = std::get<StringRef>(SymbolID);
98-
if (SymbolName.empty())
99-
return make_error<StringError>("Empty symbol name",
100-
llvm::errc::invalid_argument);
101-
StringRef CanonicalSymName = InstrProfSymtab::getCanonicalName(SymbolName);
102-
KnownColdSymbols.insert(CanonicalSymName);
105+
auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
106+
if (!CanonicalName)
107+
return CanonicalName.takeError();
108+
KnownColdSymbols.insert(*CanonicalName);
103109
return Error::success();
104110
}
105111

@@ -142,7 +148,7 @@ Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
142148
OS.write(CompressedStringLen);
143149
// Write the chars in compressed strings.
144150
for (char C : CompressedStrings)
145-
OS.writeByte(static_cast<uint8_t>(c));
151+
OS.writeByte(static_cast<uint8_t>(C));
146152
// Pad up to a multiple of 8.
147153
// InstrProfReader could read bytes according to 'CompressedStringLen'.
148154
const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
@@ -151,11 +157,15 @@ Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
151157
return Error::success();
152158
}
153159

154-
uint64_t DataAccessProfData::getEncodedIndex(const SymbolID SymbolID) const {
160+
uint64_t
161+
DataAccessProfData::getEncodedIndex(const SymbolHandle SymbolID) const {
155162
if (std::holds_alternative<uint64_t>(SymbolID))
156163
return std::get<uint64_t>(SymbolID);
157164

158-
return StrToIndexMap.find(std::get<StringRef>(SymbolID))->second;
165+
auto Iter = StrToIndexMap.find(std::get<StringRef>(SymbolID));
166+
assert(Iter != StrToIndexMap.end() &&
167+
"String literals not found in StrToIndexMap");
168+
return Iter->second;
159169
}
160170

161171
Error DataAccessProfData::serialize(ProfOStream &OS) const {
@@ -179,13 +189,13 @@ Error DataAccessProfData::serialize(ProfOStream &OS) const {
179189
}
180190

181191
Error DataAccessProfData::deserializeSymbolsAndFilenames(
182-
const unsigned char *&Ptr, uint64_t NumSampledSymbols,
183-
uint64_t NumColdKnownSymbols) {
192+
const unsigned char *&Ptr, const uint64_t NumSampledSymbols,
193+
const uint64_t NumColdKnownSymbols) {
184194
uint64_t Len =
185195
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
186196

187-
// With M=NumSampledSymbols and N=NumColdKnownSymbols, the first M strings are
188-
// symbols with samples, and next N strings are known cold symbols.
197+
// The first NumSampledSymbols strings are symbols with samples, and next
198+
// NumColdKnownSymbols strings are known cold symbols.
189199
uint64_t StringCnt = 0;
190200
std::function<Error(StringRef)> addName = [&](StringRef Name) {
191201
if (StringCnt < NumSampledSymbols)
@@ -219,7 +229,7 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
219229
uint64_t AccessCount =
220230
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
221231

222-
SymbolID SymbolID;
232+
SymbolHandle SymbolID;
223233
if (IsStringLiteral)
224234
SymbolID = ID;
225235
else

llvm/unittests/ProfileData/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
1010
add_llvm_unittest(ProfileDataTests
1111
BPFunctionNodeTest.cpp
1212
CoverageMappingTest.cpp
13+
DataAccessProfTest.cpp
1314
InstrProfDataTest.cpp
1415
InstrProfTest.cpp
1516
ItaniumManglingCanonicalizerTest.cpp

0 commit comments

Comments
 (0)