Skip to content

Commit 4727529

Browse files
resolve review feedback
1 parent 6cd7d8d commit 4727529

File tree

3 files changed

+57
-46
lines changed

3 files changed

+57
-46
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ struct DataLocation {
4545

4646
// The data access profiles for a symbol.
4747
struct DataAccessProfRecord {
48+
DataAccessProfRecord(uint64_t SymbolID, uint64_t AccessCount,
49+
bool IsStringLiteral)
50+
: SymbolID(SymbolID), AccessCount(AccessCount),
51+
IsStringLiteral(IsStringLiteral) {}
52+
4853
// Represents a data symbol. The semantic comes in two forms: a symbol index
4954
// for symbol name if `IsStringLiteral` is false, or the hash of a string
5055
// content if `IsStringLiteral` is true. Required.
@@ -58,7 +63,7 @@ struct DataAccessProfRecord {
5863
bool IsStringLiteral;
5964

6065
// The locations of data in the source code. Optional.
61-
llvm::SmallVector<DataLocation> Locations;
66+
llvm::SmallVector<DataLocation, 0> Locations;
6267
};
6368

6469
/// Encapsulates the data access profile data and the methods to operate on it.
@@ -70,7 +75,7 @@ class DataAccessProfData {
7075
using SymbolID = std::variant<StringRef, uint64_t>;
7176
using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
7277

73-
DataAccessProfData() : saver(Allocator) {}
78+
DataAccessProfData() : Saver(Allocator) {}
7479

7580
/// Serialize profile data to the output stream.
7681
/// Storage layout:
@@ -94,10 +99,13 @@ class DataAccessProfData {
9499
/// symbol names or content hashes are seen. The user of this class should
95100
/// aggregate counters that corresponds to the same symbol name or with the
96101
/// same string literal hash before calling 'add*' methods.
97-
Error addSymbolizedDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount);
98-
Error addSymbolizedDataAccessProfile(
99-
SymbolID SymbolID, uint64_t AccessCount,
100-
const llvm::SmallVector<DataLocation> &Locations);
102+
Error setDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount);
103+
/// Similar to the method above, for records with \p Locations representing
104+
/// the `filename:line` where this symbol shows up. Note because of linker's
105+
/// merge of identical symbols (e.g., unnamed_addr string literals), one
106+
/// symbol is likely to have multiple locations.
107+
Error setDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount,
108+
const llvm::SmallVector<DataLocation> &Locations);
101109
Error addKnownSymbolWithoutSamples(SymbolID SymbolID);
102110

103111
/// Returns a iterable StringRef for strings in the order they are added.
@@ -122,20 +130,26 @@ class DataAccessProfData {
122130

123131
private:
124132
/// Serialize the symbol strings into the output stream.
125-
Error serializeStrings(ProfOStream &OS) const;
133+
Error serializeSymbolsAndFilenames(ProfOStream &OS) const;
126134

127135
/// Deserialize the symbol strings from \p Ptr and increment \p Ptr to the
128136
/// start of the next payload.
129-
Error deserializeStrings(const unsigned char *&Ptr,
130-
const uint64_t NumSampledSymbols,
131-
uint64_t NumColdKnownSymbols);
137+
Error deserializeSymbolsAndFilenames(const unsigned char *&Ptr,
138+
const uint64_t NumSampledSymbols,
139+
uint64_t NumColdKnownSymbols);
132140

133141
/// Decode the records and increment \p Ptr to the start of the next payload.
134142
Error deserializeRecords(const unsigned char *&Ptr);
135143

136144
/// A helper function to compute a storage index for \p SymbolID.
137145
uint64_t getEncodedIndex(const SymbolID SymbolID) const;
138146

147+
// Keeps owned copies of the input strings.
148+
// NOTE: Keep `Saver` initialized before other class members that reference
149+
// its string copies and destructed after they are destructed.
150+
llvm::BumpPtrAllocator Allocator;
151+
llvm::UniqueStringSaver Saver;
152+
139153
// `Records` stores the records and `SymbolToRecordIndex` maps a symbol ID to
140154
// its record index.
141155
MapVector<SymbolID, DataAccessProfRecord> Records;
@@ -145,9 +159,6 @@ class DataAccessProfData {
145159
StringToIndexMap StrToIndexMap;
146160
llvm::SetVector<uint64_t> KnownColdHashes;
147161
llvm::SetVector<StringRef> KnownColdSymbols;
148-
// Keeps owned copies of the input strings.
149-
llvm::BumpPtrAllocator Allocator;
150-
llvm::UniqueStringSaver saver;
151162
};
152163

153164
} // namespace data_access_prof

llvm/lib/ProfileData/DataAccessProf.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ namespace data_access_prof {
1818
// iterator.
1919
static MapVector<StringRef, uint64_t>::iterator
2020
saveStringToMap(MapVector<StringRef, uint64_t> &Map,
21-
llvm::UniqueStringSaver &saver, StringRef Str) {
22-
auto [Iter, Inserted] = Map.try_emplace(saver.save(Str), Map.size());
21+
llvm::UniqueStringSaver &Saver, StringRef Str) {
22+
auto [Iter, Inserted] = Map.try_emplace(Saver.save(Str), Map.size());
2323
return Iter;
2424
}
2525

@@ -38,12 +38,12 @@ DataAccessProfData::getProfileRecord(const SymbolID SymbolID) const {
3838

3939
bool DataAccessProfData::isKnownColdSymbol(const SymbolID SymID) const {
4040
if (std::holds_alternative<uint64_t>(SymID))
41-
return KnownColdHashes.count(std::get<uint64_t>(SymID));
42-
return KnownColdSymbols.count(std::get<StringRef>(SymID));
41+
return KnownColdHashes.contains(std::get<uint64_t>(SymID));
42+
return KnownColdSymbols.contains(std::get<StringRef>(SymID));
4343
}
4444

45-
Error DataAccessProfData::addSymbolizedDataAccessProfile(SymbolID Symbol,
46-
uint64_t AccessCount) {
45+
Error DataAccessProfData::setDataAccessProfile(SymbolID Symbol,
46+
uint64_t AccessCount) {
4747
uint64_t RecordID = -1;
4848
bool IsStringLiteral = false;
4949
SymbolID Key;
@@ -59,12 +59,12 @@ Error DataAccessProfData::addSymbolizedDataAccessProfile(SymbolID Symbol,
5959

6060
StringRef CanonicalName = InstrProfSymtab::getCanonicalName(SymbolName);
6161
Key = CanonicalName;
62-
RecordID = saveStringToMap(StrToIndexMap, saver, CanonicalName)->second;
62+
RecordID = saveStringToMap(StrToIndexMap, Saver, CanonicalName)->second;
6363
IsStringLiteral = false;
6464
}
6565

66-
auto [Iter, Inserted] = Records.try_emplace(
67-
Key, DataAccessProfRecord{RecordID, AccessCount, IsStringLiteral});
66+
auto [Iter, Inserted] =
67+
Records.try_emplace(Key, RecordID, AccessCount, IsStringLiteral);
6868
if (!Inserted)
6969
return make_error<StringError>("Duplicate symbol or string literal added. "
7070
"User of DataAccessProfData should "
@@ -74,16 +74,16 @@ Error DataAccessProfData::addSymbolizedDataAccessProfile(SymbolID Symbol,
7474
return Error::success();
7575
}
7676

77-
Error DataAccessProfData::addSymbolizedDataAccessProfile(
77+
Error DataAccessProfData::setDataAccessProfile(
7878
SymbolID SymbolID, uint64_t AccessCount,
7979
const llvm::SmallVector<DataLocation> &Locations) {
80-
if (Error E = addSymbolizedDataAccessProfile(SymbolID, AccessCount))
80+
if (Error E = setDataAccessProfile(SymbolID, AccessCount))
8181
return E;
8282

8383
auto &Record = Records.back().second;
8484
for (const auto &Location : Locations)
8585
Record.Locations.push_back(
86-
{saveStringToMap(StrToIndexMap, saver, Location.FileName)->first,
86+
{saveStringToMap(StrToIndexMap, Saver, Location.FileName)->first,
8787
Location.Line});
8888

8989
return Error::success();
@@ -108,7 +108,8 @@ Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
108108
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
109109
uint64_t NumColdKnownSymbols =
110110
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
111-
if (Error E = deserializeStrings(Ptr, NumSampledSymbols, NumColdKnownSymbols))
111+
if (Error E = deserializeSymbolsAndFilenames(Ptr, NumSampledSymbols,
112+
NumColdKnownSymbols))
112113
return E;
113114

114115
uint64_t Num =
@@ -120,7 +121,7 @@ Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
120121
return deserializeRecords(Ptr);
121122
}
122123

123-
Error DataAccessProfData::serializeStrings(ProfOStream &OS) const {
124+
Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
124125
OS.write(StrToIndexMap.size());
125126
OS.write(KnownColdSymbols.size());
126127

@@ -158,7 +159,7 @@ uint64_t DataAccessProfData::getEncodedIndex(const SymbolID SymbolID) const {
158159
}
159160

160161
Error DataAccessProfData::serialize(ProfOStream &OS) const {
161-
if (Error E = serializeStrings(OS))
162+
if (Error E = serializeSymbolsAndFilenames(OS))
162163
return E;
163164
OS.write(KnownColdHashes.size());
164165
for (const auto &Hash : KnownColdHashes)
@@ -177,9 +178,9 @@ Error DataAccessProfData::serialize(ProfOStream &OS) const {
177178
return Error::success();
178179
}
179180

180-
Error DataAccessProfData::deserializeStrings(const unsigned char *&Ptr,
181-
uint64_t NumSampledSymbols,
182-
uint64_t NumColdKnownSymbols) {
181+
Error DataAccessProfData::deserializeSymbolsAndFilenames(
182+
const unsigned char *&Ptr, uint64_t NumSampledSymbols,
183+
uint64_t NumColdKnownSymbols) {
183184
uint64_t Len =
184185
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
185186

@@ -188,9 +189,9 @@ Error DataAccessProfData::deserializeStrings(const unsigned char *&Ptr,
188189
uint64_t StringCnt = 0;
189190
std::function<Error(StringRef)> addName = [&](StringRef Name) {
190191
if (StringCnt < NumSampledSymbols)
191-
saveStringToMap(StrToIndexMap, saver, Name);
192+
saveStringToMap(StrToIndexMap, Saver, Name);
192193
else
193-
KnownColdSymbols.insert(saver.save(Name));
194+
KnownColdSymbols.insert(Saver.save(Name));
194195
++StringCnt;
195196
return Error::success();
196197
};
@@ -223,7 +224,7 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
223224
SymbolID = ID;
224225
else
225226
SymbolID = Strings[ID];
226-
if (Error E = addSymbolizedDataAccessProfile(SymbolID, AccessCount))
227+
if (Error E = setDataAccessProfile(SymbolID, AccessCount))
227228
return E;
228229

229230
auto &Record = Records.back().second;

llvm/unittests/ProfileData/MemProfTest.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -764,18 +764,17 @@ static std::string ErrorToString(Error E) {
764764
TEST(MemProf, DataAccessProfileError) {
765765
// Returns error if the input symbol name is empty.
766766
llvm::data_access_prof::DataAccessProfData Data;
767-
EXPECT_THAT(ErrorToString(Data.addSymbolizedDataAccessProfile("", 100)),
767+
EXPECT_THAT(ErrorToString(Data.setDataAccessProfile("", 100)),
768768
HasSubstr("Empty symbol name"));
769769

770770
// Returns error when the same symbol gets added twice.
771-
ASSERT_FALSE(Data.addSymbolizedDataAccessProfile("foo", 100));
772-
EXPECT_THAT(ErrorToString(Data.addSymbolizedDataAccessProfile("foo", 100)),
771+
ASSERT_FALSE(Data.setDataAccessProfile("foo", 100));
772+
EXPECT_THAT(ErrorToString(Data.setDataAccessProfile("foo", 100)),
773773
HasSubstr("Duplicate symbol or string literal added"));
774774

775775
// Returns error when the same string content hash gets added twice.
776-
ASSERT_FALSE(Data.addSymbolizedDataAccessProfile((uint64_t)135246, 1000));
777-
EXPECT_THAT(ErrorToString(
778-
Data.addSymbolizedDataAccessProfile((uint64_t)135246, 1000)),
776+
ASSERT_FALSE(Data.setDataAccessProfile((uint64_t)135246, 1000));
777+
EXPECT_THAT(ErrorToString(Data.setDataAccessProfile((uint64_t)135246, 1000)),
779778
HasSubstr("Duplicate symbol or string literal added"));
780779
}
781780

@@ -788,16 +787,16 @@ TEST(MemProf, DataAccessProfile) {
788787

789788
// In the bool conversion, Error is true if it's in a failure state and false
790789
// if it's in an accept state. Use ASSERT_FALSE or EXPECT_FALSE for no error.
791-
ASSERT_FALSE(Data.addSymbolizedDataAccessProfile("foo.llvm.123", 100));
790+
ASSERT_FALSE(Data.setDataAccessProfile("foo.llvm.123", 100));
792791
ASSERT_FALSE(Data.addKnownSymbolWithoutSamples((uint64_t)789));
793792
ASSERT_FALSE(Data.addKnownSymbolWithoutSamples("sym2"));
794-
ASSERT_FALSE(Data.addSymbolizedDataAccessProfile("bar.__uniq.321", 123,
795-
{
796-
DataLocation{"file2", 3},
797-
}));
793+
ASSERT_FALSE(Data.setDataAccessProfile("bar.__uniq.321", 123,
794+
{
795+
DataLocation{"file2", 3},
796+
}));
798797
ASSERT_FALSE(Data.addKnownSymbolWithoutSamples("sym1"));
799798
ASSERT_FALSE(Data.addKnownSymbolWithoutSamples((uint64_t)678));
800-
ASSERT_FALSE(Data.addSymbolizedDataAccessProfile(
799+
ASSERT_FALSE(Data.setDataAccessProfile(
801800
(uint64_t)135246, 1000,
802801
{DataLocation{"file1", 1}, DataLocation{"file2", 2}}));
803802

0 commit comments

Comments
 (0)