Skip to content

Commit 6cd7d8d

Browse files
Add classes to construct and use data access profiles
1 parent e886ba1 commit 6cd7d8d

File tree

7 files changed

+579
-11
lines changed

7 files changed

+579
-11
lines changed

llvm/include/llvm/ADT/MapVector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class MapVector {
5757
return std::move(Vector);
5858
}
5959

60+
/// Returns an ArrayRef over the underlying Vector.
ArrayRef<value_type> getArrayRef() const { return Vector; }
61+
6062
/// Returns the number of elements held in Vector.
size_type size() const { return Vector.size(); }
6163

6264
/// Grow the MapVector so that it can contain at least \p NumEntries items
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
//===- DataAccessProf.h - Data access profile format support ----*- C++ -*-===//
2+
//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file contains support to construct and use data access profiles.
11+
//
12+
// For the original RFC of this pass please see
13+
// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
18+
#define LLVM_PROFILEDATA_DATAACCESSPROF_H_
19+
20+
#include "llvm/ADT/DenseMap.h"
21+
#include "llvm/ADT/DenseMapInfoVariant.h"
22+
#include "llvm/ADT/MapVector.h"
23+
#include "llvm/ADT/STLExtras.h"
24+
#include "llvm/ADT/SetVector.h"
25+
#include "llvm/ADT/SmallVector.h"
26+
#include "llvm/ADT/StringRef.h"
27+
#include "llvm/ProfileData/InstrProf.h"
28+
#include "llvm/Support/Allocator.h"
29+
#include "llvm/Support/Error.h"
30+
#include "llvm/Support/StringSaver.h"
31+
32+
#include <cstdint>
33+
#include <variant>
34+
35+
namespace llvm {
36+
37+
namespace data_access_prof {
38+
// The location of data in the source code.
39+
struct DataLocation {
40+
// The filename where the data is located.
41+
StringRef FileName;
42+
// The line number in the source code.
43+
uint32_t Line;
44+
};
45+
46+
// The data access profiles for a symbol.
47+
struct DataAccessProfRecord {
48+
// Represents a data symbol. The semantic comes in two forms: a symbol index
49+
// for symbol name if `IsStringLiteral` is false, or the hash of a string
50+
// content if `IsStringLiteral` is true. Required.
51+
uint64_t SymbolID;
52+
53+
// The access count of symbol. Required.
54+
uint64_t AccessCount;
55+
56+
// True iff this is a record for string literal (symbols with name pattern
57+
// `.str.*` in the symbol table). Required.
58+
bool IsStringLiteral;
59+
60+
// The locations of data in the source code. Optional.
61+
llvm::SmallVector<DataLocation> Locations;
62+
};
63+
64+
/// Encapsulates the data access profile data and the methods to operate on it.
65+
/// This class provides profile look-up, serialization and deserialization.
66+
class DataAccessProfData {
67+
public:
68+
// SymbolID is either a string representing symbol name, or a uint64_t
69+
// representing the content hash of a string literal.
70+
using SymbolID = std::variant<StringRef, uint64_t>;
71+
using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
72+
73+
DataAccessProfData() : saver(Allocator) {}
74+
75+
/// Serialize profile data to the output stream.
76+
/// Storage layout:
77+
/// - Serialized strings.
78+
/// - The encoded hashes.
79+
/// - Records.
80+
Error serialize(ProfOStream &OS) const;
81+
82+
/// Deserialize this class from the given buffer.
83+
Error deserialize(const unsigned char *&Ptr);
84+
85+
/// Returns a pointer of profile record for \p SymbolID, or nullptr if there
86+
/// isn't a record. Internally, this function will canonicalize the symbol
87+
/// name before the lookup.
88+
const DataAccessProfRecord *getProfileRecord(const SymbolID SymID) const;
89+
90+
/// Returns true if \p SymID is seen in profiled binaries and cold.
91+
bool isKnownColdSymbol(const SymbolID SymID) const;
92+
93+
/// Methods to add symbolized data access profile. Returns error if duplicated
94+
/// symbol names or content hashes are seen. The user of this class should
95+
/// aggregate counters that corresponds to the same symbol name or with the
96+
/// same string literal hash before calling 'add*' methods.
97+
Error addSymbolizedDataAccessProfile(SymbolID SymbolID, uint64_t AccessCount);
98+
Error addSymbolizedDataAccessProfile(
99+
SymbolID SymbolID, uint64_t AccessCount,
100+
const llvm::SmallVector<DataLocation> &Locations);
101+
Error addKnownSymbolWithoutSamples(SymbolID SymbolID);
102+
103+
/// Returns a iterable StringRef for strings in the order they are added.
104+
auto getStrings() const {
105+
ArrayRef<std::pair<StringRef, uint64_t>> RefSymbolNames(
106+
StrToIndexMap.begin(), StrToIndexMap.end());
107+
return llvm::make_first_range(RefSymbolNames);
108+
}
109+
110+
/// Returns array reference for various internal data structures.
111+
inline ArrayRef<
112+
std::pair<std::variant<StringRef, uint64_t>, DataAccessProfRecord>>
113+
getRecords() const {
114+
return Records.getArrayRef();
115+
}
116+
inline ArrayRef<StringRef> getKnownColdSymbols() const {
117+
return KnownColdSymbols.getArrayRef();
118+
}
119+
inline ArrayRef<uint64_t> getKnownColdHashes() const {
120+
return KnownColdHashes.getArrayRef();
121+
}
122+
123+
private:
124+
/// Serialize the symbol strings into the output stream.
125+
Error serializeStrings(ProfOStream &OS) const;
126+
127+
/// Deserialize the symbol strings from \p Ptr and increment \p Ptr to the
128+
/// start of the next payload.
129+
Error deserializeStrings(const unsigned char *&Ptr,
130+
const uint64_t NumSampledSymbols,
131+
uint64_t NumColdKnownSymbols);
132+
133+
/// Decode the records and increment \p Ptr to the start of the next payload.
134+
Error deserializeRecords(const unsigned char *&Ptr);
135+
136+
/// A helper function to compute a storage index for \p SymbolID.
137+
uint64_t getEncodedIndex(const SymbolID SymbolID) const;
138+
139+
// `Records` stores the records and `SymbolToRecordIndex` maps a symbol ID to
140+
// its record index.
141+
MapVector<SymbolID, DataAccessProfRecord> Records;
142+
143+
// Use MapVector to keep input order of strings for serialization and
144+
// deserialization.
145+
StringToIndexMap StrToIndexMap;
146+
llvm::SetVector<uint64_t> KnownColdHashes;
147+
llvm::SetVector<StringRef> KnownColdSymbols;
148+
// Keeps owned copies of the input strings.
149+
llvm::BumpPtrAllocator Allocator;
150+
llvm::UniqueStringSaver saver;
151+
};
152+
153+
} // namespace data_access_prof
154+
} // namespace llvm
155+
156+
#endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,12 @@ void createPGONameMetadata(GlobalObject &GO, StringRef PGOName);
357357
/// the duplicated profile variables for Comdat functions.
358358
bool needsComdatForCounter(const GlobalObject &GV, const Module &M);
359359

360+
/// \c NameStrings is a string composed of one or more possibly encoded
361+
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
362+
/// method decodes the string and calls `NameCallback` for each substring.
363+
Error readAndDecodeStrings(StringRef NameStrings,
364+
std::function<Error(StringRef)> NameCallback);
365+
360366
/// An enum describing the attributes of an instrumented profile.
361367
enum class InstrProfKind {
362368
Unknown = 0x0,
@@ -493,6 +499,11 @@ class InstrProfSymtab {
493499
public:
494500
using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
495501

502+
// Returns the canonical name of the given PGOName. In a canonical name, all
503+
// suffixes that begin with "." except ".__uniq." are stripped.
504+
// FIXME: Unify this with `FunctionSamples::getCanonicalFnName`.
505+
static StringRef getCanonicalName(StringRef PGOName);
506+
496507
private:
497508
using AddrIntervalMap =
498509
IntervalMap<uint64_t, uint64_t, 4, IntervalMapHalfOpenInfo<uint64_t>>;
@@ -528,11 +539,6 @@ class InstrProfSymtab {
528539

529540
/// Returns the fixed placeholder string "** External Symbol **".
static StringRef getExternalSymbol() { return "** External Symbol **"; }
530541

531-
// Returns the canonial name of the given PGOName. In a canonical name, all
532-
// suffixes that begins with "." except ".__uniq." are stripped.
533-
// FIXME: Unify this with `FunctionSamples::getCanonicalFnName`.
534-
static StringRef getCanonicalName(StringRef PGOName);
535-
536542
// Add the function into the symbol table, by creating the following
537543
// map entries:
538544
// name-set = {PGOFuncName} union {getCanonicalName(PGOFuncName)}

llvm/lib/ProfileData/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
add_llvm_component_library(LLVMProfileData
2+
DataAccessProf.cpp
23
GCOV.cpp
34
IndexedMemProfData.cpp
45
InstrProf.cpp

0 commit comments

Comments
 (0)