3030#include  " llvm/Support/StringSaver.h" 
3131
3232#include  < cstdint> 
33+ #include  < optional> 
3334#include  < variant> 
3435
3536namespace  llvm  {
3637
3738namespace  data_access_prof  {
38- //  The location of data in the source code.
39- struct  DataLocation  {
39+ 
40+ // / The location of data in the source code. Used by profile lookup API.
41+ struct  SourceLocation  {
42+   SourceLocation (StringRef FileNameRef, uint32_t  Line)
43+       : FileName(FileNameRef.str()), Line(Line) {}
44+   // / The filename where the data is located.
45+   std::string FileName;
46+   // / The line number in the source code.
47+   uint32_t  Line;
48+ };
49+ 
50+ namespace  internal  {
51+ 
52+ //  Conceptually similar to SourceLocation except that FileNames are StringRef of
53+ //  which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
54+ //  to represent data locations internally.
55+ struct  SourceLocationRef  {
4056  //  The filename where the data is located.
4157  StringRef FileName;
4258  //  The line number in the source code.
4359  uint32_t  Line;
4460};
4561
46- //  The data access profiles for a symbol.
47- struct  DataAccessProfRecord  {
48-   DataAccessProfRecord (uint64_t  SymbolID, uint64_t  AccessCount,
49-                        bool  IsStringLiteral)
62+ //  The data access profiles for a symbol. Used by `DataAccessProfData`
63+ //  to represent records internally.
64+ struct  DataAccessProfRecordRef  {
65+   DataAccessProfRecordRef (uint64_t  SymbolID, uint64_t  AccessCount,
66+                           bool  IsStringLiteral)
5067      : SymbolID(SymbolID), AccessCount(AccessCount),
5168        IsStringLiteral (IsStringLiteral) {}
5269
@@ -67,18 +84,43 @@ struct DataAccessProfRecord {
6784  bool  IsStringLiteral;
6885
6986  //  The locations of data in the source code. Optional.
70-   llvm::SmallVector<DataLocation , 0 > Locations;
87+   llvm::SmallVector<SourceLocationRef , 0 > Locations;
7188};
89+ } //  namespace internal
90+ 
91+ //  SymbolID is either a string representing symbol name if the symbol has
92+ //  stable mangled name relative to source code, or a uint64_t representing the
93+ //  content hash of a string literal (with unstable name patterns like
94+ //  `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
95+ using  SymbolHandleRef = std::variant<StringRef, uint64_t >;
7296
73- // / Encapsulates the data access profile data and the methods to operate on it.
74- // / This class provides profile look-up, serialization and deserialization.
97+ //  The senamtic is the same as `SymbolHandleRef` above. The strings are owned.
98+ using  SymbolHandle = std::variant<std::string, uint64_t >;
99+ 
100+ // / The data access profiles for a symbol.
101+ struct  DataAccessProfRecord  {
102+ public: 
103+   DataAccessProfRecord (SymbolHandleRef SymHandleRef,
104+                        ArrayRef<internal::SourceLocationRef> LocRefs) {
105+     if  (std::holds_alternative<StringRef>(SymHandleRef)) {
106+       SymHandle = std::get<StringRef>(SymHandleRef).str ();
107+     } else 
108+       SymHandle = std::get<uint64_t >(SymHandleRef);
109+ 
110+     for  (auto  Loc : LocRefs)
111+       Locations.push_back (SourceLocation (Loc.FileName , Loc.Line ));
112+   }
113+   SymbolHandle SymHandle;
114+ 
115+   //  The locations of data in the source code. Optional.
116+   SmallVector<SourceLocation> Locations;
117+ };
118+ 
119+ // / Encapsulates the data access profile data and the methods to operate on
120+ // / it. This class provides profile look-up, serialization and
121+ // / deserialization.
75122class  DataAccessProfData  {
76123public: 
77-   //  SymbolID is either a string representing symbol name if the symbol has
78-   //  stable mangled name relative to source code, or a uint64_t representing the
79-   //  content hash of a string literal (with unstable name patterns like
80-   //  `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
81-   using  SymbolHandle = std::variant<StringRef, uint64_t >;
82124  using  StringToIndexMap = llvm::MapVector<StringRef, uint64_t >;
83125
84126  DataAccessProfData () : Saver(Allocator) {}
@@ -93,35 +135,39 @@ class DataAccessProfData {
93135  // / Deserialize this class from the given buffer.
94136  Error deserialize (const  unsigned  char  *&Ptr);
95137
96-   // / Returns a pointer of  profile record for \p SymbolID, or nullptr  if there
138+   // / Returns a profile record for \p SymbolID, or std::nullopt  if there
97139  // / isn't a record. Internally, this function will canonicalize the symbol
98140  // / name before the lookup.
99-   const  DataAccessProfRecord *getProfileRecord (const  SymbolHandle SymID) const ;
141+   std::optional<DataAccessProfRecord>
142+   getProfileRecord (const  SymbolHandleRef SymID) const ;
100143
101144  // / Returns true if \p SymID is seen in profiled binaries and cold.
102-   bool  isKnownColdSymbol (const  SymbolHandle  SymID) const ;
145+   bool  isKnownColdSymbol (const  SymbolHandleRef  SymID) const ;
103146
104-   // / Methods to set symbolized data access profile. Returns error if duplicated 
105-   // / symbol names or content hashes are seen. The user of this class should 
106-   // / aggregate counters that correspond to the same symbol name or with the 
107-   // / same string literal hash before calling 'set*' methods.
108-   Error setDataAccessProfile (SymbolHandle  SymbolID, uint64_t  AccessCount);
147+   // / Methods to set symbolized data access profile. Returns error if
148+   // / duplicated  symbol names or content hashes are seen. The user of this
149+   // / class should  aggregate counters that correspond to the same symbol name
150+   // / or with the  same string literal hash before calling 'set*' methods.
151+   Error setDataAccessProfile (SymbolHandleRef  SymbolID, uint64_t  AccessCount);
109152  // / Similar to the method above, for records with \p Locations representing
110153  // / the `filename:line` where this symbol shows up. Note because of linker's
111154  // / merge of identical symbols (e.g., unnamed_addr string literals), one
112155  // / symbol is likely to have multiple locations.
113-   Error setDataAccessProfile (SymbolHandle SymbolID, uint64_t  AccessCount,
114-                              ArrayRef<DataLocation> Locations);
115-   Error addKnownSymbolWithoutSamples (SymbolHandle SymbolID);
116- 
117-   // / Returns an iterable StringRef for strings in the order they are added.
118-   // / Each string may be a symbol name or a file name.
119-   auto  getStrings () const  {
120-     return  llvm::make_first_range (StrToIndexMap.getArrayRef ());
156+   Error setDataAccessProfile (SymbolHandleRef SymbolID, uint64_t  AccessCount,
157+                              ArrayRef<SourceLocation> Locations);
158+   // / Add a symbol that's seen in the profiled binary without samples.
159+   Error addKnownSymbolWithoutSamples (SymbolHandleRef SymbolID);
160+ 
161+   // / The following methods return array reference for various internal data
162+   // / structures.
163+   ArrayRef<StringToIndexMap::value_type> getStrToIndexMapRef () const  {
164+     return  StrToIndexMap.getArrayRef ();
165+   }
166+   ArrayRef<
167+       MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef>::value_type>
168+   getRecords () const  {
169+     return  Records.getArrayRef ();
121170  }
122- 
123-   // / Returns array reference for various internal data structures.
124-   auto  getRecords () const  { return  Records.getArrayRef (); }
125171  ArrayRef<StringRef> getKnownColdSymbols () const  {
126172    return  KnownColdSymbols.getArrayRef ();
127173  }
@@ -139,11 +185,12 @@ class DataAccessProfData {
139185                                       const  uint64_t  NumSampledSymbols,
140186                                       const  uint64_t  NumColdKnownSymbols);
141187
142-   // / Decode the records and increment \p Ptr to the start of the next payload.
188+   // / Decode the records and increment \p Ptr to the start of the next
189+   // / payload.
143190  Error deserializeRecords (const  unsigned  char  *&Ptr);
144191
145192  // / A helper function to compute a storage index for \p SymbolID.
146-   uint64_t  getEncodedIndex (const  SymbolHandle  SymbolID) const ;
193+   uint64_t  getEncodedIndex (const  SymbolHandleRef  SymbolID) const ;
147194
148195  //  Keeps owned copies of the input strings.
149196  //  NOTE: Keep `Saver` initialized before other class members that reference
@@ -152,7 +199,7 @@ class DataAccessProfData {
152199  llvm::UniqueStringSaver Saver;
153200
154201  //  `Records` stores the records.
155-   MapVector<SymbolHandle, DataAccessProfRecord > Records;
202+   MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef > Records;
156203
157204  //  Use MapVector to keep input order of strings for serialization and
158205  //  deserialization.
0 commit comments