- 
                Notifications
    
You must be signed in to change notification settings  - Fork 15.1k
 
[StaticDataLayout][PGO]Implement reader and writer change for data access profiles #139997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
6cd7d8d
              4727529
              80249bc
              b69c993
              6fe9b48
              df08094
              6dd04e4
              4045c94
              f63fea6
              c5d8fcf
              0fb3e6a
              d3443c7
              c9f1f2b
              5d0e236
              b14f1e2
              09eab64
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -41,6 +41,8 @@ namespace data_access_prof { | |
| struct SourceLocation { | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Re: line +38] Should we change this to memprof? Reasoning is that this is closely tied to the memprof profile data format and we don't intend to use it elsewhere. Wdyt? See this comment inline on Graphite. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this makes sense. Renamed namespace in this patch and I'm open to split renames to a separate patch before this one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine with keeping the rename in this patch.  | 
||
| SourceLocation(StringRef FileNameRef, uint32_t Line) | ||
| : FileName(FileNameRef.str()), Line(Line) {} | ||
| 
     | 
||
| SourceLocation() {} | ||
                
      
                  snehasish marked this conversation as resolved.
               
          
            Show resolved
            Hide resolved
         | 
||
| /// The filename where the data is located. | ||
| std::string FileName; | ||
| /// The line number in the source code. | ||
| 
        
          
        
         | 
    @@ -53,6 +55,8 @@ namespace internal { | |
| // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData` | ||
| // to represent data locations internally. | ||
| struct SourceLocationRef { | ||
| SourceLocationRef(StringRef FileNameRef, uint32_t Line) | ||
| : FileName(FileNameRef), Line(Line) {} | ||
| // The filename where the data is located. | ||
| StringRef FileName; | ||
| // The line number in the source code. | ||
| 
          
            
          
           | 
    @@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>; | |
| /// The data access profiles for a symbol. | ||
| struct DataAccessProfRecord { | ||
| public: | ||
| DataAccessProfRecord(SymbolHandleRef SymHandleRef, | ||
| ArrayRef<internal::SourceLocationRef> LocRefs) { | ||
| DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount, | ||
| ArrayRef<internal::SourceLocationRef> LocRefs) | ||
| : AccessCount(AccessCount) { | ||
| if (std::holds_alternative<StringRef>(SymHandleRef)) { | ||
| SymHandle = std::get<StringRef>(SymHandleRef).str(); | ||
| } else | ||
| 
        
          
        
         | 
    @@ -110,8 +115,9 @@ struct DataAccessProfRecord { | |
| for (auto Loc : LocRefs) | ||
| Locations.push_back(SourceLocation(Loc.FileName, Loc.Line)); | ||
                
       | 
||
| } | ||
| DataAccessProfRecord() {} | ||
| SymbolHandle SymHandle; | ||
| 
     | 
||
| uint64_t AccessCount; | ||
| // The locations of data in the source code. Optional. | ||
| SmallVector<SourceLocation> Locations; | ||
| }; | ||
| 
          
            
          
           | 
    ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -10,6 +10,7 @@ | |
| // | ||
| //===----------------------------------------------------------------------===// | ||
| 
     | 
||
| #include "llvm/ProfileData/DataAccessProf.h" | ||
| #include "llvm/ProfileData/InstrProf.h" | ||
| #include "llvm/ProfileData/InstrProfReader.h" | ||
| #include "llvm/ProfileData/MemProf.h" | ||
| 
          
            
          
           | 
    @@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS, | |
| 
     | 
||
| static Error writeMemProfRadixTreeBased( | ||
| ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, | ||
| memprof::IndexedVersion Version, bool MemProfFullSchema) { | ||
| memprof::IndexedVersion Version, bool MemProfFullSchema, | ||
| std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> | ||
| DataAccessProfileData) { | ||
| assert((Version == memprof::Version3 || Version == memprof::Version4) && | ||
| "Unsupported version for radix tree format"); | ||
| 
     | 
||
| 
        
          
        
         | 
    @@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased( | |
| OS.write(0ULL); // Reserve space for the memprof call stack payload offset. | ||
| OS.write(0ULL); // Reserve space for the memprof record payload offset. | ||
| OS.write(0ULL); // Reserve space for the memprof record table offset. | ||
| if (Version >= memprof::Version4) | ||
| OS.write(0ULL); // Reserve space for the data access profile offset. | ||
| 
     | 
||
| auto Schema = memprof::getHotColdSchema(); | ||
| if (MemProfFullSchema) | ||
| 
        
          
        
         | 
    @@ -251,17 +256,28 @@ static Error writeMemProfRadixTreeBased( | |
| uint64_t RecordTableOffset = writeMemProfRecords( | ||
| OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes); | ||
| 
     | 
||
| uint64_t DataAccessProfOffset = 0; | ||
| if (DataAccessProfileData.has_value()) { | ||
| assert(Version >= memprof::Version4 && | ||
| "Data access profiles are added starting from v4"); | ||
| DataAccessProfOffset = OS.tell(); | ||
                
      
                  mingmingl-llvm marked this conversation as resolved.
               
          
            Show resolved
            Hide resolved
         | 
||
| if (Error E = (*DataAccessProfileData).get().serialize(OS)) | ||
                
      
                  snehasish marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| return E; | ||
| } | ||
| 
     | 
||
| // Verify that the computation for the number of elements in the call stack | ||
| // array works. | ||
| assert(CallStackPayloadOffset + | ||
| NumElements * sizeof(memprof::LinearFrameId) == | ||
| RecordPayloadOffset); | ||
| 
     | 
||
| uint64_t Header[] = { | ||
| SmallVector<uint64_t, 4> Header = { | ||
| CallStackPayloadOffset, | ||
| RecordPayloadOffset, | ||
| RecordTableOffset, | ||
| }; | ||
| if (Version >= memprof::Version4) | ||
| Header.push_back(DataAccessProfOffset); | ||
| OS.patch({{HeaderUpdatePos, Header}}); | ||
| 
     | 
||
| return Error::success(); | ||
| 
        
          
        
         | 
    @@ -272,28 +288,33 @@ static Error writeMemProfV3(ProfOStream &OS, | |
| memprof::IndexedMemProfData &MemProfData, | ||
| bool MemProfFullSchema) { | ||
| return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3, | ||
| MemProfFullSchema); | ||
| MemProfFullSchema, std::nullopt); | ||
| } | ||
| 
     | 
||
| // Write out MemProf Version4 | ||
| static Error writeMemProfV4(ProfOStream &OS, | ||
| memprof::IndexedMemProfData &MemProfData, | ||
| bool MemProfFullSchema) { | ||
| static Error writeMemProfV4( | ||
| ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, | ||
| bool MemProfFullSchema, | ||
| std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> | ||
| DataAccessProfileData) { | ||
| return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4, | ||
| MemProfFullSchema); | ||
| MemProfFullSchema, DataAccessProfileData); | ||
| } | ||
| 
     | 
||
| // Write out the MemProf data in a requested version. | ||
| Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, | ||
| memprof::IndexedVersion MemProfVersionRequested, | ||
| bool MemProfFullSchema) { | ||
| Error writeMemProf( | ||
| ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, | ||
| memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, | ||
| std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>> | ||
| DataAccessProfileData) { | ||
| switch (MemProfVersionRequested) { | ||
| case memprof::Version2: | ||
| return writeMemProfV2(OS, MemProfData, MemProfFullSchema); | ||
| case memprof::Version3: | ||
| return writeMemProfV3(OS, MemProfData, MemProfFullSchema); | ||
| case memprof::Version4: | ||
| return writeMemProfV4(OS, MemProfData, MemProfFullSchema); | ||
| return writeMemProfV4(OS, MemProfData, MemProfFullSchema, | ||
| DataAccessProfileData); | ||
| } | ||
| 
     | 
||
| return make_error<InstrProfError>( | ||
| 
          
            
          
           | 
    @@ -357,7 +378,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start, | |
| } | ||
| 
     | 
||
| Error IndexedMemProfReader::deserializeRadixTreeBased( | ||
| const unsigned char *Start, const unsigned char *Ptr) { | ||
| const unsigned char *Start, const unsigned char *Ptr, | ||
| memprof::IndexedVersion Version) { | ||
| assert((Version == memprof::Version3 || Version == memprof::Version4) && | ||
| "Unsupported version for radix tree format"); | ||
| // The offset in the stream right before invoking | ||
| // CallStackTableGenerator.Emit. | ||
| const uint64_t CallStackPayloadOffset = | ||
| 
        
          
        
         | 
    @@ -369,6 +393,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( | |
| const uint64_t RecordTableOffset = | ||
| support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | ||
| 
     | 
||
| uint64_t DataAccessProfOffset = 0; | ||
| if (Version == memprof::Version4) | ||
| DataAccessProfOffset = | ||
| support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | ||
| 
     | 
||
| // Read the schema. | ||
| auto SchemaOr = memprof::readMemProfSchema(Ptr); | ||
| if (!SchemaOr) | ||
| 
        
          
        
         | 
    @@ -390,6 +419,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased( | |
| /*Payload=*/Start + RecordPayloadOffset, | ||
| /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema))); | ||
| 
     | 
||
| if (DataAccessProfOffset > RecordTableOffset) { | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it possible to have an non-zero DataAccessProfOffset that is <= RecordTableOffset? Perhaps assert (!DataAccessProfOffset || ... > RecordTableOffset), and use if (DataAccessProfOffset !=0) as the guard. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea to tighten the check resonates with me. Maybe I can record both DAP offset and DAP length in the memprof header, and handle empty input in the  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I realized checking the length of data access profile payload before creating the class or calling its 'deserialize' method will do it, and there is no need to update the signature of  I'm open to make the length field more general (e.g., recording the length of the total memprof profile will make it useful when there are new payloads after data access profiles) and would like to hear thoughts on it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really follow the benefit of adding the length for this particular section? Not completely against it, but we don't seem to have the length for the other sections. And it isn't clear to me that guarding the deserialization by the length is inherently better than comparing the offsets. I do think that some assertion checking could be added in either case though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Un-do the length changes and added the  Recording and checking length makes the code a little more straightforward and readable, but the implementation is a little more complex. I don't feel strong about it.  | 
||
| DataAccessProfileData = | ||
| std::make_unique<data_access_prof::DataAccessProfData>(); | ||
| const unsigned char *DAPPtr = Start + DataAccessProfOffset; | ||
| if (Error E = DataAccessProfileData->deserialize(DAPPtr)) | ||
| return E; | ||
| } | ||
| 
     | 
||
| return Error::success(); | ||
| } | ||
| 
     | 
||
| 
          
            
          
           | 
    @@ -423,7 +460,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, | |
| case memprof::Version3: | ||
| case memprof::Version4: | ||
| // V3 and V4 share the same high-level structure (radix tree, linear IDs). | ||
| if (Error E = deserializeRadixTreeBased(Start, Ptr)) | ||
| if (Error E = deserializeRadixTreeBased(Start, Ptr, Version)) | ||
| return E; | ||
| break; | ||
| } | ||
| 
          
            
          
           | 
    ||
Uh oh!
There was an error while loading. Please reload this page.