From dab9c01c6e8181c49ff654d0b7eb61e3768afdf0 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Sun, 18 May 2025 18:24:08 -0700 Subject: [PATCH 1/2] Support data access profile in llvm-profgen --- llvm/tools/llvm-profgen/CMakeLists.txt | 1 + .../llvm-profgen/DataAccessPerfReader.cpp | 129 ++++++++++++++++++ .../tools/llvm-profgen/DataAccessPerfReader.h | 57 ++++++++ llvm/tools/llvm-profgen/PerfReader.cpp | 2 + llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 ++ llvm/tools/llvm-profgen/llvm-profgen.cpp | 84 ++++++++---- 6 files changed, 255 insertions(+), 28 deletions(-) create mode 100644 llvm/tools/llvm-profgen/DataAccessPerfReader.cpp create mode 100644 llvm/tools/llvm-profgen/DataAccessPerfReader.h diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt index 354c63f409ffe..d11579145a517 100644 --- a/llvm/tools/llvm-profgen/CMakeLists.txt +++ b/llvm/tools/llvm-profgen/CMakeLists.txt @@ -18,6 +18,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_tool(llvm-profgen llvm-profgen.cpp PerfReader.cpp + DataAccessPerfReader.cpp CSPreInliner.cpp ProfiledBinary.cpp ProfileGenerator.cpp diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp new file mode 100644 index 0000000000000..9c0858ef49521 --- /dev/null +++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp @@ -0,0 +1,129 @@ +#include "DataAccessPerfReader.h" +#include "ErrorHandling.h" +#include "PerfReader.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +#include + +static llvm::Regex IPSampleRegex(": 0x[a-fA-F0-9]+ period:"); +static llvm::Regex DataAddressRegex("addr: 0x[a-fA-F0-9]+"); + +namespace llvm { + +void DataAccessPerfReader::parsePerfTraces() { + parsePerfTrace(PerfTraceFilename); +} + +static void testPerfSampleRecordRegex() { + std::regex logRegex( + R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)"); + + std::smatch testMatch; + const std::string testLine = + "2193330181938979 0xa88 [0x48]: PERF_RECORD_SAMPLE(IP, 0x4002): " + "1807344/1807344: 0x260b45 period: 100 addr: 0x200630"; + if (std::regex_search(testLine, testMatch, logRegex)) { + if (testMatch.size() != 5) { + exitWithError("Regex did not match expected number of groups."); + } + for (size_t i = 0; i < testMatch.size(); ++i) { + errs() << "Group " << i << ": " << testMatch[i] << "\n"; + } + // errs() << "Test line matched successfully.\n"; + } else { + exitWithError("Test line did not match regex."); + } +} + +// Ignore mmap events. +void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) { + std::regex logRegex( + R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)"); + uint64_t UnmatchedLine = 0, MatchedLine = 0; + + auto BufferOrErr = MemoryBuffer::getFile(PerfTrace); + std::error_code EC = BufferOrErr.getError(); + if (EC) + exitWithError("Failed to open perf trace file: " + PerfTrace); + + line_iterator LineIt(*BufferOrErr.get(), true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + + // Parse MMAP event from perf trace. + // Construct a binary from the binary file path. + PerfScriptReader::MMapEvent MMap; + if (Line.contains("PERF_RECORD_MMAP2")) { + if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) { + errs() << "MMap event found: " + << "PID: " << MMap.PID + << ", Address: " << format("0x%llx", MMap.Address) + << ", Size: " << MMap.Size << ", Offset: " << MMap.Offset + << ", Binary Path: " << MMap.BinaryPath << "\n"; + if (MMap.Offset == 0) { + updateBinaryAddress(MMap); + } + } + continue; + } + + if (!Line.contains("PERF_RECORD_SAMPLE")) { + // Skip lines that do not contain "PERF_RECORD_SAMPLE". + continue; + } + // errs() << "Processing line: " << Line << "\n"; + + // if (IPSampleRegex.match(Line, &Matches)) { + // errs() << "IP Captured: " << Matches.size() << "\n"; + // } + // if (DataAddressRegex.match(Line, &Matches)) { + // errs() << "Data Address Captured: " << Matches.size() << "\n"; + // } + + std::smatch matches; + const std::string LineStr = Line.str(); + + if (std::regex_search(LineStr.begin(), LineStr.end(), matches, logRegex)) { + if (matches.size() != 5) + continue; + + uint64_t DataAddress = std::stoull(matches[4].str(), nullptr, 16); + uint64_t IP = std::stoull(matches[3].str(), nullptr, 16); + int32_t PID = std::stoi(matches[1].str()); + // if (DataAddress == 0x200630) { + // errs() << "Find data address at 0x200630, IP: " << format("0x%llx", + // IP) + // << " pid is " << PID << "\n"; + // } + + // errs() << matches.size() << " matches found in line: " << LineStr << + // "\n"; for (const auto &Match : matches) { + // errs() << "Match: " << Match.str() << "\n"; + // } + // Check if the PID matches the filter. + + if (PIDFilter && *PIDFilter != PID) { + continue; + } + + // Extract the address and count. + + uint64_t CanonicalDataAddress = + Binary->canonicalizeVirtualAddress(DataAddress); + // errs() << "Data address is " << format("0x" PRIx64 ":", DataAddress) + // << " Canonical data address is " + // << format("0x" PRIx64 ":", CanonicalDataAddress) << "\n"; + AddressToCount[CanonicalDataAddress] += 1; + MatchedLine++; + } else { + // errs() << "\tNo match found for line: " << Line << "\n"; + UnmatchedLine++; + } + } + + errs() << "Total unmatched lines: " << UnmatchedLine << "\t" + << "Matched lines: " << MatchedLine << "\n"; +} + +} // namespace llvm diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.h b/llvm/tools/llvm-profgen/DataAccessPerfReader.h new file mode 100644 index 0000000000000..5e2dc4da5bc6f --- /dev/null +++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.h @@ -0,0 +1,57 @@ +//===-- DataAccessPerfReader.h - perfscript reader for data access profiles -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H +#define LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H + +#include "PerfReader.h" +#include "ProfiledBinary.h" +#include "llvm/ADT/MapVector.h" + +namespace llvm { + +class DataAccessPerfReader : public PerfScriptReader { +public: + DataAccessPerfReader(ProfiledBinary *Binary, StringRef PerfTrace, + std::optional PID) + : PerfScriptReader(Binary, PerfTrace, PID), PerfTraceFilename(PerfTrace) { + } + + // Entry of the reader to parse multiple perf traces + void parsePerfTraces() override; + + auto getAddressToCount() const { + return AddressToCount.getArrayRef(); + } + + void print() const { + auto addrCountArray = AddressToCount.getArrayRef(); + std::vector> SortedEntries( + addrCountArray.begin(), addrCountArray.end()); + llvm::sort(SortedEntries, [](const auto &A, const auto &B) { + return A.second > B.second; + }); + for (const auto &Entry : SortedEntries) { + if (Entry.second == 0) + continue; // Skip entries with zero count + dbgs() << "Address: " << format("0x%llx", Entry.first) + << ", Count: " << Entry.second << "\n"; + } + } + +private: + void parsePerfTrace(StringRef PerfTrace); + + MapVector AddressToCount; + + StringRef PerfTraceFilename; +}; + +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_PROFGEN_DATAACCESSPERFREADER_H diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index ad113eda27914..ecacd42576dee 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -478,6 +478,8 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { // Only update for the first executable segment and assume all other // segments are loaded at consecutive memory addresses, which is the case on // X64. + errs() << "Setting " << Binary->getPath() << " base address to " + << format("0x%" PRIx64, Event.Address) << "\n"; Binary->setBaseAddress(Event.Address); Binary->setIsLoadedByMMap(true); } else { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 6847ba1b21b1f..3507389922500 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -60,6 +60,10 @@ static cl::opt KernelBinary("kernel", cl::desc("Generate the profile for Linux kernel binary.")); +static cl::opt RecordDataSegment("record-data-segment", cl::init(false), + cl::desc("Record data segment size " + "in the profile.")); + extern cl::opt ShowDetailedWarning; extern cl::opt InferMissingFrames; @@ -337,6 +341,12 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj, ~(PageSize - 1U)); TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); } + // else if ((Phdr.p_flags & ELF::PF_R) && !TextSegmentOffsets.empty()) { + // if (RecordDataSegment) { + // ReadOnlyDataSegmentOffsets.push_back(Phdr.p_offset & + // ~(PageSize - 1U)); + // } + // } } } diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index 3b974e25103ad..acdadf7ba0c4b 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "DataAccessPerfReader.h" #include "ErrorHandling.h" #include "PerfReader.h" #include "ProfileGenerator.h" @@ -21,6 +22,13 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/VirtualFileSystem.h" +namespace { +enum ProfileKinds { + SamplePGO, + DataAccessProfile, +}; +} // namespace + static cl::OptionCategory ProfGenCategory("ProfGen Options"); static cl::opt PerfScriptFilename( @@ -67,6 +75,11 @@ static cl::opt DebugBinPath( "from it instead of the executable binary."), cl::cat(ProfGenCategory)); +static cl::opt ProfileKind( + "profile-kind", cl::value_desc("profile-kind"), + cl::desc("Profile kind to be generated, default is sample profile."), + cl::init(DataAccessProfile), cl::cat(ProfGenCategory)); + extern cl::opt ShowDisassemblyOnly; extern cl::opt ShowSourceLocations; extern cl::opt SkipSymbolization; @@ -156,37 +169,52 @@ int main(int argc, const char *argv[]) { if (ShowDisassemblyOnly) return EXIT_SUCCESS; - if (SampleProfFilename.getNumOccurrences()) { - LLVMContext Context; - auto FS = vfs::getRealFileSystem(); - auto ReaderOrErr = - SampleProfileReader::create(SampleProfFilename, Context, *FS); - std::unique_ptr Reader = - std::move(ReaderOrErr.get()); - Reader->read(); - std::unique_ptr Generator = - ProfileGeneratorBase::create(Binary.get(), Reader->getProfiles(), - Reader->profileIsCS()); - Generator->generateProfile(); - Generator->write(); + if (ProfileKind == SamplePGO) { + if (SampleProfFilename.getNumOccurrences()) { + LLVMContext Context; + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = + SampleProfileReader::create(SampleProfFilename, Context, *FS); + std::unique_ptr Reader = + std::move(ReaderOrErr.get()); + Reader->read(); + std::unique_ptr Generator = + ProfileGeneratorBase::create(Binary.get(), Reader->getProfiles(), + Reader->profileIsCS()); + Generator->generateProfile(); + Generator->write(); + } else { + std::optional PIDFilter; + if (ProcessId.getNumOccurrences()) + PIDFilter = ProcessId; + PerfInputFile PerfFile = getPerfInputFile(); + std::unique_ptr Reader = + PerfReaderBase::create(Binary.get(), PerfFile, PIDFilter); + // Parse perf events and samples + Reader->parsePerfTraces(); + + if (SkipSymbolization) + return EXIT_SUCCESS; + + std::unique_ptr Generator = + ProfileGeneratorBase::create(Binary.get(), + &Reader->getSampleCounters(), + Reader->profileIsCS()); + Generator->generateProfile(); + Generator->write(); + } } else { - std::optional PIDFilter; - if (ProcessId.getNumOccurrences()) - PIDFilter = ProcessId; - PerfInputFile PerfFile = getPerfInputFile(); - std::unique_ptr Reader = - PerfReaderBase::create(Binary.get(), PerfFile, PIDFilter); - // Parse perf events and samples + assert(Binary.get() && + "Binary should be initialized for data access profile"); + errs() << "binary text segment offset is " + << format("0x%" PRIx64 ":", Binary->getTextSegmentOffset()) << "\n"; + // data access profile. + SmallVector PerfTraces{PerfScriptFilename}; + auto Reader = std::make_unique( + Binary.get(), PerfScriptFilename, std::nullopt); Reader->parsePerfTraces(); - if (SkipSymbolization) - return EXIT_SUCCESS; - - std::unique_ptr Generator = - ProfileGeneratorBase::create(Binary.get(), &Reader->getSampleCounters(), - Reader->profileIsCS()); - Generator->generateProfile(); - Generator->write(); + Reader->print(); } return EXIT_SUCCESS; From 52465e6c52f1afcff6509d2a67967701135e05fa Mon Sep 17 00:00:00 2001 From: mingmingl Date: Wed, 28 May 2025 21:14:32 -0700 Subject: [PATCH 2/2] remove comment --- .../llvm-profgen/DataAccessPerfReader.cpp | 81 +++++-------------- .../tools/llvm-profgen/DataAccessPerfReader.h | 79 ++++++++++++++---- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 -- llvm/tools/llvm-profgen/ProfiledBinary.h | 2 + llvm/tools/llvm-profgen/llvm-profgen.cpp | 3 +- 5 files changed, 90 insertions(+), 81 deletions(-) diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp index 9c0858ef49521..6a02b2d242d2d 100644 --- a/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp +++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.cpp @@ -15,32 +15,10 @@ void DataAccessPerfReader::parsePerfTraces() { parsePerfTrace(PerfTraceFilename); } -static void testPerfSampleRecordRegex() { - std::regex logRegex( - R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)"); - - std::smatch testMatch; - const std::string testLine = - "2193330181938979 0xa88 [0x48]: PERF_RECORD_SAMPLE(IP, 0x4002): " - "1807344/1807344: 0x260b45 period: 100 addr: 0x200630"; - if (std::regex_search(testLine, testMatch, logRegex)) { - if (testMatch.size() != 5) { - exitWithError("Regex did not match expected number of groups."); - } - for (size_t i = 0; i < testMatch.size(); ++i) { - errs() << "Group " << i << ": " << testMatch[i] << "\n"; - } - // errs() << "Test line matched successfully.\n"; - } else { - exitWithError("Test line did not match regex."); - } -} - // Ignore mmap events. void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) { std::regex logRegex( R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)"); - uint64_t UnmatchedLine = 0, MatchedLine = 0; auto BufferOrErr = MemoryBuffer::getFile(PerfTrace); std::error_code EC = BufferOrErr.getError(); @@ -51,18 +29,23 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) { for (; !LineIt.is_at_eof(); ++LineIt) { StringRef Line = *LineIt; + // Parse MMAP event from perf trace. // Parse MMAP event from perf trace. // Construct a binary from the binary file path. PerfScriptReader::MMapEvent MMap; if (Line.contains("PERF_RECORD_MMAP2")) { if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) { - errs() << "MMap event found: " - << "PID: " << MMap.PID - << ", Address: " << format("0x%llx", MMap.Address) - << ", Size: " << MMap.Size << ", Offset: " << MMap.Offset - << ", Binary Path: " << MMap.BinaryPath << "\n"; + // TODO: This is a hack to avoid mapping binary address for data section + // mappings. if (MMap.Offset == 0) { updateBinaryAddress(MMap); + errs() << "Binary base address is " + << format("0x%" PRIx64, Binary->getBaseAddress()) + << " and preferred base address is " + << format("0x%" PRIx64, Binary->getPreferredBaseAddress()) + << " and first loadable address is " + << format("0x%" PRIx64, Binary->getFirstLoadableAddress()) + << "\n"; } } continue; @@ -72,14 +55,6 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) { // Skip lines that do not contain "PERF_RECORD_SAMPLE". continue; } - // errs() << "Processing line: " << Line << "\n"; - - // if (IPSampleRegex.match(Line, &Matches)) { - // errs() << "IP Captured: " << Matches.size() << "\n"; - // } - // if (DataAddressRegex.match(Line, &Matches)) { - // errs() << "Data Address Captured: " << Matches.size() << "\n"; - // } std::smatch matches; const std::string LineStr = Line.str(); @@ -89,41 +64,29 @@ void DataAccessPerfReader::parsePerfTrace(StringRef PerfTrace) { continue; uint64_t DataAddress = std::stoull(matches[4].str(), nullptr, 16); - uint64_t IP = std::stoull(matches[3].str(), nullptr, 16); + + // Skip addresses out of the specified PT_LOAD section for data. + if (DataAddress < DataMMap.Address || + DataAddress >= DataMMap.Address + DataMMap.Size) + continue; + int32_t PID = std::stoi(matches[1].str()); - // if (DataAddress == 0x200630) { - // errs() << "Find data address at 0x200630, IP: " << format("0x%llx", - // IP) - // << " pid is " << PID << "\n"; - // } - - // errs() << matches.size() << " matches found in line: " << LineStr << - // "\n"; for (const auto &Match : matches) { - // errs() << "Match: " << Match.str() << "\n"; - // } // Check if the PID matches the filter. if (PIDFilter && *PIDFilter != PID) { continue; } + uint64_t IP = std::stoull(matches[3].str(), nullptr, 16); // Extract the address and count. - uint64_t CanonicalDataAddress = - Binary->canonicalizeVirtualAddress(DataAddress); - // errs() << "Data address is " << format("0x" PRIx64 ":", DataAddress) - // << " Canonical data address is " - // << format("0x" PRIx64 ":", CanonicalDataAddress) << "\n"; - AddressToCount[CanonicalDataAddress] += 1; - MatchedLine++; - } else { - // errs() << "\tNo match found for line: " << Line << "\n"; - UnmatchedLine++; + canonicalizeDataAddress(DataAddress, *Binary, DataMMap, DataSegment); + + uint64_t CanonicalIPAddress = Binary->canonicalizeVirtualAddress(IP); + + AddressMap[CanonicalIPAddress][CanonicalDataAddress] += 1; } } - - errs() << "Total unmatched lines: " << UnmatchedLine << "\t" - << "Matched lines: " << MatchedLine << "\n"; } } // namespace llvm diff --git a/llvm/tools/llvm-profgen/DataAccessPerfReader.h b/llvm/tools/llvm-profgen/DataAccessPerfReader.h index 5e2dc4da5bc6f..31961159fbc30 100644 --- a/llvm/tools/llvm-profgen/DataAccessPerfReader.h +++ b/llvm/tools/llvm-profgen/DataAccessPerfReader.h @@ -17,39 +17,90 @@ namespace llvm { class DataAccessPerfReader : public PerfScriptReader { public: + class DataSegment { + public: + uint64_t FileOffset; + uint64_t VirtualAddress; + }; DataAccessPerfReader(ProfiledBinary *Binary, StringRef PerfTrace, std::optional PID) : PerfScriptReader(Binary, PerfTrace, PID), PerfTraceFilename(PerfTrace) { + hackMMapEventAndDataSegment(DataMMap, DataSegment, *Binary); + } + + // The MMapEvent is hard-coded as a hack to illustrate the change. + static void + hackMMapEventAndDataSegment(PerfScriptReader::MMapEvent &MMap, + DataSegment &DataSegment, + const ProfiledBinary &ProfiledBinary) { + // The PERF_RECORD_MMAP2 event is + // 0 0x4e8 [0xa0]: PERF_RECORD_MMAP2 1849842/1849842: + // [0x55d977426000(0x1000) @ 0x1000 fd:01 20869534 0]: r--p /path/to/binary + MMap.PID = 1849842; // Example PID + MMap.BinaryPath = ProfiledBinary.getPath(); + MMap.Address = 0x55d977426000; + MMap.Size = 0x1000; + MMap.Offset = 0x1000; // File Offset in the binary. + + // TODO: Set binary fields to do address canonicalization, and compute + // static data address range. + DataSegment.FileOffset = + 0x1180; // The byte offset of the segment start in the binary. + DataSegment.VirtualAddress = + 0x3180; // The virtual address of the segment start in the binary. + } + + uint64_t canonicalizeDataAddress(uint64_t Address, + const ProfiledBinary &ProfiledBinary, + const PerfScriptReader::MMapEvent &MMap, + const DataSegment &DataSegment) { + // virtual-addr = segment.virtual-addr (0x3180) + (runtime-addr - + // map.adddress - segment.file-offset (0x1180) + map.file-offset (0x1000)) + return DataSegment.VirtualAddress + + (Address - MMap.Address - (DataSegment.FileOffset - MMap.Offset)); } // Entry of the reader to parse multiple perf traces void parsePerfTraces() override; - auto getAddressToCount() const { - return AddressToCount.getArrayRef(); - } + struct ProfiledInfo { + ProfiledInfo(uint64_t InstructionAddr, uint64_t DataAddr, uint64_t Count) + : InstructionAddr(InstructionAddr), DataAddr(DataAddr), Count(Count) {} + uint64_t InstructionAddr; + uint64_t DataAddr; + uint64_t Count; + }; + // A hack to demonstrate the symbolized output of vtable type profiling. void print() const { - auto addrCountArray = AddressToCount.getArrayRef(); - std::vector> SortedEntries( - addrCountArray.begin(), addrCountArray.end()); - llvm::sort(SortedEntries, [](const auto &A, const auto &B) { - return A.second > B.second; - }); - for (const auto &Entry : SortedEntries) { - if (Entry.second == 0) + + std::vector Entries; + Entries.reserve(AddressMap.size()); + for (const auto &[IpAddr, DataCount] : AddressMap) { + for (const auto [DataAddr, Count] : DataCount) { + Entries.emplace_back(ProfiledInfo(IpAddr, DataAddr, Count)); + } + } + llvm::sort(Entries, + [](const auto &A, const auto &B) { return A.Count > B.Count; }); + for (const auto &Entry : Entries) { + if (Entry.Count == 0) continue; // Skip entries with zero count - dbgs() << "Address: " << format("0x%llx", Entry.first) - << ", Count: " << Entry.second << "\n"; + dbgs() << "Address: " << format("0x%llx", Entry.InstructionAddr) + << " Data Address: " << format("0x%llx", Entry.DataAddr) + << " Count: " << Entry.Count << "\n"; } } private: void parsePerfTrace(StringRef PerfTrace); - MapVector AddressToCount; + DenseMap> AddressMap; StringRef PerfTraceFilename; + + PerfScriptReader::MMapEvent DataMMap; + DataSegment DataSegment; }; } // namespace llvm diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 3507389922500..d26b7ce6ed657 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -341,12 +341,6 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj, ~(PageSize - 1U)); TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); } - // else if ((Phdr.p_flags & ELF::PF_R) && !TextSegmentOffsets.empty()) { - // if (RecordDataSegment) { - // ReadOnlyDataSegmentOffsets.push_back(Phdr.p_offset & - // ~(PageSize - 1U)); - // } - // } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 0588cb48b2af6..449b7ae81e896 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -205,6 +205,8 @@ class ProfiledBinary { // The file offset of each executable segment. std::vector TextSegmentOffsets; + std::vector ReadOnlyDataSegmentOffsets; + // Mutiple MC component info std::unique_ptr MRI; std::unique_ptr AsmInfo; diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index acdadf7ba0c4b..55972ec961790 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -206,8 +206,7 @@ int main(int argc, const char *argv[]) { } else { assert(Binary.get() && "Binary should be initialized for data access profile"); - errs() << "binary text segment offset is " - << format("0x%" PRIx64 ":", Binary->getTextSegmentOffset()) << "\n"; + // data access profile. SmallVector PerfTraces{PerfScriptFilename}; auto Reader = std::make_unique(