From ac48f2c4d76cefd27ce40d39762ce54633fee3f4 Mon Sep 17 00:00:00 2001 From: Alex B Date: Tue, 24 Sep 2024 03:16:47 -0700 Subject: [PATCH 01/12] [GSYM] Callsites: Add data format support and loading from YAML --- .../llvm/DebugInfo/GSYM/CallSiteInfo.h | 224 +++++ .../llvm/DebugInfo/GSYM/FunctionInfo.h | 10 +- .../include/llvm/DebugInfo/GSYM/GsymCreator.h | 11 +- llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 20 + llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 1 + llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 293 ++++++ llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 31 +- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 6 + llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 45 + .../macho-gsym-callsite-info-dsym.yaml | 950 ++++++++++++++++++ .../macho-gsym-callsite-info-exe.yaml | 558 ++++++++++ .../macho-gsym-callsite-info-obj.test | 304 ++++++ llvm/tools/llvm-gsymutil/Opts.td | 2 + llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 19 + llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 4 +- 15 files changed, 2471 insertions(+), 7 deletions(-) create mode 100644 llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h create mode 100644 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h new file mode 100644 index 0000000000000..45257f0e11578 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -0,0 +1,224 @@ +//===- CallSiteInfo.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H +#define LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/DebugInfo/GSYM/ExtractRanges.h" +#include "llvm/Support/YAMLParser.h" +#include +#include +#include + +namespace llvm { +class DataExtractor; +class raw_ostream; +class StringTableBuilder; +class CachedHashStringRef; + +namespace yaml { +struct CallSiteYAML; +struct FunctionYAML; +struct FunctionsYAML; +} // namespace yaml + +namespace gsym { +class FileWriter; +struct FunctionInfo; +struct CallSiteInfo { +public: + enum Flags : uint8_t { + None = 0, + // This flag specifies that the call site can only call a function within + // the same link unit as the call site. + InternalCall = 1 << 0, + // This flag specifies that the call site can only call a function outside + // the link unit that the call site is in. + ExternalCall = 1 << 1, + }; + + /// The return address of the call site. + uint64_t ReturnAddress; + + /// Offsets into the string table for function names regex patterns. + std::vector MatchRegex; + + /// Bitwise OR of CallSiteInfo::Flags values + uint8_t Flags; + + /// Decode a CallSiteInfo object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. + /// \param Offset The current offset within the data stream. + /// \param BaseAddr The base address for decoding (unused here but included + /// for consistency). + /// + /// \returns A CallSiteInfo or an error describing the issue. + static llvm::Expected + decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr); + + /// Encode this CallSiteInfo object into a FileWriter stream. + /// + /// \param O The binary stream to write the data to. + /// \returns An error object that indicates success or failure. 
+ llvm::Error encode(FileWriter &O) const; +}; + +struct CallSiteInfoCollection { +public: + std::vector CallSites; + + void clear() { CallSites.clear(); } + + /// Query if a CallSiteInfoCollection object is valid. + /// + /// \returns True if the collection is not empty. + bool isValid() const { return !CallSites.empty(); } + + /// Decode a CallSiteInfoCollection object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. + /// \param BaseAddr The base address for decoding (unused here but included + /// for consistency). + /// + /// \returns A CallSiteInfoCollection or an error describing the issue. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this CallSiteInfoCollection object into a FileWriter stream. + /// + /// \param O The binary stream to write the data to. + /// \returns An error object that indicates success or failure. + llvm::Error encode(FileWriter &O) const; +}; + +bool operator==(const CallSiteInfoCollection &LHS, + const CallSiteInfoCollection &RHS); + +bool operator==(const CallSiteInfo &LHS, const CallSiteInfo &RHS); + +class CallSiteInfoLoader { +public: + /// Constructor that initializes the CallSiteInfoLoader with necessary data + /// structures. + /// + /// \param StringOffsetMap A reference to a DenseMap that maps existing string + /// offsets to CachedHashStringRef. \param StrTab A reference to a + /// StringTableBuilder used for managing looking up and creating new strings. + /// \param StringStorage A reference to a StringSet for storing the data for + /// generated strings. + CallSiteInfoLoader(DenseMap &StringOffsetMap, + StringTableBuilder &StrTab, StringSet<> &StringStorage) + : StringOffsetMap(StringOffsetMap), StrTab(StrTab), + StringStorage(StringStorage) {} + + /// Loads call site information from a YAML file and populates the provided + /// FunctionInfo vector. 
+ /// + /// This method reads the specified YAML file, parses its content, and updates + /// the `Funcs` vector with call site information based on the YAML data. + /// + /// \param Funcs A reference to a vector of FunctionInfo objects to be + /// populated. + /// \param YAMLFile A StringRef representing the path to the YAML + /// file to be loaded. + /// + /// \returns An `llvm::Error` indicating success or describing any issues + /// encountered during the loading process. + llvm::Error loadYAML(std::vector &Funcs, StringRef YAMLFile); + +private: + /// Retrieves an existing string from the StringOffsetMap using the provided + /// offset. + /// + /// \param offset A 32-bit unsigned integer representing the offset of the + /// string. + /// + /// \returns A StringRef corresponding to the string for the given offset. + /// + /// \note This method asserts that the offset exists in the StringOffsetMap. + StringRef stringFromOffset(uint32_t offset) const; + + /// Obtains the offset corresponding to a given string in the StrTab. If the + /// string does not already exist, it is created. + /// + /// \param str A StringRef representing the string for which the offset is + /// requested. + /// + /// \returns A 32-bit unsigned integer representing the offset of the string. + uint32_t offsetFromString(StringRef str); + + /// Reads the content of the YAML file specified by `YAMLFile` into + /// `Buffer`. + /// + /// \param YAMLFile A StringRef representing the path to the YAML file. + /// \param Buffer Receives the memory buffer holding the YAML content. + /// + /// \returns An `llvm::Error` indicating success or describing any issues + /// encountered while reading the file. + llvm::Error readYAMLFile(StringRef YAMLFile, + std::unique_ptr &Buffer); + + /// Parses the YAML content and populates `functionsYAML` with the parsed + /// data. + /// + /// \param Buffer The memory buffer containing the YAML content. 
+ /// \param functionsYAML A reference to an llvm::yaml::FunctionsYAML object to + /// be populated. + /// + /// \returns An `llvm::Error` indicating success or describing any issues + /// encountered during parsing. + llvm::Error parseYAML(llvm::MemoryBuffer &Buffer, + llvm::yaml::FunctionsYAML &functionsYAML); + + /// Builds a map from function names to FunctionInfo pointers based on the + /// provided `Funcs` vector. + /// + /// \param Funcs A reference to a vector of FunctionInfo objects. + /// + /// \returns An unordered_map mapping function names (std::string) to their + /// corresponding FunctionInfo pointers. + std::unordered_map + buildFunctionMap(std::vector &Funcs); + + /// Processes the parsed YAML functions and updates the `FuncMap` accordingly. + /// + /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML + /// object containing parsed YAML data. + /// \param FuncMap A reference to an unordered_map mapping function names to + /// FunctionInfo pointers. + /// \param YAMLFile A StringRef representing the name of the YAML file (used + /// for error messages). + /// + /// \returns An `llvm::Error` indicating success or describing any issues + /// encountered during processing. + llvm::Error + processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML, + std::unordered_map &FuncMap, + StringRef YAMLFile); + + /// Map of existing string offsets to CachedHashStringRef. + DenseMap &StringOffsetMap; + + /// The gSYM string table builder. + StringTableBuilder &StrTab; + + /// The gSYM string storage - we store generated strings here. 
+ StringSet<> &StringStorage; +}; + +raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI); +raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfoCollection &CSIC); + +} // namespace gsym +} // namespace llvm + +#endif // LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h index 71209b6b5c9cd..fd4ac3164c686 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -10,6 +10,7 @@ #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/GSYM/CallSiteInfo.h" #include "llvm/DebugInfo/GSYM/ExtractRanges.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/LineTable.h" @@ -63,7 +64,9 @@ class GsymReader; /// enum InfoType { /// EndOfList = 0u, /// LineTableInfo = 1u, -/// InlineInfo = 2u +/// InlineInfo = 2u, +/// MergedFunctionsInfo = 3u, +/// CallSiteInfo = 4u /// }; /// /// This stream of tuples is terminated by a "InfoType" whose value is @@ -73,7 +76,7 @@ class GsymReader; /// clients to still parse the format and skip over any data that they don't /// understand or want to parse. /// -/// So the function information encoding essientially looks like: +/// So the function information encoding essentially looks like: /// /// struct { /// uint32_t Size; @@ -92,6 +95,7 @@ struct FunctionInfo { std::optional OptLineTable; std::optional Inline; std::optional MergedFunctions; + std::optional CallSites; /// If we encode a FunctionInfo during segmenting so we know its size, we can /// cache that encoding here so we don't need to re-encode it when saving the /// GSYM file. @@ -107,7 +111,7 @@ struct FunctionInfo { /// debug info, we might end up with multiple FunctionInfo objects for the /// same range and we need to be able to tell which one is the better object /// to use. 
- bool hasRichInfo() const { return OptLineTable || Inline; } + bool hasRichInfo() const { return OptLineTable || Inline || CallSites; } /// Query if a FunctionInfo object is valid. /// diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h index 48808fb7b71e1..9e5b3c1f8d92d 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -352,13 +352,22 @@ class GsymCreator { /// \param FI The function info object to emplace into our functions list. void addFunctionInfo(FunctionInfo &&FI); + /// Load call site information from a YAML file. + /// + /// This function reads call site information from a specified YAML file and + /// adds it to the GSYM data. + /// + /// \param YAMLFile The path to the YAML file containing call site + /// information. + llvm::Error loadCallSitesFromYAML(StringRef YAMLFile); + /// Organize merged FunctionInfo's /// /// This method processes the list of function infos (Funcs) to identify and /// group functions with overlapping address ranges. /// /// \param Out Output stream to report information about how merged - /// FunctionInfo's were handeled. + /// FunctionInfo's were handled. void prepareMergedFunctions(OutputAggregator &Out); /// Finalize the data in the GSYM creator prior to saving the data out. diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h index 89f8c043b9151..72b7f3e7bfc42 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -181,6 +181,26 @@ class GsymReader { /// \param MFI The object to dump. void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI); + /// Dump a CallSiteInfo object. + /// + /// This function will output the details of a CallSiteInfo object in a + /// human-readable format. + /// + /// \param OS The output stream to dump to. + /// + /// \param CSI The CallSiteInfo object to dump. 
+ void dump(raw_ostream &OS, const CallSiteInfo &CSI); + + /// Dump a CallSiteInfoCollection object. + /// + /// This function will iterate over a collection of CallSiteInfo objects and + /// dump each one. + /// + /// \param OS The output stream to dump to. + /// + /// \param CSIC The CallSiteInfoCollection object to dump. + void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC); + /// Dump a LineTable object. /// /// This function will convert any string table indexes and file indexes diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt index be90bfdaa7fd2..c27d648db62f6 100644 --- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM InlineInfo.cpp LineTable.cpp LookupResult.cpp + CallSiteInfo.cpp MergedFunctionsInfo.cpp ObjectFileTransformer.cpp ExtractRanges.cpp diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp new file mode 100644 index 0000000000000..4ed3d3f67a44f --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -0,0 +1,293 @@ +//===- CallSiteInfo.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/CallSiteInfo.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/YAMLParser.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + +llvm::Error CallSiteInfo::encode(FileWriter &O) const { + O.writeU64(ReturnAddress); + O.writeU8(Flags); + O.writeU32(MatchRegex.size()); + for (uint32_t Entry : MatchRegex) + O.writeU32(Entry); + return llvm::Error::success(); +} + +llvm::Expected +CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) { + CallSiteInfo CSI; + + // Read ReturnAddress + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t))) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ReturnAddress", Offset); + CSI.ReturnAddress = Data.getU64(&Offset); + + // Read Flags + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t))) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing Flags", Offset); + CSI.Flags = Data.getU8(&Offset); + + // Read number of MatchRegex entries + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t))) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing MatchRegex count", + Offset); + uint32_t NumEntries = Data.getU32(&Offset); + + CSI.MatchRegex.reserve(NumEntries); + for (uint32_t i = 0; i < NumEntries; ++i) { + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t))) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing MatchRegex entry", + Offset); + uint32_t Entry = Data.getU32(&Offset); + 
CSI.MatchRegex.push_back(Entry); + } + + return CSI; +} + +llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const { + O.writeU32(CallSites.size()); + for (const CallSiteInfo &CSI : CallSites) { + if (llvm::Error Err = CSI.encode(O)) + return Err; + } + return llvm::Error::success(); +} + +llvm::Expected +CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) { + CallSiteInfoCollection CSC; + uint64_t Offset = 0; + + // Read number of CallSiteInfo entries + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t))) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing CallSiteInfo count", + Offset); + uint32_t NumCallSites = Data.getU32(&Offset); + + CSC.CallSites.reserve(NumCallSites); + for (uint32_t i = 0; i < NumCallSites; ++i) { + llvm::Expected ECSI = + CallSiteInfo::decode(Data, Offset, BaseAddr); + if (!ECSI) + return ECSI.takeError(); + CSC.CallSites.emplace_back(*ECSI); + } + + return CSC; +} + +/// Structures necessary for reading CallSiteInfo from YAML. +namespace llvm { +namespace yaml { + +struct CallSiteYAML { + // The offset of the return address of the call site - relative to the start + // of the function. 
+ llvm::yaml::Hex64 return_offset; + std::vector match_regex; + std::vector flags; +}; + +struct FunctionYAML { + std::string name; + std::vector callsites; +}; + +struct FunctionsYAML { + std::vector functions; +}; + +template <> struct MappingTraits { + static void mapping(IO &io, CallSiteYAML &callsite) { + io.mapRequired("return_offset", callsite.return_offset); + io.mapRequired("match_regex", callsite.match_regex); + io.mapOptional("flags", callsite.flags); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, FunctionYAML &func) { + io.mapRequired("name", func.name); + io.mapOptional("callsites", func.callsites); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, FunctionsYAML &functionsYAML) { + io.mapRequired("functions", functionsYAML.functions); + } +}; + +} // namespace yaml +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML) +LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML) + +// Implementation of CallSiteInfoLoader +StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const { + assert(StringOffsetMap.count(offset) && + "expected function name offset to already be in StringOffsetMap"); + return StringOffsetMap.find(offset)->second.val(); +} + +uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) { + return StrTab.add(StringStorage.insert(str).first->getKey()); +} + +llvm::Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, + StringRef YAMLFile) { + std::unique_ptr Buffer; + // Step 1: Read YAML file + if (auto Err = readYAMLFile(YAMLFile, Buffer)) + return Err; + + // Step 2: Parse YAML content + llvm::yaml::FunctionsYAML functionsYAML; + if (auto Err = parseYAML(*Buffer, functionsYAML)) + return Err; + + // Step 3: Build function map from Funcs + auto FuncMap = buildFunctionMap(Funcs); + + // Step 4: Process parsed YAML functions and update FuncMap + return processYAMLFunctions(functionsYAML, FuncMap, YAMLFile); +} + +llvm::Error +CallSiteInfoLoader::readYAMLFile(StringRef 
YAMLFile, + std::unique_ptr &Buffer) { + auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile); + if (!BufferOrError) + return errorCodeToError(BufferOrError.getError()); + Buffer = std::move(*BufferOrError); + return llvm::Error::success(); +} + +llvm::Error +CallSiteInfoLoader::parseYAML(llvm::MemoryBuffer &Buffer, + llvm::yaml::FunctionsYAML &functionsYAML) { + // Use the MemoryBufferRef constructor + llvm::yaml::Input yin(Buffer.getMemBufferRef()); + yin >> functionsYAML; + if (yin.error()) { + return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n", + Buffer.getBufferIdentifier().str().c_str()); + } + return llvm::Error::success(); +} + +std::unordered_map +CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { + std::unordered_map FuncMap; + auto insertFunc = [&](auto &Function) { + std::string FuncName = stringFromOffset(Function.Name).str(); + // If the function name is already in the map, don't update it. This way we + // preferentially use the first encountered function. Since symbols are + // loaded from dSYM first, we end up preferring keeping track of symbols + // from dSYM rather than from the symbol table - which is what we want to + // do. 
+ if (FuncMap.count(FuncName)) + return; + FuncMap[FuncName] = &Function; + }; + for (auto &Func : Funcs) { + insertFunc(Func); + if (Func.MergedFunctions.has_value()) + for (auto &MFunc : Func.MergedFunctions->MergedFunctions) + insertFunc(MFunc); + } + return FuncMap; +} + +llvm::Error CallSiteInfoLoader::processYAMLFunctions( + const llvm::yaml::FunctionsYAML &functionsYAML, + std::unordered_map &FuncMap, + StringRef YAMLFile) { + // For each function in the YAML file + for (const auto &FuncYAML : functionsYAML.functions) { + auto it = FuncMap.find(FuncYAML.name); + if (it == FuncMap.end()) { + return llvm::createStringError( + std::errc::invalid_argument, + "Can't find function '%s' specified in callsite YAML\n", + FuncYAML.name.c_str()); + } + FunctionInfo *FuncInfo = it->second; + // Create a CallSiteInfoCollection if not already present + if (!FuncInfo->CallSites) + FuncInfo->CallSites = CallSiteInfoCollection(); + for (const auto &CallSiteYAML : FuncYAML.callsites) { + CallSiteInfo CSI; + // Since the YAML specifies relative return offsets, add the function + // start address to make the offset absolute. 
+ CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset; + for (const auto &regex : CallSiteYAML.match_regex) { + CSI.MatchRegex.push_back(offsetFromString(regex)); + } + // Initialize flags to None + CSI.Flags = CallSiteInfo::None; + // Parse flags and combine them + for (const auto &FlagStr : CallSiteYAML.flags) { + if (FlagStr == "InternalCall") { + CSI.Flags |= static_cast(CallSiteInfo::InternalCall); + } else if (FlagStr == "ExternalCall") { + CSI.Flags |= static_cast(CallSiteInfo::ExternalCall); + } else { + return llvm::createStringError(std::errc::invalid_argument, + "Unknown flag in callsite YAML: %s\n", + FlagStr.c_str()); + } + } + FuncInfo->CallSites->CallSites.push_back(CSI); + } + } + return llvm::Error::success(); +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { + OS << " Return=" << HEX64(CSI.ReturnAddress); + OS << " Flags=" << HEX8(CSI.Flags); + + OS << " RegEx="; + for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) { + if (i > 0) + OS << ","; + OS << CSI.MatchRegex[i]; + } + return OS; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, + const CallSiteInfoCollection &CSIC) { + for (const auto &CS : CSIC.CallSites) { + OS << CS; + OS << "\n"; + } + return OS; +} diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index 2cd85ef2398f9..9dc9c241168b2 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -24,6 +24,7 @@ enum InfoType : uint32_t { LineTableInfo = 1u, InlineInfo = 2u, MergedFunctionsInfo = 3u, + CallSiteInfo = 4u, }; raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { @@ -32,6 +33,8 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { OS << FI.OptLineTable << '\n'; if (FI.Inline) OS << FI.Inline << '\n'; + if (FI.CallSites) + OS << *FI.CallSites << '\n'; return OS; } @@ -95,6 +98,14 @@ llvm::Expected 
FunctionInfo::decode(DataExtractor &Data, return MI.takeError(); break; + case InfoType::CallSiteInfo: + if (Expected CI = + llvm::gsym::CallSiteInfoCollection::decode(InfoData, BaseAddr)) + FI.CallSites = std::move(CI.get()); + else + return CI.takeError(); + break; + default: return createStringError(std::errc::io_error, "0x%8.8" PRIx64 ": unsupported InfoType %u", @@ -200,7 +211,25 @@ llvm::Expected FunctionInfo::encode(FileWriter &Out, Out.fixup32(static_cast(Length), StartOffset - 4); } - // Terminate the data chunks with and end of list with zero size + // Write out the call sites if we have any and if they are valid. + if (CallSites) { + Out.writeU32(InfoType::CallSiteInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the CallSites out with the number of bytes that were written. + Out.writeU32(0); + const auto StartOffset = Out.tell(); + llvm::Error err = CallSites->encode(Out); + if (err) + return std::move(err); + const auto Length = Out.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "CallSites length is greater than UINT32_MAX"); + // Fixup the size of the CallSites data with the correct size. + Out.fixup32(static_cast(Length), StartOffset - 4); + } + + // Terminate the data chunks with an end of list with zero size. Out.writeU32(InfoType::EndOfList); Out.writeU32(0); return FuncInfoOffset; diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 3227fa5400fb5..0df84ee256aef 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -189,6 +189,12 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { return ErrorSuccess(); } +llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) { + // Use the loader to load call site information from the YAML file. 
+ CallSiteInfoLoader Loader(StringOffsetMap, StrTab, StringStorage); + return Loader.loadYAML(Funcs, YAMLFile); +} + void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) { // Nothing to do if we have less than 2 functions. if (Funcs.size() < 2) diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index ddfc92e1a8a40..4f645714480e6 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -411,6 +411,10 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI, assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level"); dump(OS, *FI.MergedFunctions); } + + if (FI.CallSites) { + dump(OS, *FI.CallSites); + } } void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) { @@ -420,6 +424,47 @@ void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) { } } +void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) { + OS << HEX64(CSI.ReturnAddress); + + std::string Flags; + auto addFlag = [&](const char *Flag) { + if (!Flags.empty()) + Flags += " | "; + Flags += Flag; + }; + + if (CSI.Flags == CallSiteInfo::Flags::None) + Flags = "None"; + else { + if (CSI.Flags & CallSiteInfo::Flags::InternalCall) + addFlag("InternalCall"); + + if (CSI.Flags & CallSiteInfo::Flags::ExternalCall) + addFlag("ExternalCall"); + } + OS << " Flags[" << Flags << "]"; + + if (!CSI.MatchRegex.empty()) { + OS << " MatchRegex["; + for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) { + if (i > 0) + OS << ";"; + OS << getString(CSI.MatchRegex[i]); + } + OS << "]"; + } +} + +void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC) { + OS << "CallSites (by return address):\n"; + for (const auto &CS : CSIC.CallSites) { + OS.indent(2); + dump(OS, CS); + OS << "\n"; + } +} + void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) { OS.indent(Indent); OS << "LineTable:\n"; diff --git 
a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml new file mode 100644 index 0000000000000..5c31d60962669 --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml @@ -0,0 +1,950 @@ +## Test that reconstructs a dSYM file from YAML and generates a callsite-enabled gsym from it - and then verifies the gsym. +## See llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test for the script to generate this yaml file + +# RUN: split-file %s %t +# RUN: yaml2obj %t/call_sites.dSYM.yaml -o %t/call_sites.dSYM + +# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_dSYM.gsym + +# Dump the GSYM file and check the output for callsite information +# RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s + + +# CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" +# CHECK-GSYM-NEXT: LineTable: +# // func_mainBin_dec_call_everything() { +# CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:16 +# // func_mainBin_dec_01(); +# CHECK-GSYM-NEXT: 0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17 +# // func_mainBin_dec_02(); +# CHECK-GSYM-NEXT: 0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18 +# // func_mainBin_dec_03(); +# CHECK-GSYM-NEXT: [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19 +# // func_mainBin_inc_01(); +# CHECK-GSYM-NEXT: [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21 +# // func_mainBin_inc_02(); +# CHECK-GSYM-NEXT: [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22 +# // func_mainBin_inc_03(); +# CHECK-GSYM-NEXT: [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23 +# // g_func_ptr(); +# CHECK-GSYM-NEXT: [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25 +# // g_extern_func_ptr(); +# CHECK-GSYM-NEXT: 
[[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26 +# // g_volatile_var = 0; +# CHECK-GSYM-NEXT: [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28 +# // } +# CHECK-GSYM-NEXT: [[#%x,]] {{.*}}/call_sites.cpp:29 +# CHECK-GSYM-NEXT: CallSites (by return address): +# CHECK-GSYM-NEXT: [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] +# CHECK-GSYM-NEXT: [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] +# CHECK-GSYM-NEXT: [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] +# CHECK-GSYM-NEXT: [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] +# CHECK-GSYM-NEXT: [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] +# CHECK-GSYM-NEXT: [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] +# CHECK-GSYM-NEXT: [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*] +# CHECK-GSYM-NEXT: [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*] + + +#--- callsites.yaml +functions: + - name: func_mainBin_dec_call_everything + callsites: + - return_offset: 0x0C + match_regex: ["func_mainBin_dec_01"] + flags: + - "InternalCall" + - return_offset: 0x10 + match_regex: ["func_mainBin_dec_02"] + flags: + - "InternalCall" + - return_offset: 0x14 + match_regex: ["func_mainBin_dec_03"] + flags: + - "InternalCall" + - return_offset: 24 + match_regex: ["func_mainBin_inc_01"] + flags: + - "InternalCall" + - return_offset: 28 + match_regex: ["func_mainBin_inc_02"] + flags: + - "InternalCall" + - return_offset: 32 + match_regex: ["func_mainBin_inc_03"] + flags: + - "InternalCall" + - return_offset: 44 + match_regex: [".*func.*"] + - return_offset: 56 + match_regex: [".*extern_func.*"] + flags: + - "ExternalCall" + + +#--- call_sites.dSYM.yaml +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0xA + ncmds: 8 + sizeofcmds: 1392 + flags: 0x0 + reserved: 0x0 +LoadCommands: + - cmd: LC_UUID + 
cmdsize: 24 + uuid: 4C4C44E9-5555-3144-A1D3-328233D00078 + - cmd: LC_BUILD_VERSION + cmdsize: 24 + platform: 1 + minos: 720896 + sdk: 720896 + ntools: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 4096 + nsyms: 12 + stroff: 4288 + strsize: 235 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 0 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000338 + size: 216 + offset: 0x0 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: CFFAEDFE0C000001000000000A000000080000007005000000000000000000001B000000180000004C4C44E955553144A1D3328233D0007832000000180000000100000000000B0000000B00000000000200000018000000001000000C000000C0100000EB00000019000000480000005F5F504147455A45524F00000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000019000000980000005F5F544558540000000000000000000000000000010000000040000000000000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 0 + filesize: 0 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __common + segname: __DATA + addr: 0x100004000 + size: 24 + offset: 0x0 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x1 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295000064 + vmsize: 4096 + fileoff: 4096 + filesize: 427 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 872 + segname: __DWARF + vmaddr: 4295004160 + vmsize: 4096 + fileoff: 8192 + filesize: 1894 + maxprot: 7 + initprot: 3 + nsects: 10 + flags: 0 + Sections: + - sectname: __debug_line + 
segname: __DWARF + addr: 0x100009000 + size: 150 + offset: 0x2000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - sectname: __debug_aranges + segname: __DWARF + addr: 0x100009096 + size: 48 + offset: 0x2096 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - sectname: __debug_info + segname: __DWARF + addr: 0x1000090C6 + size: 424 + offset: 0x20C6 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - sectname: __debug_frame + segname: __DWARF + addr: 0x10000926E + size: 232 + offset: 0x226E + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 14000000FFFFFFFF0400080001781E0C1F0000000000000014000000000000003803000001000000140000000000000014000000000000004C03000001000000140000000000000014000000000000006003000001000000140000000000000014000000000000007403000001000000140000000000000014000000000000008803000001000000140000000000000014000000000000009C0300000100000014000000000000001C00000000000000B0030000010000004800000000000000480C1D109E019D021C00000000000000F8030000010000001800000000000000480C1D109E019D02 + - sectname: __debug_abbrev + segname: __DWARF + addr: 0x100009356 + size: 171 + offset: 0x2356 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - sectname: __debug_str + segname: __DWARF + addr: 0x100009401 + size: 378 + offset: 0x2401 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - sectname: __apple_namespac + segname: __DWARF + addr: 0x10000957B + size: 36 + offset: 0x257B + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF + - sectname: __apple_names + segname: __DWARF + addr: 0x10000959F + size: 340 + offset: 
0x259F + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 48534148010000000B0000000B0000000C000000000000000100000001000600FFFFFFFF00000000FFFFFFFFFFFFFFFFFFFFFFFF01000000FFFFFFFF0300000004000000060000000900000073A83B36215E623FACBB81686A7F9A7C1939EE6AC7E03A771A39EE6AC8E03A773856D6801B39EE6AC9E03A77A4000000B4000000C4000000D4000000E4000000F40000000401000014010000240100003401000044010000AC000000010000002E00000000000000CA000000010000007300000000000000BF000000010000004F0000000000000075010000010000008001000000000000DC0000000100000088000000000000001801000001000000C700000000000000F0000000010000009D000000000000002C01000001000000DC00000000000000540100000100000006010000000000000401000001000000B2000000000000004001000001000000F100000000000000 + - sectname: __apple_types + segname: __DWARF + addr: 0x1000096F3 + size: 79 + offset: 0x26F3 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 48534148010000000100000001000000180000000000000004000000010006000300050005000B0006000600000000003080880B38000000BB0000000100000048000000240000A4283A0C00000000 + - sectname: __apple_objc + segname: __DWARF + addr: 0x100009742 + size: 36 + offset: 0x2742 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF +LinkEditData: + NameList: + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968312 + - n_strx: 8 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968120 + - n_strx: 29 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968140 + - n_strx: 50 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968160 + - n_strx: 71 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968180 + - n_strx: 92 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968200 + - n_strx: 113 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968220 + - 
n_strx: 134 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968240 + - n_strx: 168 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 4294983680 + - n_strx: 184 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 4294983688 + - n_strx: 196 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 4294983696 + - n_strx: 215 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + StringTable: + - '' + - '' + - _main + - _func_mainBin_dec_01 + - _func_mainBin_dec_02 + - _func_mainBin_dec_03 + - _func_mainBin_inc_01 + - _func_mainBin_inc_02 + - _func_mainBin_inc_03 + - _func_mainBin_dec_call_everything + - _g_volatile_var + - _g_func_ptr + - _g_extern_func_ptr + - __mh_execute_header +DWARF: + debug_str: + - '' + - 'clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)' + - call_sites.cpp + - '/' + - '/tmp/___________________________________/tst' + - g_volatile_var + - int + - g_func_ptr + - g_extern_func_ptr + - func_mainBin_dec_01 + - func_mainBin_dec_02 + - func_mainBin_dec_03 + - func_mainBin_inc_01 + - func_mainBin_inc_02 + - func_mainBin_inc_03 + - func_mainBin_dec_call_everything + - main + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_producer + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_LLVM_sysroot + Form: DW_FORM_strp + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + - Attribute: DW_AT_comp_dir + Form: DW_FORM_strp + - Attribute: DW_AT_APPLE_optimized + Form: DW_FORM_flag_present + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Code: 0x2 + Tag: DW_TAG_variable + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - 
Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_location + Form: DW_FORM_exprloc + - Code: 0x3 + Tag: DW_TAG_volatile_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + - Code: 0x4 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x5 + Tag: DW_TAG_pointer_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + - Code: 0x6 + Tag: DW_TAG_subroutine_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + - Code: 0x7 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_APPLE_omit_frame_ptr + Form: DW_FORM_flag_present + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_call_all_calls + Form: DW_FORM_flag_present + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_APPLE_optimized + Form: DW_FORM_flag_present + - Code: 0x8 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_call_all_calls + Form: DW_FORM_flag_present + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_APPLE_optimized + Form: 
DW_FORM_flag_present + - Code: 0x9 + Tag: DW_TAG_call_site + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_call_origin + Form: DW_FORM_ref4 + - Attribute: DW_AT_call_return_pc + Form: DW_FORM_addr + - Code: 0xA + Tag: DW_TAG_call_site + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_call_target + Form: DW_FORM_exprloc + - Attribute: DW_AT_call_return_pc + Form: DW_FORM_addr + - Code: 0xB + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data4 + - Attribute: DW_AT_frame_base + Form: DW_FORM_exprloc + - Attribute: DW_AT_call_all_calls + Form: DW_FORM_flag_present + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_decl_file + Form: DW_FORM_data1 + - Attribute: DW_AT_decl_line + Form: DW_FORM_data1 + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + - Attribute: DW_AT_external + Form: DW_FORM_flag_present + - Attribute: DW_AT_APPLE_optimized + Form: DW_FORM_flag_present + debug_aranges: + - Length: 0x2C + Version: 2 + CuOffset: 0x0 + AddressSize: 0x8 + Descriptors: + - Address: 0x100000338 + Length: 0xD8 + debug_info: + - Length: 0x1A4 + Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x1 + - Value: 0x21 + - Value: 0x6E + - Value: 0x7D + - Value: 0x0 + - Value: 0x7F + - Value: 0x1 + - Value: 0x100000338 + - Value: 0xD8 + - AbbrCode: 0x2 + Values: + - Value: 0xAC + - Value: 0x43 + - Value: 0x1 + - Value: 0x1 + - Value: 0x4 + - Value: 0x9 + BlockData: [ 0x3, 0x0, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0, + 0x0 ] + - AbbrCode: 0x3 + Values: + - Value: 0x48 + - AbbrCode: 0x4 + Values: + - Value: 0xBB + - Value: 0x5 + - Value: 0x4 + - AbbrCode: 0x2 + Values: + - Value: 0xBF + - Value: 0x64 + - Value: 0x1 + - Value: 0x1 + - Value: 0x5 + - Value: 0x9 + BlockData: [ 0x3, 0x8, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0, + 0x0 ] + - AbbrCode: 0x3 + Values: + - Value: 0x69 + - AbbrCode: 
0x5 + Values: + - Value: 0x6E + - AbbrCode: 0x6 + Values: + - Value: 0x48 + - AbbrCode: 0x2 + Values: + - Value: 0xCA + - Value: 0x64 + - Value: 0x1 + - Value: 0x1 + - Value: 0x6 + - Value: 0x9 + BlockData: [ 0x3, 0x10, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0, + 0x0 ] + - AbbrCode: 0x7 + Values: + - Value: 0x100000338 + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0xDC + - Value: 0x1 + - Value: 0x8 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x7 + Values: + - Value: 0x10000034C + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0xF0 + - Value: 0x1 + - Value: 0x9 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x7 + Values: + - Value: 0x100000360 + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0x104 + - Value: 0x1 + - Value: 0xA + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x7 + Values: + - Value: 0x100000374 + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0x118 + - Value: 0x1 + - Value: 0xC + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x7 + Values: + - Value: 0x100000388 + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0x12C + - Value: 0x1 + - Value: 0xD + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x7 + Values: + - Value: 0x10000039C + - Value: 0x14 + - Value: 0x1 + - Value: 0x1 + BlockData: [ 0x6F ] + - Value: 0x1 + - Value: 0x140 + - Value: 0x1 + - Value: 0xE + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x8 + Values: + - Value: 0x1000003B0 + - Value: 0x48 + - Value: 0x1 + BlockData: [ 0x6D ] + - Value: 0x1 + - Value: 0x154 + - Value: 0x1 + - Value: 0x10 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x9 + Values: + - Value: 0x88 + - Value: 0x1000003BC + - AbbrCode: 0x9 + Values: + - Value: 0x9D + - Value: 0x1000003C0 + - AbbrCode: 0x9 + Values: + - Value: 0xB2 + - Value: 0x1000003C4 + - AbbrCode: 0x9 + Values: + - Value: 0xC7 + - Value: 0x1000003C8 + - 
AbbrCode: 0x9 + Values: + - Value: 0xDC + - Value: 0x1000003CC + - AbbrCode: 0x9 + Values: + - Value: 0xF1 + - Value: 0x1000003D0 + - AbbrCode: 0xA + Values: + - Value: 0x1 + BlockData: [ 0x58 ] + - Value: 0x1000003DC + - AbbrCode: 0xA + Values: + - Value: 0x1 + BlockData: [ 0x58 ] + - Value: 0x1000003E8 + - AbbrCode: 0x0 + - AbbrCode: 0xB + Values: + - Value: 0x1000003F8 + - Value: 0x18 + - Value: 0x1 + BlockData: [ 0x6D ] + - Value: 0x1 + - Value: 0x175 + - Value: 0x1 + - Value: 0x1F + - Value: 0x48 + - Value: 0x1 + - Value: 0x1 + - AbbrCode: 0x9 + Values: + - Value: 0x106 + - Value: 0x100000404 + - AbbrCode: 0x0 + - AbbrCode: 0x0 + debug_line: + - Length: 146 + Version: 4 + PrologueLength: 38 + MinInstLength: 1 + MaxOpsPerInst: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + Files: + - Name: call_sites.cpp + DirIdx: 0 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4294968120 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x19 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x4C + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + 
- Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 54 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_set_column + Data: 58 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0xF2 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: 0x4C + Data: 0 + - Opcode: DW_LNS_set_column + Data: 5 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x83 + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: 0x4C + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: 0x4C + Data: 0 + - Opcode: 0xBB + Data: 0 + - Opcode: DW_LNS_set_column + Data: 20 + - Opcode: 0xBC + Data: 0 + - Opcode: DW_LNS_set_column + Data: 1 + - Opcode: DW_LNS_set_epilogue_begin + Data: 0 + - Opcode: 0x83 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 0 + - Opcode: 0x84 + Data: 0 + - Opcode: DW_LNS_set_column + Data: 3 + - Opcode: DW_LNS_set_prologue_end + Data: 0 + - Opcode: 0x83 + Data: 0 + - Opcode: 0x4B + Data: 0 + - Opcode: DW_LNS_negate_stmt + Data: 0 + - Opcode: DW_LNS_set_epilogue_begin + Data: 0 + - Opcode: 0x4A + Data: 0 + - Opcode: DW_LNS_advance_pc + Data: 8 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 0 +... 
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml new file mode 100644 index 0000000000000..b454a9e14699a --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml @@ -0,0 +1,558 @@ +## Test that reconstructs a MachO binary from YAML and generates a callsite-enabled gsym from it - and then verifies the gsym. +## See llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test for the script to generate this yaml file + +# RUN: split-file %s %t +# RUN: yaml2obj %t/call_sites.exe.yaml -o %t/call_sites.exe + +# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_exe.gsym + +# Dump the GSYM file and check the output for callsite information +# RUN: llvm-gsymutil %t/call_sites_exe.gsym | FileCheck --check-prefix=CHECK-GSYM %s + + +# CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" +# CHECK-GSYM-NEXT: CallSites (by return address): +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[None] MatchRegex[.*func.*] +# CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[ExternalCall] MatchRegex[.*extern_func.*] + + +#--- callsites.yaml +functions: + - name: func_mainBin_dec_call_everything + callsites: + - return_offset: 0x0C + match_regex: ["func_mainBin_dec_01"] + flags: + - "InternalCall" + - return_offset: 0x10 + match_regex: ["func_mainBin_dec_02"] + 
flags: + - "InternalCall" + - return_offset: 0x14 + match_regex: ["func_mainBin_dec_03"] + flags: + - "InternalCall" + - return_offset: 24 + match_regex: ["func_mainBin_inc_01"] + flags: + - "InternalCall" + - return_offset: 28 + match_regex: ["func_mainBin_inc_02"] + flags: + - "InternalCall" + - return_offset: 32 + match_regex: ["func_mainBin_inc_03"] + flags: + - "InternalCall" + - return_offset: 44 + match_regex: [".*func.*"] + - return_offset: 56 + match_regex: [".*extern_func.*"] + flags: + - "ExternalCall" + + +#--- call_sites.exe.yaml +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 14 + sizeofcmds: 760 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000338 + size: 216 + offset: 0x338 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 28000090090140B929050051090100B9C0035FD628000090090140B929050051090100B9C0035FD628000090090140B929050051090100B9C0035FD628000090090140B929050011090100B9C0035FD628000090090140B929050011090100B9C0035FD628000090090140B929050011090100B9C0035FD6FD7BBFA9FD030091E0FFFF97E4FFFF97E8FFFF97ECFFFF97F0FFFF97F4FFFF971F2003D5A8E1015800013FD61F2003D588E1015800013FD6280000901F0100B9FD7BC1A8C0035FD6FD7BBFA9FD030091ECFFFF9700008052FD7BC1A8C0035FD6 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 16384 + filesize: 0 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __common + segname: __DATA + addr: 0x100004000 + size: 24 + offset: 0x0 + 
align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x1 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295000064 + vmsize: 1648 + fileoff: 16384 + filesize: 1648 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 0 + rebase_size: 0 + bind_off: 0 + bind_size: 0 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 0 + lazy_bind_size: 0 + export_off: 16384 + export_size: 232 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 16632 + nsyms: 34 + stroff: 17176 + strsize: 568 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 22 + iextdefsym: 22 + nextdefsym: 12 + iundefsym: 34 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + Content: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 4C4C44E9-5555-3144-A1D3-328233D00078 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 720896 + sdk: 720896 + ntools: 1 + Tools: + - tool: 4 + version: 1310720 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 1016 + stacksize: 0 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 16616 + datasize: 16 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 16632 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 17744 + datasize: 288 +LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 5 + Name: _ + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 2 + NodeOffset: 52 + Name: _mh_execute_header + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 56 + Name: main + Flags: 0x0 + Address: 0x3F8 + Other: 0x0 + ImportName: '' + - TerminalSize: 
0 + NodeOffset: 61 + Name: g_ + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 4 + NodeOffset: 104 + Name: volatile_var + Flags: 0x0 + Address: 0x4000 + Other: 0x0 + ImportName: '' + - TerminalSize: 4 + NodeOffset: 110 + Name: func_ptr + Flags: 0x0 + Address: 0x4008 + Other: 0x0 + ImportName: '' + - TerminalSize: 4 + NodeOffset: 116 + Name: extern_func_ptr + Flags: 0x0 + Address: 0x4010 + Other: 0x0 + ImportName: '' + - TerminalSize: 0 + NodeOffset: 122 + Name: func_mainBin_ + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 139 + Name: dec_ + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 3 + NodeOffset: 163 + Name: call_everything + Flags: 0x0 + Address: 0x3B0 + Other: 0x0 + ImportName: '' + - TerminalSize: 0 + NodeOffset: 168 + Name: '0' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 3 + NodeOffset: 182 + Name: '1' + Flags: 0x0 + Address: 0x338 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 187 + Name: '3' + Flags: 0x0 + Address: 0x360 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 192 + Name: '2' + Flags: 0x0 + Address: 0x34C + Other: 0x0 + ImportName: '' + - TerminalSize: 0 + NodeOffset: 197 + Name: inc_0 + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 3 + NodeOffset: 211 + Name: '2' + Flags: 0x0 + Address: 0x388 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 216 + Name: '1' + Flags: 0x0 + Address: 0x374 + Other: 0x0 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 221 + Name: '3' + Flags: 0x0 + Address: 0x39C + Other: 0x0 + ImportName: '' + NameList: + - n_strx: 235 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 295 + n_type: 0x66 + n_sect: 0 + n_desc: 1 + n_value: 0 + - n_strx: 353 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968312 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 
+ n_desc: 0 + n_value: 24 + - n_strx: 359 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968120 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 380 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968140 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 401 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968160 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 422 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968180 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 443 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968200 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 464 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968220 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 20 + - n_strx: 485 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294968240 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 72 + - n_strx: 519 + n_type: 0x20 + n_sect: 2 + n_desc: 0 + n_value: 4294983680 + - n_strx: 535 + n_type: 0x20 + n_sect: 2 + n_desc: 0 + n_value: 4294983688 + - n_strx: 547 + n_type: 0x20 + n_sect: 2 + n_desc: 0 + n_value: 4294983696 + - n_strx: 1 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968312 + - n_strx: 8 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968120 + - n_strx: 29 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968140 + - n_strx: 50 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968160 + - n_strx: 71 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968180 + - n_strx: 92 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968200 + - n_strx: 113 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968220 + - n_strx: 134 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294968240 + - n_strx: 168 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 
4294983680 + - n_strx: 184 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 4294983688 + - n_strx: 196 + n_type: 0xF + n_sect: 2 + n_desc: 0 + n_value: 4294983696 + - n_strx: 215 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + StringTable: + - ' ' + - _main + - _func_mainBin_dec_01 + - _func_mainBin_dec_02 + - _func_mainBin_dec_03 + - _func_mainBin_inc_01 + - _func_mainBin_inc_02 + - _func_mainBin_inc_03 + - _func_mainBin_dec_call_everything + - _g_volatile_var + - _g_func_ptr + - _g_extern_func_ptr + - __mh_execute_header + - '/tmp/_______________________________________/call_sites.cpp' + - '/tmp/_______________________________________/call_sites.o' + - _main + - _func_mainBin_dec_01 + - _func_mainBin_dec_02 + - _func_mainBin_dec_03 + - _func_mainBin_inc_01 + - _func_mainBin_inc_02 + - _func_mainBin_inc_03 + - _func_mainBin_dec_call_everything + - _g_volatile_var + - _g_func_ptr + - _g_extern_func_ptr + - '' + - '' + FunctionStarts: [ 0x338, 0x34C, 0x360, 0x374, 0x388, 0x39C, 0x3B0, 0x3F8 ] +... 
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test new file mode 100644 index 0000000000000..255b68a343b8d --- /dev/null +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test @@ -0,0 +1,304 @@ +// RUN: split-file %s %t + +// Compile the input LLVM IR into an object file +// RUN: llc -enable-machine-outliner=never -mtriple arm64-apple-darwin -filetype=obj %t/call_sites.ll -o %t/call_sites.o +// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_obj.gsym + +// Dump the GSYM file and check the output for callsite information +// RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s + + +// CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" +// CHECK-GSYM-NEXT: LineTable: + // func_mainBin_dec_call_everything() { +// CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:16 + // func_mainBin_dec_01(); +// CHECK-GSYM-NEXT: 0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17 + // func_mainBin_dec_02(); +// CHECK-GSYM-NEXT: 0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18 + // func_mainBin_dec_03(); +// CHECK-GSYM-NEXT: [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19 + // func_mainBin_inc_01(); +// CHECK-GSYM-NEXT: [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21 + // func_mainBin_inc_02(); +// CHECK-GSYM-NEXT: [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22 + // func_mainBin_inc_03(); +// CHECK-GSYM-NEXT: [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23 + // g_func_ptr(); +// CHECK-GSYM-NEXT: [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25 + // g_extern_func_ptr(); +// CHECK-GSYM-NEXT: [[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26 + // g_volatile_var = 0; +// CHECK-GSYM-NEXT: [[ADDR_var_assign:0x[0-9a-f]+]] 
{{.*}}/call_sites.cpp:28 + // } +// CHECK-GSYM-NEXT: [[#%x,]] {{.*}}/call_sites.cpp:29 +// CHECK-GSYM-NEXT: CallSites (by return address): +// CHECK-GSYM-NEXT: [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] +// CHECK-GSYM-NEXT: [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] +// CHECK-GSYM-NEXT: [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] +// CHECK-GSYM-NEXT: [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] +// CHECK-GSYM-NEXT: [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] +// CHECK-GSYM-NEXT: [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] +// CHECK-GSYM-NEXT: [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*] +// CHECK-GSYM-NEXT: [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*] + + +//--- callsites.yaml +functions: + - name: func_mainBin_dec_call_everything + callsites: + - return_offset: 0x0C + match_regex: ["func_mainBin_dec_01"] + flags: + - "InternalCall" + - return_offset: 0x10 + match_regex: ["func_mainBin_dec_02"] + flags: + - "InternalCall" + - return_offset: 0x14 + match_regex: ["func_mainBin_dec_03"] + flags: + - "InternalCall" + - return_offset: 24 + match_regex: ["func_mainBin_inc_01"] + flags: + - "InternalCall" + - return_offset: 28 + match_regex: ["func_mainBin_inc_02"] + flags: + - "InternalCall" + - return_offset: 32 + match_regex: ["func_mainBin_inc_03"] + flags: + - "InternalCall" + - return_offset: 44 + match_regex: [".*func.*"] + - return_offset: 56 + match_regex: [".*extern_func.*"] + flags: + - "ExternalCall" + + +//--- call_sites.ll +; ModuleID = 'call_sites.cpp' +source_filename = "call_sites.cpp" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-unknown-macosx10.4.0" + +@g_volatile_var = global i32 0, align 4, !dbg !0 +@g_func_ptr = global ptr null, align 8, !dbg !5 +@g_extern_func_ptr = global ptr null, align 8, !dbg !12 + 
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_dec_01() local_unnamed_addr #0 !dbg !21 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !24, !tbaa !25 + %dec = add nsw i32 %0, -1, !dbg !24 + store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !24, !tbaa !25 + ret void, !dbg !29 +} + +; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_dec_02() local_unnamed_addr #0 !dbg !30 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !31, !tbaa !25 + %dec = add nsw i32 %0, -1, !dbg !31 + store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !31, !tbaa !25 + ret void, !dbg !32 +} + +; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_dec_03() local_unnamed_addr #0 !dbg !33 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !34, !tbaa !25 + %dec = add nsw i32 %0, -1, !dbg !34 + store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !34, !tbaa !25 + ret void, !dbg !35 +} + +; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_inc_01() local_unnamed_addr #0 !dbg !36 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !37, !tbaa !25 + %inc = add nsw i32 %0, 1, !dbg !37 + store volatile i32 %inc, ptr @g_volatile_var, align 4, !dbg !37, !tbaa !25 + ret void, !dbg !38 +} + +; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_inc_02() local_unnamed_addr #0 !dbg !39 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !40, !tbaa !25 + %inc = add nsw i32 %0, 1, !dbg !40 + store volatile i32 %inc, ptr 
@g_volatile_var, align 4, !dbg !40, !tbaa !25 + ret void, !dbg !41 +} + +; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) +define void @func_mainBin_inc_03() local_unnamed_addr #0 !dbg !42 { +entry: + %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !43, !tbaa !25 + %inc = add nsw i32 %0, 1, !dbg !43 + store volatile i32 %inc, ptr @g_volatile_var, align 4, !dbg !43, !tbaa !25 + ret void, !dbg !44 +} + +; Function Attrs: minsize mustprogress noinline nounwind optsize ssp +define void @func_mainBin_dec_call_everything() local_unnamed_addr #1 !dbg !45 { +entry: + tail call void @func_mainBin_dec_01() #3, !dbg !46 + tail call void @func_mainBin_dec_02() #3, !dbg !47 + tail call void @func_mainBin_dec_03() #3, !dbg !48 + tail call void @func_mainBin_inc_01() #3, !dbg !49 + tail call void @func_mainBin_inc_02() #3, !dbg !50 + tail call void @func_mainBin_inc_03() #3, !dbg !51 + %0 = load volatile ptr, ptr @g_func_ptr, align 8, !dbg !52, !tbaa !53 + %call = tail call noundef i32 %0() #4, !dbg !52 + %1 = load volatile ptr, ptr @g_extern_func_ptr, align 8, !dbg !55, !tbaa !53 + %call1 = tail call noundef i32 %1() #4, !dbg !55 + store volatile i32 0, ptr @g_volatile_var, align 4, !dbg !56, !tbaa !25 + ret void, !dbg !57 +} + +; Function Attrs: minsize mustprogress norecurse nounwind optsize ssp +define noundef i32 @main() local_unnamed_addr #2 !dbg !58 { +entry: + tail call void @func_mainBin_dec_call_everything() #3, !dbg !59 + ret i32 0, !dbg !60 +} + +attributes #0 = { minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" 
"target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #1 = { minsize mustprogress noinline nounwind optsize ssp "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #2 = { minsize mustprogress norecurse nounwind optsize ssp "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" } +attributes #3 = { minsize optsize } +attributes #4 = { minsize nounwind optsize } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!15, !16, !17, !18, !19} +!llvm.ident = !{!20} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g_volatile_var", scope: !2, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None, sysroot: "/") +!3 = !DIFile(filename: "call_sites.cpp", directory: "/tmp/tst") +!4 = !{!0, !5, !12} +!5 = !DIGlobalVariableExpression(var: !6, 
expr: !DIExpression()) +!6 = distinct !DIGlobalVariable(name: "g_func_ptr", scope: !2, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) +!7 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !8) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression()) +!13 = distinct !DIGlobalVariable(name: "g_extern_func_ptr", scope: !2, file: !3, line: 6, type: !7, isLocal: false, isDefinition: true) +!14 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !11) +!15 = !{i32 7, !"Dwarf Version", i32 4} +!16 = !{i32 2, !"Debug Info Version", i32 3} +!17 = !{i32 1, !"wchar_size", i32 4} +!18 = !{i32 8, !"PIC Level", i32 2} +!19 = !{i32 7, !"frame-pointer", i32 1} +!20 = !{!"clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)"} +!21 = distinct !DISubprogram(name: "func_mainBin_dec_01", scope: !3, file: !3, line: 8, type: !22, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!22 = !DISubroutineType(types: !23) +!23 = !{null} +!24 = !DILocation(line: 8, column: 54, scope: !21) +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !27, i64 0} +!27 = !{!"omnipotent char", !28, i64 0} +!28 = !{!"Simple C++ TBAA"} +!29 = !DILocation(line: 8, column: 58, scope: !21) +!30 = distinct !DISubprogram(name: "func_mainBin_dec_02", scope: !3, file: !3, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!31 = !DILocation(line: 9, column: 54, scope: !30) +!32 = !DILocation(line: 9, column: 58, scope: !30) +!33 = distinct !DISubprogram(name: "func_mainBin_dec_03", scope: !3, file: !3, line: 10, type: !22, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, 
spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!34 = !DILocation(line: 10, column: 54, scope: !33) +!35 = !DILocation(line: 10, column: 58, scope: !33) +!36 = distinct !DISubprogram(name: "func_mainBin_inc_01", scope: !3, file: !3, line: 12, type: !22, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!37 = !DILocation(line: 12, column: 54, scope: !36) +!38 = !DILocation(line: 12, column: 58, scope: !36) +!39 = distinct !DISubprogram(name: "func_mainBin_inc_02", scope: !3, file: !3, line: 13, type: !22, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!40 = !DILocation(line: 13, column: 54, scope: !39) +!41 = !DILocation(line: 13, column: 58, scope: !39) +!42 = distinct !DISubprogram(name: "func_mainBin_inc_03", scope: !3, file: !3, line: 14, type: !22, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!43 = !DILocation(line: 14, column: 54, scope: !42) +!44 = !DILocation(line: 14, column: 58, scope: !42) +!45 = distinct !DISubprogram(name: "func_mainBin_dec_call_everything", scope: !3, file: !3, line: 16, type: !22, scopeLine: 16, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!46 = !DILocation(line: 17, column: 5, scope: !45) +!47 = !DILocation(line: 18, column: 5, scope: !45) +!48 = !DILocation(line: 19, column: 5, scope: !45) +!49 = !DILocation(line: 21, column: 5, scope: !45) +!50 = !DILocation(line: 22, column: 5, scope: !45) +!51 = !DILocation(line: 23, column: 5, scope: !45) +!52 = !DILocation(line: 25, column: 5, scope: !45) +!53 = !{!54, !54, i64 0} +!54 = !{!"any pointer", !27, i64 0} +!55 = !DILocation(line: 26, column: 5, scope: !45) +!56 = !DILocation(line: 28, column: 20, scope: !45) +!57 = !DILocation(line: 29, column: 1, scope: !45) +!58 = distinct 
!DISubprogram(name: "main", scope: !3, file: !3, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!59 = !DILocation(line: 32, column: 3, scope: !58) +!60 = !DILocation(line: 33, column: 3, scope: !58) + + +//--- generate-callsite-test-data.sh +#!/bin/bash +## This is provided for reference only, if we need to modify the file and regenerate the assembly code +set -ex + +TOOLCHAIN_DIR="llvm-project/build/Debug/bin" + +# Create call_sites.cpp +cat > call_sites.cpp < call_sites_binary.yaml + +# Dump the dSYM to YAML +"$TOOLCHAIN_DIR"/obj2yaml call_sites.dSYM/Contents/Resources/DWARF/call_sites > call_sites_dsym.yaml diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index e3001f3fe53f1..8dcce1f738875 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -18,6 +18,8 @@ defm convert : "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">; def merged_functions : FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">; +defm callsites_from_yaml : + Eq<"callsites-from-yaml", "Load call site info from YAML file.">; defm arch : Eq<"arch", "Process debug information for the specified CPU architecture only.\nArchitectures may be specified by name or by number.\nThis option can be specified multiple times, once for each desired architecture">; diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 10bbdf215736d..5935015746325 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DIContext.h" #include 
"llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/GSYM/CallSiteInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachOUniversal.h" @@ -96,6 +97,7 @@ static bool Quiet; static std::vector LookupAddresses; static bool LookupAddressesFromStdin; static bool StoreMergedFunctionInfo = false; +static std::vector CallSiteYamlPaths; static void parseArgs(int argc, char **argv) { GSYMUtilOptTable Tbl; @@ -177,6 +179,16 @@ static void parseArgs(int argc, char **argv) { LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions); + + for (const llvm::opt::Arg *A : Args.filtered(OPT_callsites_from_yaml_EQ)) + if (A->getValue() && A->getValue()[0] != '\0') + CallSiteYamlPaths.emplace_back(A->getValue()); + else { + llvm::errs() + << ToolName + << ": --callsites-from-yaml option requires a non-empty argument.\n"; + std::exit(1); + } } /// @} @@ -370,6 +382,13 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, if (auto Err = ObjectFileTransformer::convert(Obj, Out, Gsym)) return Err; + // If any call site YAML files were specified, load them now. + for (const auto &yamlPath : CallSiteYamlPaths) { + if (auto Err = Gsym.loadCallSitesFromYAML(yamlPath)) { + return Err; + } + } + // Finalize the GSYM to make it ready to save to disk. This will remove // duplicate FunctionInfo entries where we might have found an entry from // debug info and also a symbol table entry from the object file. diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index c6218e5004d17..33f53de2e77bc 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -214,10 +214,10 @@ TEST(GSYMTest, TestFunctionInfoDecodeErrors) { FW.writeU32(1); // InfoType::LineTableInfo. 
TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, "0x0000000c: missing FunctionInfo InfoType length"); - FW.fixup32(4, FixupOffset); // Write an invalid InfoType enumeration value + FW.fixup32(7, FixupOffset); // Write an invalid InfoType enumeration value FW.writeU32(0); // LineTableInfo InfoType data length. TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr, - "0x00000008: unsupported InfoType 4"); + "0x00000008: unsupported InfoType 7"); } static void TestFunctionInfoEncodeError(llvm::endianness ByteOrder, From fe23779dd6ecb7acd835577d68824001a2dcc608 Mon Sep 17 00:00:00 2001 From: Alex B Date: Thu, 26 Sep 2024 16:10:23 -0700 Subject: [PATCH 02/12] Address Feedback nr.1 --- .../llvm/DebugInfo/GSYM/CallSiteInfo.h | 46 ++-------------- llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 54 +++++++------------ llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 3 +- 3 files changed, 25 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index 45257f0e11578..c4e97a599e988 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -34,7 +34,6 @@ namespace gsym { class FileWriter; struct FunctionInfo; struct CallSiteInfo { -public: enum Flags : uint8_t { None = 0, // This flag specifies that the call site can only call a function within @@ -52,7 +51,7 @@ struct CallSiteInfo { std::vector MatchRegex; /// Bitwise OR of CallSiteInfo::Flags values - uint8_t Flags; + uint8_t Flags = CallSiteInfo::Flags::None; /// Decode a CallSiteInfo object from a binary data stream. /// @@ -73,16 +72,8 @@ struct CallSiteInfo { }; struct CallSiteInfoCollection { -public: std::vector CallSites; - void clear() { CallSites.clear(); } - - /// Query if a CallSiteInfoCollection object is valid. - /// - /// \returns True if the collection is not empty. 
- bool isValid() const { return !CallSites.empty(); } - /// Decode a CallSiteInfoCollection object from a binary data stream. /// /// \param Data The binary stream to read the data from. @@ -156,54 +147,27 @@ class CallSiteInfoLoader { /// \returns A 32-bit unsigned integer representing the offset of the string. uint32_t offsetFromString(StringRef str); - /// Reads the content of the YAML file specified by `YAMLFile` into - /// `yamlContent`. - /// - /// \param YAMLFile A StringRef representing the path to the YAML file. - /// \param Buffer The memory buffer containing the YAML content. - /// - /// \returns An `llvm::Error` indicating success or describing any issues - /// encountered while reading the file. - llvm::Error readYAMLFile(StringRef YAMLFile, - std::unique_ptr &Buffer); - - /// Parses the YAML content and populates `functionsYAML` with the parsed - /// data. - /// - /// \param Buffer The memory buffer containing the YAML content. - /// \param functionsYAML A reference to an llvm::yaml::FunctionsYAML object to - /// be populated. - /// - /// \returns An `llvm::Error` indicating success or describing any issues - /// encountered during parsing. - llvm::Error parseYAML(llvm::MemoryBuffer &Buffer, - llvm::yaml::FunctionsYAML &functionsYAML); - /// Builds a map from function names to FunctionInfo pointers based on the /// provided `Funcs` vector. /// /// \param Funcs A reference to a vector of FunctionInfo objects. /// - /// \returns An unordered_map mapping function names (std::string) to their + /// \returns A StringMap mapping function names (StringRef) to their /// corresponding FunctionInfo pointers. - std::unordered_map - buildFunctionMap(std::vector &Funcs); + StringMap buildFunctionMap(std::vector &Funcs); /// Processes the parsed YAML functions and updates the `FuncMap` accordingly. /// /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML /// object containing parsed YAML data. 
- /// \param FuncMap A reference to an unordered_map mapping function names to + /// \param FuncMap A reference to a StringMap mapping function names to /// FunctionInfo pointers. - /// \param YAMLFile A StringRef representing the name of the YAML file (used - /// for error messages). /// /// \returns An `llvm::Error` indicating success or describing any issues /// encountered during processing. llvm::Error processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML, - std::unordered_map &FuncMap, - StringRef YAMLFile); + StringMap &FuncMap); /// Map of existing string offsets to CachedHashStringRef. DenseMap &StringOffsetMap; diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index 4ed3d3f67a44f..3fe7fb48cb0de 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -162,49 +162,32 @@ uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) { llvm::Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, StringRef YAMLFile) { - std::unique_ptr Buffer; // Step 1: Read YAML file - if (auto Err = readYAMLFile(YAMLFile, Buffer)) - return Err; - - // Step 2: Parse YAML content - llvm::yaml::FunctionsYAML functionsYAML; - if (auto Err = parseYAML(*Buffer, functionsYAML)) - return Err; - - // Step 3: Build function map from Funcs - auto FuncMap = buildFunctionMap(Funcs); - - // Step 4: Process parsed YAML functions and update FuncMap - return processYAMLFunctions(functionsYAML, FuncMap, YAMLFile); -} - -llvm::Error -CallSiteInfoLoader::readYAMLFile(StringRef YAMLFile, - std::unique_ptr &Buffer) { auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile); if (!BufferOrError) return errorCodeToError(BufferOrError.getError()); - Buffer = std::move(*BufferOrError); - return llvm::Error::success(); -} -llvm::Error -CallSiteInfoLoader::parseYAML(llvm::MemoryBuffer &Buffer, - llvm::yaml::FunctionsYAML &functionsYAML) { - // Use the MemoryBufferRef constructor - 
llvm::yaml::Input yin(Buffer.getMemBufferRef()); + std::unique_ptr Buffer = std::move(*BufferOrError); + + // Step 2: Parse YAML content + llvm::yaml::FunctionsYAML functionsYAML; + llvm::yaml::Input yin(Buffer->getMemBufferRef()); yin >> functionsYAML; if (yin.error()) { return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n", - Buffer.getBufferIdentifier().str().c_str()); + Buffer->getBufferIdentifier().str().c_str()); } - return llvm::Error::success(); + + // Step 3: Build function map from Funcs + auto FuncMap = buildFunctionMap(Funcs); + + // Step 4: Process parsed YAML functions and update FuncMap + return processYAMLFunctions(functionsYAML, FuncMap); } -std::unordered_map +StringMap CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { - std::unordered_map FuncMap; + StringMap FuncMap; auto insertFunc = [&](auto &Function) { std::string FuncName = stringFromOffset(Function.Name).str(); // If the function name is already in the map, don't update it. This way we @@ -227,8 +210,7 @@ CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { llvm::Error CallSiteInfoLoader::processYAMLFunctions( const llvm::yaml::FunctionsYAML &functionsYAML, - std::unordered_map &FuncMap, - StringRef YAMLFile) { + StringMap &FuncMap) { // For each function in the YAML file for (const auto &FuncYAML : functionsYAML.functions) { auto it = FuncMap.find(FuncYAML.name); @@ -247,9 +229,9 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions( // Since YAML has specifies relative return offsets, add the function // start address to make the offset absolute. 
CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset; - for (const auto ®ex : CallSiteYAML.match_regex) { - CSI.MatchRegex.push_back(offsetFromString(regex)); - } + for (const auto &Regex : CallSiteYAML.match_regex) + CSI.MatchRegex.push_back(offsetFromString(Regex)); + // Initialize flags to None CSI.Flags = CallSiteInfo::None; // Parse flags and combine them diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 5935015746325..bb107c17f6f96 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -180,7 +180,7 @@ static void parseArgs(int argc, char **argv) { LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions); - for (const llvm::opt::Arg *A : Args.filtered(OPT_callsites_from_yaml_EQ)) + if (const llvm::opt::Arg *A = Args.getLastArg(OPT_callsites_from_yaml_EQ)) { if (A->getValue() && A->getValue()[0] != '\0') CallSiteYamlPaths.emplace_back(A->getValue()); else { @@ -189,6 +189,7 @@ static void parseArgs(int argc, char **argv) { << ": --callsites-from-yaml option requires a non-empty argument.\n"; std::exit(1); } + } } /// @} From cd7c30ab8cac2b2be6446c74c4542dd8321d7439 Mon Sep 17 00:00:00 2001 From: Alex B Date: Fri, 27 Sep 2024 14:00:36 -0700 Subject: [PATCH 03/12] Address feedback nr.2 --- .../llvm/DebugInfo/GSYM/CallSiteInfo.h | 72 +++------------ .../include/llvm/DebugInfo/GSYM/GsymCreator.h | 9 ++ llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 92 ++++++++----------- llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 2 +- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 8 +- llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 15 ++- 6 files changed, 76 insertions(+), 122 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index c4e97a599e988..496679fdf9bc8 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ 
b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -32,6 +32,7 @@ struct FunctionsYAML; namespace gsym { class FileWriter; +class GsymCreator; struct FunctionInfo; struct CallSiteInfo { enum Flags : uint8_t { @@ -45,7 +46,7 @@ struct CallSiteInfo { }; /// The return address of the call site. - uint64_t ReturnAddress; + uint64_t ReturnAddress = 0; /// Offsets into the string table for function names regex patterns. std::vector MatchRegex; @@ -57,12 +58,9 @@ struct CallSiteInfo { /// /// \param Data The binary stream to read the data from. /// \param Offset The current offset within the data stream. - /// \param BaseAddr The base address for decoding (unused here but included - /// for consistency). - /// /// \returns A CallSiteInfo or an error describing the issue. - static llvm::Expected - decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr); + static llvm::Expected decode(DataExtractor &Data, + uint64_t &Offset); /// Encode this CallSiteInfo object into a FileWriter stream. /// @@ -77,12 +75,8 @@ struct CallSiteInfoCollection { /// Decode a CallSiteInfoCollection object from a binary data stream. /// /// \param Data The binary stream to read the data from. - /// \param BaseAddr The base address for decoding (unused here but included - /// for consistency). - /// /// \returns A CallSiteInfoCollection or an error describing the issue. - static llvm::Expected decode(DataExtractor &Data, - uint64_t BaseAddr); + static llvm::Expected decode(DataExtractor &Data); /// Encode this CallSiteInfoCollection object into a FileWriter stream. /// @@ -91,29 +85,18 @@ struct CallSiteInfoCollection { llvm::Error encode(FileWriter &O) const; }; -bool operator==(const CallSiteInfoCollection &LHS, - const CallSiteInfoCollection &RHS); - -bool operator==(const CallSiteInfo &LHS, const CallSiteInfo &RHS); - class CallSiteInfoLoader { public: /// Constructor that initializes the CallSiteInfoLoader with necessary data /// structures. 
/// /// \param StringOffsetMap A reference to a DenseMap that maps existing string - /// offsets to CachedHashStringRef. \param StrTab A reference to a - /// StringTableBuilder used for managing looking up and creating new strings. - /// \param StringStorage A reference to a StringSet for storing the data for - /// generated strings. - CallSiteInfoLoader(DenseMap &StringOffsetMap, - StringTableBuilder &StrTab, StringSet<> &StringStorage) - : StringOffsetMap(StringOffsetMap), StrTab(StrTab), - StringStorage(StringStorage) {} - - /// Loads call site information from a YAML file and populates the provided - /// FunctionInfo vector. - /// + /// offsets to CachedHashStringRef. + /// \param StrTab A reference to a StringTableBuilder used for managing + /// looking up and creating new strings. \param StringStorage A reference to a + /// StringSet for storing the data for generated strings. + CallSiteInfoLoader(GsymCreator &GCreator) : GCreator(GCreator) {} + /// This method reads the specified YAML file, parses its content, and updates /// the `Funcs` vector with call site information based on the YAML data. /// @@ -121,37 +104,15 @@ class CallSiteInfoLoader { /// populated. /// \param YAMLFile A StringRef representing the path to the YAML /// file to be loaded. - /// /// \returns An `llvm::Error` indicating success or describing any issues /// encountered during the loading process. llvm::Error loadYAML(std::vector &Funcs, StringRef YAMLFile); private: - /// Retrieves an existing string from the StringOffsetMap using the provided - /// offset. - /// - /// \param offset A 32-bit unsigned integer representing the offset of the - /// string. - /// - /// \returns A StringRef corresponding to the string for the given offset. - /// - /// \note This method asserts that the offset exists in the StringOffsetMap. - StringRef stringFromOffset(uint32_t offset) const; - - /// Obtains the offset corresponding to a given string in the StrTab. 
If the - /// string does not already exist, it is created. - /// - /// \param str A StringRef representing the string for which the offset is - /// requested. - /// - /// \returns A 32-bit unsigned integer representing the offset of the string. - uint32_t offsetFromString(StringRef str); - /// Builds a map from function names to FunctionInfo pointers based on the /// provided `Funcs` vector. /// /// \param Funcs A reference to a vector of FunctionInfo objects. - /// /// \returns A StringMap mapping function names (StringRef) to their /// corresponding FunctionInfo pointers. StringMap buildFunctionMap(std::vector &Funcs); @@ -162,21 +123,14 @@ class CallSiteInfoLoader { /// object containing parsed YAML data. /// \param FuncMap A reference to a StringMap mapping function names to /// FunctionInfo pointers. - /// /// \returns An `llvm::Error` indicating success or describing any issues /// encountered during processing. llvm::Error processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML, StringMap &FuncMap); - /// Map of existing string offsets to CachedHashStringRef. - DenseMap &StringOffsetMap; - - /// The gSYM string table builder. - StringTableBuilder &StrTab; - - /// The gSYM string storage - we store generated strings here. - StringSet<> &StringStorage; + /// Reference to the parent Gsym Creator object. + GsymCreator &GCreator; }; raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI); diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h index 9e5b3c1f8d92d..8e9e500f267c1 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -329,6 +329,15 @@ class GsymCreator { /// \returns The unique 32 bit offset into the string table. uint32_t insertString(StringRef S, bool Copy = true); + /// Retrieve a string fromt he GSYM string table given its offset. + /// + /// The offset is assumed to be a valid offset into the string table. 
+ /// otherwise an assert will be triggered. + /// + /// \param offset The offset of the string to retrieve, previously returned by + /// insertString. \returns The string at the given offset in the string table. + StringRef getString(uint32_t offset); + /// Insert a file into this GSYM creator. /// /// Inserts a file by adding a FileEntry into the "Files" member variable if diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index 3fe7fb48cb0de..5f8f622043bd8 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/CachedHashString.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/YAMLParser.h" @@ -23,17 +24,17 @@ using namespace llvm; using namespace gsym; -llvm::Error CallSiteInfo::encode(FileWriter &O) const { +Error CallSiteInfo::encode(FileWriter &O) const { O.writeU64(ReturnAddress); O.writeU8(Flags); O.writeU32(MatchRegex.size()); for (uint32_t Entry : MatchRegex) O.writeU32(Entry); - return llvm::Error::success(); + return Error::success(); } -llvm::Expected -CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) { +Expected CallSiteInfo::decode(DataExtractor &Data, + uint64_t &Offset) { CallSiteInfo CSI; // Read ReturnAddress @@ -68,17 +69,17 @@ CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) { return CSI; } -llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const { +Error CallSiteInfoCollection::encode(FileWriter &O) const { O.writeU32(CallSites.size()); for (const CallSiteInfo &CSI : CallSites) { - if (llvm::Error Err = CSI.encode(O)) + if (Error Err = CSI.encode(O)) return Err; } - return llvm::Error::success(); + return Error::success(); } -llvm::Expected 
-CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) { +Expected +CallSiteInfoCollection::decode(DataExtractor &Data) { CallSiteInfoCollection CSC; uint64_t Offset = 0; @@ -91,8 +92,7 @@ CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) { CSC.CallSites.reserve(NumCallSites); for (uint32_t i = 0; i < NumCallSites; ++i) { - llvm::Expected ECSI = - CallSiteInfo::decode(Data, Offset, BaseAddr); + Expected ECSI = CallSiteInfo::decode(Data, Offset); if (!ECSI) return ECSI.takeError(); CSC.CallSites.emplace_back(*ECSI); @@ -108,7 +108,7 @@ namespace yaml { struct CallSiteYAML { // The offset of the return address of the call site - relative to the start // of the function. - llvm::yaml::Hex64 return_offset; + Hex64 return_offset; std::vector match_regex; std::vector flags; }; @@ -149,34 +149,22 @@ template <> struct MappingTraits { LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML) LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML) -// Implementation of CallSiteInfoLoader -StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const { - assert(StringOffsetMap.count(offset) && - "expected function name offset to already be in StringOffsetMap"); - return StringOffsetMap.find(offset)->second.val(); -} - -uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) { - return StrTab.add(StringStorage.insert(str).first->getKey()); -} - -llvm::Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, - StringRef YAMLFile) { +Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, + StringRef YAMLFile) { // Step 1: Read YAML file - auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile); + auto BufferOrError = MemoryBuffer::getFile(YAMLFile); if (!BufferOrError) return errorCodeToError(BufferOrError.getError()); - std::unique_ptr Buffer = std::move(*BufferOrError); + std::unique_ptr Buffer = std::move(*BufferOrError); // Step 2: Parse YAML content - llvm::yaml::FunctionsYAML functionsYAML; - llvm::yaml::Input yin(Buffer->getMemBufferRef()); + 
yaml::FunctionsYAML functionsYAML; + yaml::Input yin(Buffer->getMemBufferRef()); yin >> functionsYAML; - if (yin.error()) { - return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n", - Buffer->getBufferIdentifier().str().c_str()); - } + if (yin.error()) + return createStringError(yin.error(), "Error parsing YAML file: %s\n", + Buffer->getBufferIdentifier().str().c_str()); // Step 3: Build function map from Funcs auto FuncMap = buildFunctionMap(Funcs); @@ -189,7 +177,7 @@ StringMap CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { StringMap FuncMap; auto insertFunc = [&](auto &Function) { - std::string FuncName = stringFromOffset(Function.Name).str(); + StringRef FuncName = GCreator.getString(Function.Name); // If the function name is already in the map, don't update it. This way we // preferentially use the first encountered function. Since symbols are // loaded from dSYM first, we end up preferring keeping track of symbols @@ -208,19 +196,19 @@ CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { return FuncMap; } -llvm::Error CallSiteInfoLoader::processYAMLFunctions( - const llvm::yaml::FunctionsYAML &functionsYAML, +Error CallSiteInfoLoader::processYAMLFunctions( + const yaml::FunctionsYAML &functionsYAML, StringMap &FuncMap) { // For each function in the YAML file for (const auto &FuncYAML : functionsYAML.functions) { - auto it = FuncMap.find(FuncYAML.name); - if (it == FuncMap.end()) { - return llvm::createStringError( + auto It = FuncMap.find(FuncYAML.name); + if (It == FuncMap.end()) + return createStringError( std::errc::invalid_argument, "Can't find function '%s' specified in callsite YAML\n", FuncYAML.name.c_str()); - } - FunctionInfo *FuncInfo = it->second; + + FunctionInfo *FuncInfo = It->second; // Create a CallSiteInfoCollection if not already present if (!FuncInfo->CallSites) FuncInfo->CallSites = CallSiteInfoCollection(); @@ -229,11 +217,11 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions( // Since YAML has 
specifies relative return offsets, add the function // start address to make the offset absolute. CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset; - for (const auto &Regex : CallSiteYAML.match_regex) - CSI.MatchRegex.push_back(offsetFromString(Regex)); + for (const auto &Regex : CallSiteYAML.match_regex) { + uint32_t StrOffset = GCreator.insertString(Regex); + CSI.MatchRegex.push_back(StrOffset); + } - // Initialize flags to None - CSI.Flags = CallSiteInfo::None; // Parse flags and combine them for (const auto &FlagStr : CallSiteYAML.flags) { if (FlagStr == "InternalCall") { @@ -241,18 +229,18 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions( } else if (FlagStr == "ExternalCall") { CSI.Flags |= static_cast(CallSiteInfo::ExternalCall); } else { - return llvm::createStringError(std::errc::invalid_argument, - "Unknown flag in callsite YAML: %s\n", - FlagStr.c_str()); + return createStringError(std::errc::invalid_argument, + "Unknown flag in callsite YAML: %s\n", + FlagStr.c_str()); } } FuncInfo->CallSites->CallSites.push_back(CSI); } } - return llvm::Error::success(); + return Error::success(); } -raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { +raw_ostream &gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { OS << " Return=" << HEX64(CSI.ReturnAddress); OS << " Flags=" << HEX8(CSI.Flags); @@ -265,8 +253,8 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { return OS; } -raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, - const CallSiteInfoCollection &CSIC) { +raw_ostream &gsym::operator<<(raw_ostream &OS, + const CallSiteInfoCollection &CSIC) { for (const auto &CS : CSIC.CallSites) { OS << CS; OS << "\n"; diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index 9dc9c241168b2..facad95f8fdeb 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -100,7 +100,7 @@ 
llvm::Expected FunctionInfo::decode(DataExtractor &Data, case InfoType::CallSiteInfo: if (Expected CI = - llvm::gsym::CallSiteInfoCollection::decode(InfoData, BaseAddr)) + llvm::gsym::CallSiteInfoCollection::decode(InfoData)) FI.CallSites = std::move(CI.get()); else return CI.takeError(); diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 0df84ee256aef..3cabb27053a5c 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -191,7 +191,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) { // Use the loader to load call site information from the YAML file. - CallSiteInfoLoader Loader(StringOffsetMap, StrTab, StringStorage); + CallSiteInfoLoader Loader(*this); return Loader.loadYAML(Funcs, YAMLFile); } @@ -385,6 +385,12 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { return StrOff; } +StringRef GsymCreator::getString(uint32_t offset) { + assert(StringOffsetMap.count(offset) && + "GsymCreator::getString expects a valid offset as parameter."); + return StringOffsetMap.find(offset)->second.val(); +} + void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { std::lock_guard Guard(Mutex); Funcs.emplace_back(std::move(FI)); diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index bb107c17f6f96..6c7506e51e649 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -97,7 +97,7 @@ static bool Quiet; static std::vector LookupAddresses; static bool LookupAddressesFromStdin; static bool StoreMergedFunctionInfo = false; -static std::vector CallSiteYamlPaths; +static std::string CallSiteYamlPath; static void parseArgs(int argc, char **argv) { GSYMUtilOptTable Tbl; @@ -180,10 +180,9 @@ static void parseArgs(int argc, char **argv) { LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); 
StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions); - if (const llvm::opt::Arg *A = Args.getLastArg(OPT_callsites_from_yaml_EQ)) { - if (A->getValue() && A->getValue()[0] != '\0') - CallSiteYamlPaths.emplace_back(A->getValue()); - else { + if (Args.hasArg(OPT_callsites_from_yaml_EQ)) { + CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_from_yaml_EQ); + if (CallSiteYamlPath.empty()) { llvm::errs() << ToolName << ": --callsites-from-yaml option requires a non-empty argument.\n"; @@ -384,11 +383,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, return Err; // If any call site YAML files were specified, load them now. - for (const auto &yamlPath : CallSiteYamlPaths) { - if (auto Err = Gsym.loadCallSitesFromYAML(yamlPath)) { + if (!CallSiteYamlPath.empty()) + if (auto Err = Gsym.loadCallSitesFromYAML(CallSiteYamlPath)) return Err; - } - } // Finalize the GSYM to make it ready to save to disk. This will remove // duplicate FunctionInfo entries where we might have found an entry from From 087cdea260ddcaec7b952ff523b5b662b49e18cb Mon Sep 17 00:00:00 2001 From: Alex B Date: Fri, 27 Sep 2024 14:04:16 -0700 Subject: [PATCH 04/12] Address Feedback Nr 2.1 --- llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index 4f645714480e6..786633adc5120 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -412,9 +412,8 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI, dump(OS, *FI.MergedFunctions); } - if (FI.CallSites) { + if (FI.CallSites) dump(OS, *FI.CallSites); - } } void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) { From d7cff1b16cb3edb26208b6cf6d5d2f53f01e34ef Mon Sep 17 00:00:00 2001 From: Alex B Date: Thu, 17 Oct 2024 12:49:04 -0700 Subject: [PATCH 05/12] Address [Some] Feedback Nr 3 - Rest pending YAML format 
--- .../llvm/DebugInfo/GSYM/CallSiteInfo.h | 19 +++++-------------- llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 6 +++--- llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 6 +++--- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index 496679fdf9bc8..8043779f31724 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -9,24 +9,17 @@ #ifndef LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H #define LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/GSYM/ExtractRanges.h" -#include "llvm/Support/YAMLParser.h" -#include -#include +#include "llvm/Support/Error.h" #include namespace llvm { class DataExtractor; class raw_ostream; -class StringTableBuilder; -class CachedHashStringRef; namespace yaml { -struct CallSiteYAML; -struct FunctionYAML; struct FunctionsYAML; } // namespace yaml @@ -43,6 +36,8 @@ struct CallSiteInfo { // This flag specifies that the call site can only call a function outside // the link unit that the call site is in. ExternalCall = 1 << 1, + + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ ExternalCall), }; /// The return address of the call site. @@ -90,11 +85,7 @@ class CallSiteInfoLoader { /// Constructor that initializes the CallSiteInfoLoader with necessary data /// structures. /// - /// \param StringOffsetMap A reference to a DenseMap that maps existing string - /// offsets to CachedHashStringRef. - /// \param StrTab A reference to a StringTableBuilder used for managing - /// looking up and creating new strings. \param StringStorage A reference to a - /// StringSet for storing the data for generated strings. + /// \param GCreator A reference to the GsymCreator. 
CallSiteInfoLoader(GsymCreator &GCreator) : GCreator(GCreator) {} /// This method reads the specified YAML file, parses its content, and updates diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index 5f8f622043bd8..6439aad4814f5 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -159,9 +159,9 @@ Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, std::unique_ptr Buffer = std::move(*BufferOrError); // Step 2: Parse YAML content - yaml::FunctionsYAML functionsYAML; + yaml::FunctionsYAML FuncsYAML; yaml::Input yin(Buffer->getMemBufferRef()); - yin >> functionsYAML; + yin >> FuncsYAML; if (yin.error()) return createStringError(yin.error(), "Error parsing YAML file: %s\n", Buffer->getBufferIdentifier().str().c_str()); @@ -170,7 +170,7 @@ Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, auto FuncMap = buildFunctionMap(Funcs); // Step 4: Process parsed YAML functions and update FuncMap - return processYAMLFunctions(functionsYAML, FuncMap); + return processYAMLFunctions(FuncsYAML, FuncMap); } StringMap diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index facad95f8fdeb..dd754c701f624 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -218,9 +218,9 @@ llvm::Expected FunctionInfo::encode(FileWriter &Out, // writing the CallSites out with the number of bytes that were written. 
Out.writeU32(0); const auto StartOffset = Out.tell(); - llvm::Error err = CallSites->encode(Out); - if (err) - return std::move(err); + Error Err = CallSites->encode(Out); + if (Err) + return std::move(Err); const auto Length = Out.tell() - StartOffset; if (Length > UINT32_MAX) return createStringError(std::errc::invalid_argument, From cc1d3144e48788b7466a4e7a6869bb984c298568 Mon Sep 17 00:00:00 2001 From: Alex B Date: Wed, 13 Nov 2024 12:37:04 -0800 Subject: [PATCH 06/12] Switch to relative return offsets --- .../llvm/DebugInfo/GSYM/CallSiteInfo.h | 11 +++-- llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 21 +++++----- llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 4 +- .../macho-gsym-callsite-info-obj.test | 42 +++++++++---------- 4 files changed, 38 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index 8043779f31724..8a9b3e5553121 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -40,8 +40,8 @@ struct CallSiteInfo { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ ExternalCall), }; - /// The return address of the call site. - uint64_t ReturnAddress = 0; + /// The return offset of the call site - relative to the function start. + uint64_t ReturnOffset = 0; /// Offsets into the string table for function names regex patterns. std::vector MatchRegex; @@ -110,15 +110,14 @@ class CallSiteInfoLoader { /// Processes the parsed YAML functions and updates the `FuncMap` accordingly. /// - /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML + /// \param FuncYAMLs A constant reference to an llvm::yaml::FunctionsYAML /// object containing parsed YAML data. /// \param FuncMap A reference to a StringMap mapping function names to /// FunctionInfo pointers. /// \returns An `llvm::Error` indicating success or describing any issues /// encountered during processing. 
- llvm::Error - processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML, - StringMap &FuncMap); + llvm::Error processYAMLFunctions(const llvm::yaml::FunctionsYAML &FuncYAMLs, + StringMap &FuncMap); /// Reference to the parent Gsym Creator object. GsymCreator &GCreator; diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index 6439aad4814f5..af2a374c908bc 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; using namespace gsym; Error CallSiteInfo::encode(FileWriter &O) const { - O.writeU64(ReturnAddress); + O.writeU64(ReturnOffset); O.writeU8(Flags); O.writeU32(MatchRegex.size()); for (uint32_t Entry : MatchRegex) @@ -37,11 +37,11 @@ Expected CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset) { CallSiteInfo CSI; - // Read ReturnAddress + // Read ReturnOffset if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t))) return createStringError(std::errc::io_error, - "0x%8.8" PRIx64 ": missing ReturnAddress", Offset); - CSI.ReturnAddress = Data.getU64(&Offset); + "0x%8.8" PRIx64 ": missing ReturnOffset", Offset); + CSI.ReturnOffset = Data.getU64(&Offset); // Read Flags if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t))) @@ -138,8 +138,8 @@ template <> struct MappingTraits { }; template <> struct MappingTraits { - static void mapping(IO &io, FunctionsYAML &functionsYAML) { - io.mapRequired("functions", functionsYAML.functions); + static void mapping(IO &io, FunctionsYAML &FuncYAMLs) { + io.mapRequired("functions", FuncYAMLs.functions); } }; @@ -197,10 +197,9 @@ CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { } Error CallSiteInfoLoader::processYAMLFunctions( - const yaml::FunctionsYAML &functionsYAML, - StringMap &FuncMap) { + const yaml::FunctionsYAML &FuncYAMLs, StringMap &FuncMap) { // For each function in the YAML file - for (const auto &FuncYAML : functionsYAML.functions) { + for (const 
auto &FuncYAML : FuncYAMLs.functions) { auto It = FuncMap.find(FuncYAML.name); if (It == FuncMap.end()) return createStringError( @@ -216,7 +215,7 @@ Error CallSiteInfoLoader::processYAMLFunctions( CallSiteInfo CSI; // Since YAML has specifies relative return offsets, add the function // start address to make the offset absolute. - CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset; + CSI.ReturnOffset = CallSiteYAML.return_offset; for (const auto &Regex : CallSiteYAML.match_regex) { uint32_t StrOffset = GCreator.insertString(Regex); CSI.MatchRegex.push_back(StrOffset); @@ -241,7 +240,7 @@ Error CallSiteInfoLoader::processYAMLFunctions( } raw_ostream &gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { - OS << " Return=" << HEX64(CSI.ReturnAddress); + OS << " Return=" << HEX64(CSI.ReturnOffset); OS << " Flags=" << HEX8(CSI.Flags); OS << " RegEx="; diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index 786633adc5120..cd92488e8b9cb 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -424,7 +424,7 @@ void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) { } void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) { - OS << HEX64(CSI.ReturnAddress); + OS << HEX16(CSI.ReturnOffset); std::string Flags; auto addFlag = [&](const char *Flag) { @@ -456,7 +456,7 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) { } void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC) { - OS << "CallSites (by return address):\n"; + OS << "CallSites (by relative return offset):\n"; for (const auto &CS : CSIC.CallSites) { OS.indent(2); dump(OS, CS); diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test index 255b68a343b8d..61304558ba63f 100644 --- 
a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test @@ -8,39 +8,39 @@ // RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s -// CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" +// CHECK-GSYM: FunctionInfo @ 0x[[#%x,FUNC_INFO:]]: [0x[[#%x,FUNC_START:]] - 0x[[#%x,FUNC_END:]]) "func_mainBin_dec_call_everything" // CHECK-GSYM-NEXT: LineTable: // func_mainBin_dec_call_everything() { -// CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:16 +// CHECK-GSYM-NEXT: 0x[[#%x,ENTRY:]] {{.*}}/call_sites.cpp:16 // func_mainBin_dec_01(); -// CHECK-GSYM-NEXT: 0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_01_CALL:]] {{.*}}/call_sites.cpp:17 // func_mainBin_dec_02(); -// CHECK-GSYM-NEXT: 0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_02_CALL:]] {{.*}}/call_sites.cpp:18 // func_mainBin_dec_03(); -// CHECK-GSYM-NEXT: [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_03_CALL:]] {{.*}}/call_sites.cpp:19 // func_mainBin_inc_01(); -// CHECK-GSYM-NEXT: [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_01_CALL:]] {{.*}}/call_sites.cpp:21 // func_mainBin_inc_02(); -// CHECK-GSYM-NEXT: [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_02_CALL:]] {{.*}}/call_sites.cpp:22 // func_mainBin_inc_03(); -// CHECK-GSYM-NEXT: [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_03_CALL:]] {{.*}}/call_sites.cpp:23 // g_func_ptr(); -// CHECK-GSYM-NEXT: [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_FUNC_CALL:]] {{.*}}/call_sites.cpp:25 // g_extern_func_ptr(); -// CHECK-GSYM-NEXT: 
[[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_EXTERN_FUNC_CALL:]] {{.*}}/call_sites.cpp:26 // g_volatile_var = 0; -// CHECK-GSYM-NEXT: [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28 +// CHECK-GSYM-NEXT: 0x[[#%x,ADDR_VAR_ASSIGN:]] {{.*}}/call_sites.cpp:28 // } -// CHECK-GSYM-NEXT: [[#%x,]] {{.*}}/call_sites.cpp:29 -// CHECK-GSYM-NEXT: CallSites (by return address): -// CHECK-GSYM-NEXT: [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] -// CHECK-GSYM-NEXT: [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] -// CHECK-GSYM-NEXT: [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] -// CHECK-GSYM-NEXT: [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] -// CHECK-GSYM-NEXT: [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] -// CHECK-GSYM-NEXT: [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] -// CHECK-GSYM-NEXT: [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*] -// CHECK-GSYM-NEXT: [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*] +// CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:29 +// CHECK-GSYM-NEXT: CallSites (by relative return offset): +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_DEC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_DEC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_01_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_FUNC_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] +// 
CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_EXTERN_FUNC_CALL,FUNC_START)]] Flags[None] MatchRegex[.*func.*] +// CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_VAR_ASSIGN,FUNC_START)]] Flags[ExternalCall] MatchRegex[.*extern_func.*] //--- callsites.yaml From 32f798791e36d7c6f70fa91e8ed5998856607478 Mon Sep 17 00:00:00 2001 From: Alex B Date: Thu, 14 Nov 2024 10:36:07 -0800 Subject: [PATCH 07/12] Fix lit test --- .../macho-gsym-callsite-info-dsym.yaml | 42 +++++++++---------- .../macho-gsym-callsite-info-exe.yaml | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml index 5c31d60962669..2ea53f5e1ce0b 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml @@ -10,39 +10,39 @@ # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s -# CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" +# CHECK-GSYM: FunctionInfo @ 0x[[#%x,FUNC_INFO:]]: [0x[[#%x,FUNC_START:]] - 0x[[#%x,FUNC_END:]]) "func_mainBin_dec_call_everything" # CHECK-GSYM-NEXT: LineTable: # // func_mainBin_dec_call_everything() { -# CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:16 +# CHECK-GSYM-NEXT: 0x[[#%x,ENTRY:]] {{.*}}/call_sites.cpp:16 # // func_mainBin_dec_01(); -# CHECK-GSYM-NEXT: 0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_01_CALL:]] {{.*}}/call_sites.cpp:17 # // func_mainBin_dec_02(); -# CHECK-GSYM-NEXT: 0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_02_CALL:]] {{.*}}/call_sites.cpp:18 # // func_mainBin_dec_03(); -# CHECK-GSYM-NEXT: [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_DEC_03_CALL:]] 
{{.*}}/call_sites.cpp:19 # // func_mainBin_inc_01(); -# CHECK-GSYM-NEXT: [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_01_CALL:]] {{.*}}/call_sites.cpp:21 # // func_mainBin_inc_02(); -# CHECK-GSYM-NEXT: [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_02_CALL:]] {{.*}}/call_sites.cpp:22 # // func_mainBin_inc_03(); -# CHECK-GSYM-NEXT: [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_INC_03_CALL:]] {{.*}}/call_sites.cpp:23 # // g_func_ptr(); -# CHECK-GSYM-NEXT: [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_FUNC_CALL:]] {{.*}}/call_sites.cpp:25 # // g_extern_func_ptr(); -# CHECK-GSYM-NEXT: [[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_EXTERN_FUNC_CALL:]] {{.*}}/call_sites.cpp:26 # // g_volatile_var = 0; -# CHECK-GSYM-NEXT: [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28 +# CHECK-GSYM-NEXT: 0x[[#%x,ADDR_VAR_ASSIGN:]] {{.*}}/call_sites.cpp:28 # // } -# CHECK-GSYM-NEXT: [[#%x,]] {{.*}}/call_sites.cpp:29 -# CHECK-GSYM-NEXT: CallSites (by return address): -# CHECK-GSYM-NEXT: [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] -# CHECK-GSYM-NEXT: [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] -# CHECK-GSYM-NEXT: [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] -# CHECK-GSYM-NEXT: [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] -# CHECK-GSYM-NEXT: [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] -# CHECK-GSYM-NEXT: [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] -# CHECK-GSYM-NEXT: [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*] -# CHECK-GSYM-NEXT: [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*] +# CHECK-GSYM-NEXT: 0x[[#%x,]] {{.*}}/call_sites.cpp:29 +# CHECK-GSYM-NEXT: 
CallSites (by relative return offset): +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_DEC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_DEC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_01_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_INC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_FUNC_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_EXTERN_FUNC_CALL,FUNC_START)]] Flags[None] MatchRegex[.*func.*] +# CHECK-GSYM-NEXT: 0x[[#%.4x,sub(ADDR_VAR_ASSIGN,FUNC_START)]] Flags[ExternalCall] MatchRegex[.*extern_func.*] #--- callsites.yaml diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml index b454a9e14699a..4a5324f68d2c6 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml @@ -11,7 +11,7 @@ # CHECK-GSYM: FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything" -# CHECK-GSYM-NEXT: CallSites (by return address): +# CHECK-GSYM-NEXT: CallSites (by relative return offset): # CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01] # CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02] # CHECK-GSYM-NEXT: 0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03] From 584fdf9d41b0edab5ae20be33db00a98f8569e8a Mon Sep 17 00:00:00 2001 From: Alex B Date: Thu, 14 Nov 2024 11:03:44 -0800 Subject: [PATCH 08/12] Address Feedback Nr 4 --- 
llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 10 +++++----- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index af2a374c908bc..bd7eea59169f7 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -1,4 +1,4 @@ -//===- CallSiteInfo.cpp ----------------------------------*- C++ -*-===// +//===- CallSiteInfo.cpp -----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -160,10 +160,10 @@ Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, // Step 2: Parse YAML content yaml::FunctionsYAML FuncsYAML; - yaml::Input yin(Buffer->getMemBufferRef()); - yin >> FuncsYAML; - if (yin.error()) - return createStringError(yin.error(), "Error parsing YAML file: %s\n", + yaml::Input Yin(Buffer->getMemBufferRef()); + Yin >> FuncsYAML; + if (Yin.error()) + return createStringError(Yin.error(), "Error parsing YAML file: %s\n", Buffer->getBufferIdentifier().str().c_str()); // Step 3: Build function map from Funcs diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 3cabb27053a5c..51ec9ea553396 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -386,9 +386,10 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { } StringRef GsymCreator::getString(uint32_t offset) { - assert(StringOffsetMap.count(offset) && + auto I = StringOffsetMap.find(offset); + assert(I != StringOffsetMap.end() && "GsymCreator::getString expects a valid offset as parameter."); - return StringOffsetMap.find(offset)->second.val(); + return I->second.val(); } void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { From d5bd6d57ed7b20f5ec53c9c8df7d2d15df165bdd Mon Sep 17 
00:00:00 2001 From: Alex B Date: Thu, 14 Nov 2024 11:12:11 -0800 Subject: [PATCH 09/12] Fix Formatting --- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 51ec9ea553396..1f48b4cecc285 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -387,7 +387,7 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { StringRef GsymCreator::getString(uint32_t offset) { auto I = StringOffsetMap.find(offset); - assert(I != StringOffsetMap.end() && + assert(I != StringOffsetMap.end() && "GsymCreator::getString expects a valid offset as parameter."); return I->second.val(); } From fda812cb244c897c397dc287c732cd3a9d003b8a Mon Sep 17 00:00:00 2001 From: Alex B Date: Thu, 14 Nov 2024 12:13:52 -0800 Subject: [PATCH 10/12] Address Feedback Nr 5 --- llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 28 ++++++++++-------------- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 4 ++-- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index bd7eea59169f7..0411002fd374f 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -71,10 +71,10 @@ Expected CallSiteInfo::decode(DataExtractor &Data, Error CallSiteInfoCollection::encode(FileWriter &O) const { O.writeU32(CallSites.size()); - for (const CallSiteInfo &CSI : CallSites) { + for (const CallSiteInfo &CSI : CallSites) if (Error Err = CSI.encode(O)) return Err; - } + return Error::success(); } @@ -175,23 +175,17 @@ Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, StringMap CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { + // If the function name is already in the map, don't update it. This way we + // preferentially use the first encountered function. 
Since symbols are + // loaded from dSYM first, we end up preferring keeping track of symbols + // from dSYM rather than from the symbol table - which is what we want to + // do. StringMap FuncMap; - auto insertFunc = [&](auto &Function) { - StringRef FuncName = GCreator.getString(Function.Name); - // If the function name is already in the map, don't update it. This way we - // preferentially use the first encountered function. Since symbols are - // loaded from dSYM first, we end up preferring keeping track of symbols - // from dSYM rather than from the symbol table - which is what we want to - // do. - if (FuncMap.count(FuncName)) - return; - FuncMap[FuncName] = &Function; - }; for (auto &Func : Funcs) { - insertFunc(Func); - if (Func.MergedFunctions.has_value()) - for (auto &MFunc : Func.MergedFunctions->MergedFunctions) - insertFunc(MFunc); + FuncMap.try_emplace(GCreator.getString(Func.Name), &Func); + if (auto MFuncs = Func.MergedFunctions) + for (auto &MFunc : MFuncs->MergedFunctions) + FuncMap.try_emplace(GCreator.getString(MFunc.Name), &MFunc); } return FuncMap; } diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 1f48b4cecc285..b354593f6dbf8 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -385,8 +385,8 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) { return StrOff; } -StringRef GsymCreator::getString(uint32_t offset) { - auto I = StringOffsetMap.find(offset); +StringRef GsymCreator::getString(uint32_t Offset) { + auto I = StringOffsetMap.find(Offset); assert(I != StringOffsetMap.end() && "GsymCreator::getString expects a valid offset as parameter."); return I->second.val(); From ae391a2ea66a81417ef3adf2e60233daac29bce4 Mon Sep 17 00:00:00 2001 From: Alex B Date: Mon, 25 Nov 2024 18:30:11 -0800 Subject: [PATCH 11/12] Address Feedback Nr.6 --- llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h | 10 +++++++--- 
llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h | 9 +++++---- llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp | 8 +++----- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 4 ++-- .../ARM_AArch64/macho-gsym-callsite-info-dsym.yaml | 2 +- .../ARM_AArch64/macho-gsym-callsite-info-exe.yaml | 2 +- .../ARM_AArch64/macho-gsym-callsite-info-obj.test | 2 +- llvm/tools/llvm-gsymutil/Opts.td | 4 ++-- llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 6 +++--- 9 files changed, 25 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h index 8a9b3e5553121..55f7322029d0f 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h @@ -86,7 +86,8 @@ class CallSiteInfoLoader { /// structures. /// /// \param GCreator A reference to the GsymCreator. - CallSiteInfoLoader(GsymCreator &GCreator) : GCreator(GCreator) {} + CallSiteInfoLoader(GsymCreator &GCreator, std::vector &Funcs) + : GCreator(GCreator), Funcs(Funcs) {} /// This method reads the specified YAML file, parses its content, and updates /// the `Funcs` vector with call site information based on the YAML data. @@ -97,7 +98,7 @@ class CallSiteInfoLoader { /// file to be loaded. /// \returns An `llvm::Error` indicating success or describing any issues /// encountered during the loading process. - llvm::Error loadYAML(std::vector &Funcs, StringRef YAMLFile); + llvm::Error loadYAML(StringRef YAMLFile); private: /// Builds a map from function names to FunctionInfo pointers based on the @@ -106,7 +107,7 @@ class CallSiteInfoLoader { /// \param Funcs A reference to a vector of FunctionInfo objects. /// \returns A StringMap mapping function names (StringRef) to their /// corresponding FunctionInfo pointers. - StringMap buildFunctionMap(std::vector &Funcs); + StringMap buildFunctionMap(); /// Processes the parsed YAML functions and updates the `FuncMap` accordingly. 
/// @@ -121,6 +122,9 @@ class CallSiteInfoLoader { /// Reference to the parent Gsym Creator object. GsymCreator &GCreator; + + /// Reference to the vector of FunctionInfo objects to be populated. + std::vector &Funcs; }; raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI); diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h index 8e9e500f267c1..0d098da96dd27 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -329,14 +329,15 @@ class GsymCreator { /// \returns The unique 32 bit offset into the string table. uint32_t insertString(StringRef S, bool Copy = true); - /// Retrieve a string fromt he GSYM string table given its offset. + /// Retrieve a string from the GSYM string table given its offset. /// /// The offset is assumed to be a valid offset into the string table. /// otherwise an assert will be triggered. /// - /// \param offset The offset of the string to retrieve, previously returned by - /// insertString. \returns The string at the given offset in the string table. - StringRef getString(uint32_t offset); + /// \param Offset The offset of the string to retrieve, previously returned by + /// insertString. + /// \returns The string at the given offset in the string table. + StringRef getString(uint32_t Offset); /// Insert a file into this GSYM creator. 
/// diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp index 0411002fd374f..cf4c64e5e85ca 100644 --- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp @@ -149,8 +149,7 @@ template <> struct MappingTraits { LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML) LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML) -Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, - StringRef YAMLFile) { +Error CallSiteInfoLoader::loadYAML(StringRef YAMLFile) { // Step 1: Read YAML file auto BufferOrError = MemoryBuffer::getFile(YAMLFile); if (!BufferOrError) @@ -167,14 +166,13 @@ Error CallSiteInfoLoader::loadYAML(std::vector &Funcs, Buffer->getBufferIdentifier().str().c_str()); // Step 3: Build function map from Funcs - auto FuncMap = buildFunctionMap(Funcs); + auto FuncMap = buildFunctionMap(); // Step 4: Process parsed YAML functions and update FuncMap return processYAMLFunctions(FuncsYAML, FuncMap); } -StringMap -CallSiteInfoLoader::buildFunctionMap(std::vector &Funcs) { +StringMap CallSiteInfoLoader::buildFunctionMap() { // If the function name is already in the map, don't update it. This way we // preferentially use the first encountered function. Since symbols are // loaded from dSYM first, we end up preferring keeping track of symbols diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index b354593f6dbf8..eb26c637a2ca3 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -191,8 +191,8 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) { // Use the loader to load call site information from the YAML file. 
- CallSiteInfoLoader Loader(*this); - return Loader.loadYAML(Funcs, YAMLFile); + CallSiteInfoLoader Loader(*this, Funcs); + return Loader.loadYAML(YAMLFile); } void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) { diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml index 2ea53f5e1ce0b..c636afe5cb850 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml @@ -4,7 +4,7 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/call_sites.dSYM.yaml -o %t/call_sites.dSYM -# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_dSYM.gsym +# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml index 4a5324f68d2c6..557f43d778e58 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml @@ -4,7 +4,7 @@ # RUN: split-file %s %t # RUN: yaml2obj %t/call_sites.exe.yaml -o %t/call_sites.exe -# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_exe.gsym +# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_exe.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_exe.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git 
a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test index 61304558ba63f..2d082ed6477f1 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test @@ -2,7 +2,7 @@ // Assemble the input assembly code into an object file // RUN: llc -enable-machine-outliner=never -mtriple arm64-apple-darwin -filetype=obj %t/call_sites.ll -o %t/call_sites.o -// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_obj.gsym +// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_obj.gsym // Dump the GSYM file and check the output for callsite information // RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index 8dcce1f738875..b1485d14290aa 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -18,8 +18,8 @@ defm convert : "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">; def merged_functions : FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">; -defm callsites_from_yaml : - Eq<"callsites-from-yaml", "Load call site info from YAML file.">; +defm callsites_yaml_file : + Eq<"callsites-yaml-file", "Load call site info from YAML file.">; defm arch : Eq<"arch", "Process debug information for the specified CPU architecture only.\nArchitectures may be specified by name or by number.\nThis option can be specified multiple times, once for each desired architecture">; diff --git 
a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 6c7506e51e649..4d441465c47fb 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -180,12 +180,12 @@ static void parseArgs(int argc, char **argv) { LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions); - if (Args.hasArg(OPT_callsites_from_yaml_EQ)) { - CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_from_yaml_EQ); + if (Args.hasArg(OPT_callsites_yaml_file_EQ)) { + CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ); if (CallSiteYamlPath.empty()) { llvm::errs() << ToolName - << ": --callsites-from-yaml option requires a non-empty argument.\n"; + << ": --callsites-yaml-file option requires a non-empty argument.\n"; std::exit(1); } } From 4e7d719f1e34791d34b472f44b7ecdf3fc231fea Mon Sep 17 00:00:00 2001 From: Alex B Date: Tue, 26 Nov 2024 14:22:25 -0800 Subject: [PATCH 12/12] Address Feedback Nr.7 --- llvm/tools/llvm-gsymutil/Opts.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index b1485d14290aa..00f903c5211f3 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -19,7 +19,7 @@ defm convert : def merged_functions : FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">; defm callsites_yaml_file : - Eq<"callsites-yaml-file", "Load call site info from YAML file.">; + Eq<"callsites-yaml-file", "Load call site info from YAML file. 
Useful for testing.">, Flags<[HelpHidden]>; defm arch : Eq<"arch", "Process debug information for the specified CPU architecture only.\nArchitectures may be specified by name or by number.\nThis option can be specified multiple times, once for each desired architecture">;