From 67ece329fa68e93b2b47dcc7f3c0d2ea88deef5c Mon Sep 17 00:00:00 2001 From: Alex B Date: Fri, 13 Dec 2024 10:30:25 -0800 Subject: [PATCH 1/4] [llvm-gsymutil] Add option to load callsites from DWARF --- .../llvm/DebugInfo/GSYM/DwarfTransformer.h | 15 ++++- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 56 +++++++++++++++++++ .../macho-gsym-merged-callsites-dsym.yaml | 2 + llvm/tools/llvm-gsymutil/Opts.td | 1 + llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 5 +- 5 files changed, 76 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index 198c0ddc26582..a6db47a6b397e 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -33,14 +33,17 @@ class OutputAggregator; /// allows this class to be unit tested. class DwarfTransformer { public: - /// Create a DWARF transformer. /// /// \param D The DWARF to use when converting to GSYM. /// /// \param G The GSYM creator to populate with the function information /// from the debug info. - DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {} + /// + /// \param LDCS Flag to indicate weather we should load the call site + /// information from DWARF `DW_TAG_call_site` entries + DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false) + : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {} /// Extract the DWARF from the supplied object file and convert it into the /// Gsym format in the GsymCreator object that is passed in. Returns an @@ -83,8 +86,16 @@ class DwarfTransformer { /// \param Die The DWARF debug info entry to parse. void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die); + /// Parse call site information from DWARF + /// + /// \param CUI The compile unit info for the current CU. + /// \param Die The DWARFDie for the function. + /// \param FI The FunctionInfo for the function being populated. 
+ void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI); + DWARFContext &DICtx; GsymCreator &Gsym; + bool LoadDwarfCallSites; friend class DwarfTransformerTest; }; diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 520baa5ac720e..9b024d50f2a10 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI, FI.Inline = std::nullopt; } } + + // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs. + if (LoadDwarfCallSites) + parseCallSiteInfoFromDwarf(CUI, Die, FI); + Gsym.addFunctionInfo(std::move(FI)); } } break; @@ -553,6 +558,57 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI, handleDie(Out, CUI, ChildDie); } +void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, + FunctionInfo &FI) { + // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE. + // DWARF specification: + // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset. + // - DW_AT_call_origin might point to a DIE of the function being called. + // For simplicity, we will just extract return_offset and possibly target name + // if available. + + CallSiteInfoCollection CSIC; + + for (DWARFDie Child : Die.children()) { + if (Child.getTag() == dwarf::DW_TAG_call_site) { + CallSiteInfo CSI; + // DW_AT_call_return_pc: the return PC (address). We'll convert it to + // offset relative to FI's start. + uint64_t ReturnPC = + dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0); + if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress()) + continue; + + CSI.ReturnOffset = ReturnPC - FI.startAddress(); + + // Attempt to get function name from DW_AT_call_origin. If present, we can + // insert it as a match regex. 
+ if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie( + dwarf::DW_AT_call_origin)) { + if (auto Name = OriginDie.getName(DINameKind::ShortName)) { + uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false); + CSI.MatchRegex.push_back(NameOff); + } + } + + // For now, we won't attempt to deduce InternalCall/ExternalCall flags + // from DWARF. + CSI.Flags = CallSiteInfo::Flags::None; + + CSIC.CallSites.push_back(CSI); + } + } + + if (!CSIC.CallSites.empty()) { + if (!FI.CallSites) + FI.CallSites = CallSiteInfoCollection(); + // Append parsed DWARF callsites: + FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(), + CSIC.CallSites.begin(), + CSIC.CallSites.end()); + } +} + Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { size_t NumBefore = Gsym.getNumFunctionInfos(); auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml index a98d4db0a791b..4cecc79c72b4b 100644 --- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml +++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml @@ -4,9 +4,11 @@ # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM # RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym +# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym # Dump the GSYM file and check the output for callsite information # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s +# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s # CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 
0x[[#%x,FUNC4_1_END:]]) "function4_copy1" # CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]: diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td index 00f903c5211f3..d61b418d2d843 100644 --- a/llvm/tools/llvm-gsymutil/Opts.td +++ b/llvm/tools/llvm-gsymutil/Opts.td @@ -18,6 +18,7 @@ defm convert : "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">; def merged_functions : FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">; +def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">; defm callsites_yaml_file : Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>; defm arch : diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp index 42900159ce966..aed4ae7c615fd 100644 --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -99,6 +99,7 @@ static bool Quiet; static std::vector LookupAddresses; static bool LookupAddressesFromStdin; static bool StoreMergedFunctionInfo = false; +static bool LoadDwarfCallSites = false; static std::string CallSiteYamlPath; static void parseArgs(int argc, char **argv) { @@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) { std::exit(1); } } + + LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites); } /// @} @@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, // Make a DWARF transformer object and populate the ranges of the code // so we don't end up adding invalid functions to GSYM data. 
- DwarfTransformer DT(*DICtx, Gsym); + DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites); if (!TextRanges.empty()) Gsym.SetValidTextRanges(TextRanges); From d7ab22a2485609c8d2f12d9af075ca1809521dbb Mon Sep 17 00:00:00 2001 From: Alex B Date: Sun, 15 Dec 2024 21:07:21 -0800 Subject: [PATCH 2/4] Address Feedback Nr.1 --- .../llvm/DebugInfo/GSYM/DwarfTransformer.h | 2 +- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 45 ++++++++++--------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h index a6db47a6b397e..d4f39ec0dc287 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h +++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h @@ -40,7 +40,7 @@ class DwarfTransformer { /// \param G The GSYM creator to populate with the function information /// from the debug info. /// - /// \param LDCS Flag to indicate weather we should load the call site + /// \param LDCS Flag to indicate whether we should load the call site /// information from DWARF `DW_TAG_call_site` entries DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false) : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {} diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 9b024d50f2a10..1ad82f00da94c 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -570,33 +570,34 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, CallSiteInfoCollection CSIC; for (DWARFDie Child : Die.children()) { - if (Child.getTag() == dwarf::DW_TAG_call_site) { - CallSiteInfo CSI; - // DW_AT_call_return_pc: the return PC (address). We'll convert it to - // offset relative to FI's start. 
- uint64_t ReturnPC = - dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0); - if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress()) - continue; + if (Child.getTag() != dwarf::DW_TAG_call_site) + continue; - CSI.ReturnOffset = ReturnPC - FI.startAddress(); + CallSiteInfo CSI; + // DW_AT_call_return_pc: the return PC (address). We'll convert it to + // offset relative to FI's start. + auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc)); + if (!ReturnPC || *ReturnPC < FI.startAddress() || + *ReturnPC >= FI.endAddress()) + continue; - // Attempt to get function name from DW_AT_call_origin. If present, we can - // insert it as a match regex. - if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie( - dwarf::DW_AT_call_origin)) { - if (auto Name = OriginDie.getName(DINameKind::ShortName)) { - uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false); - CSI.MatchRegex.push_back(NameOff); - } + CSI.ReturnOffset = *ReturnPC - FI.startAddress(); + + // Attempt to get function name from DW_AT_call_origin. If present, we can + // insert it as a match regex. + if (DWARFDie OriginDie = + Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) { + if (auto Name = OriginDie.getName(DINameKind::ShortName)) { + uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false); + CSI.MatchRegex.push_back(NameOff); } + } - // For now, we won't attempt to deduce InternalCall/ExternalCall flags - // from DWARF. - CSI.Flags = CallSiteInfo::Flags::None; + // For now, we won't attempt to deduce InternalCall/ExternalCall flags + // from DWARF. 
+ CSI.Flags = CallSiteInfo::Flags::None; - CSIC.CallSites.push_back(CSI); - } + CSIC.CallSites.push_back(CSI); } if (!CSIC.CallSites.empty()) { From 751a615d82cbe62e97ae10a06c2c1599dc8bf41c Mon Sep 17 00:00:00 2001 From: Alex B Date: Mon, 16 Dec 2024 14:05:12 -0800 Subject: [PATCH 3/4] Address Feedback Nr.2 --- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 1ad82f00da94c..b9d9373439a31 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -576,7 +576,8 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, CallSiteInfo CSI; // DW_AT_call_return_pc: the return PC (address). We'll convert it to // offset relative to FI's start. - auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc)); + auto ReturnPC = + dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc)); if (!ReturnPC || *ReturnPC < FI.startAddress() || *ReturnPC >= FI.endAddress()) continue; @@ -587,9 +588,14 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, // insert it as a match regex. if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) { - if (auto Name = OriginDie.getName(DINameKind::ShortName)) { - uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false); - CSI.MatchRegex.push_back(NameOff); + + // Include the linkage (mangled) name if available, otherwise the short name. 
+ if (const char *LinkName = OriginDie.getLinkageName()) { + uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false); + CSI.MatchRegex.push_back(LinkNameOff); + } else if (const char *ShortName = OriginDie.getShortName()) { + uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false); + CSI.MatchRegex.push_back(ShortNameOff); } } From ee508a136a6e8a7ddbe8b5253305a934d230b100 Mon Sep 17 00:00:00 2001 From: Alex B Date: Mon, 16 Dec 2024 15:22:27 -0800 Subject: [PATCH 4/4] Address Feedback Nr.2.1 --- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index b9d9373439a31..568af5ee8e3ae 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -578,8 +578,7 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, // offset relative to FI's start. auto ReturnPC = dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc)); - if (!ReturnPC || *ReturnPC < FI.startAddress() || - *ReturnPC >= FI.endAddress()) + if (!ReturnPC || !FI.Range.contains(*ReturnPC)) continue; CSI.ReturnOffset = *ReturnPC - FI.startAddress();