Skip to content

Conversation

@alx32
Copy link
Contributor

@alx32 alx32 commented Dec 13, 2024

This change adds support for loading GSYM call site information from DWARF. Previously, call site information could only be loaded from YAML.

For testing, we add a pass where macho-gsym-merged-callsites-dsym loads callsite info from DWARF rather than YAML.

@alx32 alx32 force-pushed the 03_gsymutil_dwarf_callsites branch from 1c4ec48 to 0849770 Compare December 14, 2024 00:14
@alx32 alx32 marked this pull request as ready for review December 14, 2024 00:15
@llvmbot
Copy link
Member

llvmbot commented Dec 14, 2024

@llvm/pr-subscribers-debuginfo

Author: None (alx32)

Changes

This change adds support for loading GSYM call site information from DWARF. Previously, call site information could only be loaded from YAML.


Full diff: https://github.com/llvm/llvm-project/pull/119913.diff

5 Files Affected:

  • (modified) llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h (+14-2)
  • (modified) llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (+56)
  • (modified) llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml (+2)
  • (modified) llvm/tools/llvm-gsymutil/Opts.td (+1)
  • (modified) llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp (+4-1)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 198c0ddc265826..e4ba36be10a648 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -32,15 +32,19 @@ class OutputAggregator;
 /// function information. Creating a separate class to transform this data
 /// allows this class to be unit tested.
 class DwarfTransformer {
-public:
 
+public:
   /// Create a DWARF transformer.
   ///
   /// \param D The DWARF to use when converting to GSYM.
   ///
   /// \param G The GSYM creator to populate with the function information
   /// from the debug info.
-  DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {}
+  ///
+  /// \param LDCS Flag to indicate whether we should load the call site
+  /// information from DWARF `DW_TAG_call_site` entries
+  DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
+      : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
 
   /// Extract the DWARF from the supplied object file and convert it into the
   /// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -83,8 +87,16 @@ class DwarfTransformer {
   /// \param Die The DWARF debug info entry to parse.
   void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die);
 
+  /// Parse call site information from DWARF
+  ///
+  /// \param CUI   The compile unit info for the current CU.
+  /// \param Die   The DWARFDie for the function.
+  /// \param FI    The FunctionInfo for the function being populated.
+  void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
+
   DWARFContext &DICtx;
   GsymCreator &Gsym;
+  bool LoadDwarfCallSites;
 
   friend class DwarfTransformerTest;
 };
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 520baa5ac720e8..9b024d50f2a108 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
           FI.Inline = std::nullopt;
         }
       }
+
+      // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
+      if (LoadDwarfCallSites)
+        parseCallSiteInfoFromDwarf(CUI, Die, FI);
+
       Gsym.addFunctionInfo(std::move(FI));
     }
   } break;
@@ -553,6 +558,57 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
     handleDie(Out, CUI, ChildDie);
 }
 
+void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
+                                                  FunctionInfo &FI) {
+  // Collect call site records from the DW_TAG_call_site DIEs that are direct
+  // children of this subprogram DIE and append them to FI.CallSites:
+  // - DW_AT_call_return_pc gives the return address, which is stored as an
+  //   offset from the start of FI.
+  // - DW_AT_call_origin, when it resolves to a named DIE, gives the callee
+  //   name, which is stored as a match regex for the call site.
+  // CUI is not used by this function.
+
+  CallSiteInfoCollection CSIC;
+  for (DWARFDie Child : Die.children()) {
+    if (Child.getTag() != dwarf::DW_TAG_call_site)
+      continue;
+
+    // A call site without DW_AT_call_return_pc cannot be keyed by offset.
+    // Skip it explicitly instead of defaulting the address to 0, which would
+    // pass the range check below for a function that starts at address 0.
+    std::optional<uint64_t> ReturnPC =
+        dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc));
+    if (!ReturnPC)
+      continue;
+
+    // Ignore return addresses that fall outside of this function's range.
+    if (*ReturnPC < FI.startAddress() || *ReturnPC >= FI.endAddress())
+      continue;
+
+    CallSiteInfo CSI;
+    CSI.ReturnOffset = *ReturnPC - FI.startAddress();
+    // InternalCall/ExternalCall flags are not deduced from DWARF for now.
+    CSI.Flags = CallSiteInfo::Flags::None;
+    // Use the callee's short name, if available, as a match regex.
+    if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(
+            dwarf::DW_AT_call_origin)) {
+      if (auto Name = OriginDie.getName(DINameKind::ShortName))
+        CSI.MatchRegex.push_back(Gsym.insertString(Name, /*Copy=*/false));
+    }
+
+    CSIC.CallSites.push_back(CSI);
+  }
+
+  if (CSIC.CallSites.empty())
+    return;
+
+  // Create the collection on first use, then append the parsed call sites.
+  if (!FI.CallSites)
+    FI.CallSites = CallSiteInfoCollection();
+  FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(),
+                                 CSIC.CallSites.begin(), CSIC.CallSites.end());
+}
+
 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
   size_t NumBefore = Gsym.getNumFunctionInfos();
   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index a98d4db0a791b7..4cecc79c72b4b3 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -4,9 +4,11 @@
 # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
 
 # RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
 
 # CHECK-MERGED-CALLSITES:      FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
 # CHECK-MERGED-CALLSITES:      ++ Merged FunctionInfos[0]:
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 00f903c5211f39..d61b418d2d8439 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -18,6 +18,7 @@ defm convert :
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
   FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
 defm arch :
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 42900159ce9667..aed4ae7c615fd1 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -99,6 +99,7 @@ static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
 static bool StoreMergedFunctionInfo = false;
+static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
 static void parseArgs(int argc, char **argv) {
@@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) {
       std::exit(1);
     }
   }
+
+  LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
 }
 
 /// @}
@@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
 
   // Make a DWARF transformer object and populate the ranges of the code
   // so we don't end up adding invalid functions to GSYM data.
-  DwarfTransformer DT(*DICtx, Gsym);
+  DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
   if (!TextRanges.empty())
     Gsym.SetValidTextRanges(TextRanges);
 

@alx32 alx32 force-pushed the 03_gsymutil_dwarf_callsites branch from 0849770 to 65c01b8 Compare December 14, 2024 00:16
@github-actions
Copy link

github-actions bot commented Dec 14, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@alx32 alx32 force-pushed the 03_gsymutil_dwarf_callsites branch from 65c01b8 to 67ece32 Compare December 14, 2024 00:21
@alx32 alx32 merged commit 558de0e into llvm:main Dec 17, 2024
8 checks passed
@alx32 alx32 deleted the 03_gsymutil_dwarf_callsites branch December 18, 2024 00:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants