llvm · Sterling-Augustine · Jun 6, 2025 · May 15, 2025 · Jun 4, 2025 · Jun 5, 2025
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -11,44 +11,57 @@
 
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFDataExtractorSimple.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
 
 namespace llvm {
-class DWARFObject;
 
-/// A DWARFDataExtractorSimple (typically for an in-memory copy of an
-/// object-file section) plus a relocation map for that section, if there is
-/// one.
-class DWARFDataExtractor : public DWARFDataExtractorSimple {
+/// A DWARFDataExtractorBase (typically for an in-memory copy of an object-file
+/// section) plus a relocation map for that section, if there is one.
+/// Provides getRelocatedValueImpl, the hook that
+/// DWARFDataExtractorBase::getRelocatedValue dispatches to.
+class DWARFDataExtractor : public DWARFDataExtractorBase<DWARFDataExtractor> {
   const DWARFObject *Obj = nullptr;
   const DWARFSection *Section = nullptr;
 
 public:
-  using DWARFDataExtractorSimple::DWARFDataExtractorSimple;
+  using DWARFDataExtractorBase::DWARFDataExtractorBase;
 
   /// Constructor for the normal case of extracting data from a DWARF section.
   /// The DWARFSection's lifetime must be at least as long as the extractor's.
   DWARFDataExtractor(const DWARFObject &Obj, const DWARFSection &Section,
                      bool IsLittleEndian, uint8_t AddressSize)
-      : DWARFDataExtractorSimple(Section.Data, IsLittleEndian, AddressSize),
+      : DWARFDataExtractorBase(Section.Data, IsLittleEndian, AddressSize),
         Obj(&Obj), Section(&Section) {}
 
   /// Truncating constructor
   DWARFDataExtractor(const DWARFDataExtractor &Other, size_t Length)
-      : DWARFDataExtractorSimple(Other.getData().substr(0, Length),
-                                 Other.isLittleEndian(),
-                                 Other.getAddressSize()),
+      : DWARFDataExtractorBase(Other.getData().substr(0, Length),
+                               Other.isLittleEndian(), Other.getAddressSize()),
         Obj(Other.Obj), Section(Other.Section) {}
 
   /// Extracts a value and applies a relocation to the result if
   /// one exists for the given offset.
-  uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
-                             uint64_t *SectionIndex = nullptr,
-                             Error *Err = nullptr) const override;
+  ///
+  /// \p SecNdx, when non-null, is first set to
+  /// object::SectionedAddress::UndefSection and is overwritten with the
+  /// relocation entry's SectionIndex only when an entry is found and no error
+  /// is set in \p Err.
+  uint64_t getRelocatedValueImpl(uint32_t Size, uint64_t *Off, uint64_t *SecNdx,
+                                 Error *Err) const {
+    if (SecNdx)
+      *SecNdx = object::SectionedAddress::UndefSection;
+    // With no section attached there is nothing to look relocations up in, so
+    // the value is returned exactly as read.
+    if (!Section)
+      return getUnsigned(Off, Size, Err);
 
-  uint64_t getRelocatedValue(Cursor &C, uint32_t Size,
-                             uint64_t *SectionIndex = nullptr) const override {
-    return getRelocatedValue(Size, &getOffset(C), SectionIndex, &getError(C));
+    ErrorAsOutParameter ErrAsOut(Err);
+    // The lookup uses *Off as it is before Off is handed to getUnsigned below.
+    std::optional<RelocAddrEntry> E = Obj->find(*Section, *Off);
+    uint64_t LocData = getUnsigned(Off, Size, Err);
+    // No relocation entry, or an error is set: return the value as read.
+    if (!E || (Err && *Err))
+      return LocData;
+    if (SecNdx)
+      *SecNdx = E->SectionIndex;
+
+    uint64_t R = object::resolveRelocation(E->Resolver, E->Reloc,
+                                           E->SymbolValue, LocData);
+    // A second relocation, when present, is resolved on top of the result of
+    // the first one.
+    if (E->Reloc2)
+      R = object::resolveRelocation(E->Resolver, *E->Reloc2, E->SymbolValue2,
+                                    R);
+    return R;
   }
 };
 

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractorSimple.h
@@ -14,55 +14,42 @@
 
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/MathExtras.h"
 
 namespace llvm {
 
-/// A DataExtractor suitable use for parsing dwarf from memory with minimal
-/// dwarf context--no sections and no objects.  getRelocated* functions
-/// return raw values.
+/// A DataExtractor suitable for use in parsing DWARF from memory.
+/// getRelocatedValue defers to Relocator::getRelocatedValueImpl to relocate
+/// values as appropriate.
 
-class DWARFDataExtractorSimple : public DataExtractor {
+template <class Relocator> class DWARFDataExtractorBase : public DataExtractor {
 
 public:
-  DWARFDataExtractorSimple(StringRef Data, bool IsLittleEndian,
-                           uint8_t AddressSize)
+  /// Constructs an extractor over \p Data; all three arguments are forwarded
+  /// unchanged to the DataExtractor base.
+  DWARFDataExtractorBase(StringRef Data, bool IsLittleEndian,
+                         uint8_t AddressSize)
       : DataExtractor(Data, IsLittleEndian, AddressSize) {}
-  DWARFDataExtractorSimple(ArrayRef<uint8_t> Data, bool IsLittleEndian,
-                           uint8_t AddressSize)
+  /// Constructs an extractor over the bytes of \p Data, viewed as a StringRef
+  /// of the same size.
+  DWARFDataExtractorBase(ArrayRef<uint8_t> Data, bool IsLittleEndian,
+                         uint8_t AddressSize)
       : DataExtractor(
             StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()),
             IsLittleEndian, AddressSize) {}
 
   /// Truncating constructor
-  DWARFDataExtractorSimple(const DWARFDataExtractorSimple &Other, size_t Length)
+  /// Constructs an extractor over Other.getData().substr(0, Length) with the
+  /// same endianness and address size as \p Other.
+  DWARFDataExtractorBase(const DWARFDataExtractorBase &Other, size_t Length)
       : DataExtractor(Other.getData().substr(0, Length), Other.isLittleEndian(),
                       Other.getAddressSize()) {}
 
-  virtual ~DWARFDataExtractorSimple() = default;
+  /// Non-virtual: the derived class is reached through the Relocator template
+  /// parameter rather than through virtual dispatch. Defaulted (as the
+  /// destructor it replaces was) so that it is not user-provided.
+  ~DWARFDataExtractorBase() = default;
 
-  /// Extracts the DWARF "initial length" field, which can either be a 32-bit
-  /// value smaller than 0xfffffff0, or the value 0xffffffff followed by a
-  /// 64-bit length. Returns the actual length, and the DWARF format which is
-  /// encoded in the field. In case of errors, it returns {0, DWARF32} and
-  /// leaves the offset unchanged.
-  std::pair<uint64_t, dwarf::DwarfFormat>
-  getInitialLength(uint64_t *Off, Error *Err = nullptr) const;
-
-  std::pair<uint64_t, dwarf::DwarfFormat> getInitialLength(Cursor &C) const {
-    return getInitialLength(&getOffset(C), &getError(C));
+  /// Extracts a value as adjusted by the Relocator: every argument is passed
+  /// through to Relocator::getRelocatedValueImpl and its result is returned.
+  uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
+                             uint64_t *SectionIndex = nullptr,
+                             Error *Err = nullptr) const {
+    const Relocator &Derived = static_cast<const Relocator &>(*this);
+    return Derived.getRelocatedValueImpl(Size, Off, SectionIndex, Err);
+  }
-
-  /// Extracts a value and returns it unrelocated. Named such to implement the
-  /// required interface.
-  virtual uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
-                                     uint64_t *SectionIndex = nullptr,
-                                     Error *Err = nullptr) const {
-    assert(SectionIndex == nullptr &&
-           "DWARFDATAExtractorSimple cannot take section indices.");
-    return getUnsigned(Off, Size, Err);
-  }
-  virtual uint64_t getRelocatedValue(Cursor &C, uint32_t Size,
-                                     uint64_t *SectionIndex = nullptr) const {
+  /// Cursor overload: calls the pointer-based getRelocatedValue with the
+  /// addresses of the cursor's offset and error, so the position and any
+  /// failure are tracked by \p C.
+  uint64_t getRelocatedValue(Cursor &C, uint32_t Size,
+                             uint64_t *SectionIndex = nullptr) const {
     return getRelocatedValue(Size, &getOffset(C), SectionIndex, &getError(C));
   }
 
@@ -75,14 +62,131 @@ class DWARFDataExtractorSimple : public DataExtractor {
                              &getError(C));
   }
 
+  /// Reads the DWARF "initial length" field at \p *Off. The field is either a
+  /// 32-bit value below 0xfffffff0, or the escape value 0xffffffff followed by
+  /// a 64-bit length. On success the decoded length and the DWARF format it
+  /// implies are returned and \p *Off is moved past the field. On any error
+  /// the result is {0, DWARF32} and \p *Off is left unchanged; the error is
+  /// stored in \p Err when one is supplied and discarded otherwise.
+  std::pair<uint64_t, dwarf::DwarfFormat>
+  getInitialLength(uint64_t *Off, Error *Err = nullptr) const {
+    ErrorAsOutParameter ErrAsOut(Err);
+    const std::pair<uint64_t, dwarf::DwarfFormat> Failure(0, dwarf::DWARF32);
+    if (Err && *Err)
+      return Failure;
+
+    // Work on a private cursor so *Off is only updated once the whole field
+    // has been read successfully.
+    Cursor C(*Off);
+    uint64_t Length = getRelocatedValue(C, 4);
+    const bool IsDwarf64 = Length == dwarf::DW_LENGTH_DWARF64;
+
+    // Every other value in the reserved range is rejected.
+    if (!IsDwarf64 && Length >= dwarf::DW_LENGTH_lo_reserved) {
+      cantFail(C.takeError());
+      if (Err)
+        *Err = createStringError(
+            std::errc::invalid_argument,
+            "unsupported reserved unit length of value 0x%8.8" PRIx64, Length);
+      return Failure;
+    }
+
+    if (IsDwarf64)
+      Length = getRelocatedValue(C, 8);
+
+    if (!C) {
+      if (Err)
+        *Err = C.takeError();
+      else
+        consumeError(C.takeError());
+      return Failure;
+    }
+
+    *Off = C.tell();
+    return {Length, IsDwarf64 ? dwarf::DWARF64 : dwarf::DWARF32};
+  }
+
+  /// Cursor overload of getInitialLength: the cursor's own offset and error
+  /// are handed to the pointer-based overload.
+  std::pair<uint64_t, dwarf::DwarfFormat> getInitialLength(Cursor &C) const {
+    auto &CursorOffset = getOffset(C);
+    auto &CursorError = getError(C);
+    return getInitialLength(&CursorOffset, &CursorError);
+  }
+
   /// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding.
   /// There is a DWARF encoding that uses a PC-relative adjustment.
   /// For these values, \p AbsPosOffset is used to fix them, which should
   /// reflect the absolute address of this pointer.
+  ///
+  /// \returns std::nullopt when \p Encoding is DW_EH_PE_omit, when the value
+  /// format (or, for DW_EH_PE_absptr, the address size) is not handled, or
+  /// when the application bits request anything other than absptr or pcrel;
+  /// in that last case \p *Offset is restored to its value on entry.
   std::optional<uint64_t> getEncodedPointer(uint64_t *Offset, uint8_t Encoding,
-                                            uint64_t AbsPosOffset = 0) const;
+                                            uint64_t AbsPosOffset = 0) const {
+    if (Encoding == dwarf::DW_EH_PE_omit)
+      return std::nullopt;
+
+    uint64_t Result = 0;
+    uint64_t OldOffset = *Offset;
+    // First get value
+    switch (Encoding & 0x0F) {
+    case dwarf::DW_EH_PE_absptr:
+      switch (getAddressSize()) {
+      case 2:
+      case 4:
+      case 8:
+        Result = getUnsigned(Offset, getAddressSize());
+        break;
+      default:
+        return std::nullopt;
+      }
+      break;
+    case dwarf::DW_EH_PE_uleb128:
+      Result = getULEB128(Offset);
+      break;
+    case dwarf::DW_EH_PE_sleb128:
+      Result = getSLEB128(Offset);
+      break;
+    case dwarf::DW_EH_PE_udata2:
+      Result = getUnsigned(Offset, 2);
+      break;
+    case dwarf::DW_EH_PE_udata4:
+      Result = getUnsigned(Offset, 4);
+      break;
+    case dwarf::DW_EH_PE_udata8:
+      Result = getUnsigned(Offset, 8);
+      break;
+    case dwarf::DW_EH_PE_sdata2:
+      Result = getSigned(Offset, 2);
+      break;
+    // sdata4 and sdata8 are the only formats read through getRelocatedValue,
+    // so they are the only ones the Relocator gets to adjust.
+    case dwarf::DW_EH_PE_sdata4:
+      Result = SignExtend64<32>(getRelocatedValue(4, Offset));
+      break;
+    case dwarf::DW_EH_PE_sdata8:
+      Result = getRelocatedValue(8, Offset);
+      break;
+    default:
+      return std::nullopt;
+    }
+    // Then add relative offset, if required
+    switch (Encoding & 0x70) {
+    case dwarf::DW_EH_PE_absptr:
+      // do nothing
+      break;
+    case dwarf::DW_EH_PE_pcrel:
+      Result += AbsPosOffset;
+      break;
+    case dwarf::DW_EH_PE_datarel:
+    case dwarf::DW_EH_PE_textrel:
+    case dwarf::DW_EH_PE_funcrel:
+    case dwarf::DW_EH_PE_aligned:
+    default:
+      // Unsupported application: undo the read so the caller's offset is
+      // unchanged.
+      *Offset = OldOffset;
+      return std::nullopt;
+    }
+
+    return Result;
+  }
 };
 
-} // end namespace llvm
+/// A DWARFDataExtractorBase with no relocation support: getRelocated*
+/// functions return the raw values read from the data.
+class DWARFDataExtractorSimple
+    : public DWARFDataExtractorBase<DWARFDataExtractorSimple> {
+public:
+  using DWARFDataExtractorBase::DWARFDataExtractorBase;
+
+  /// Relocation hook called by DWARFDataExtractorBase::getRelocatedValue. It
+  /// has to be public: the base class reaches it through a pointer to this
+  /// class and is not a friend, so a private member would be inaccessible.
+  /// Reads \p Size bytes at \p *Off with getUnsigned and returns them
+  /// unmodified. \p SectionIndex must be null.
+  uint64_t getRelocatedValueImpl(uint32_t Size, uint64_t *Off,
+                                 uint64_t *SectionIndex = nullptr,
+                                 Error *Err = nullptr) const {
+    assert(SectionIndex == nullptr &&
+           "DWARFDATAExtractorSimple cannot take section indices.");
+    return getUnsigned(Off, Size, Err);
+  }
+};
 
+} // end namespace llvm
 #endif // LLVM_DEBUGINFO_DWARF_DWARFDATAEXTRACTOR_H
diff --git a/llvm/lib/DebugInfo/DWARF/CMakeLists.txt b/llvm/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -4,8 +4,6 @@ add_llvm_component_library(LLVMDebugInfoDWARF
   DWARFAcceleratorTable.cpp
   DWARFCompileUnit.cpp
   DWARFContext.cpp
-  DWARFDataExtractor.cpp
-  DWARFDataExtractorSimple.cpp
   DWARFDebugAbbrev.cpp
   DWARFDebugAddr.cpp
   DWARFDebugArangeSet.cpp

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp