From 159cfd761e09e61949b9abf2b63ac756f6bf0fdf Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Fri, 31 Oct 2025 16:26:12 -0700 Subject: [PATCH 1/5] Fix getting section info in large mach-o files. Mach-o has 32 bit file offsets in the MachO::section_64 structs. dSYM files can contain sections whose start offset exceeds UINT32_MAX, which means the MachO::section_64.offset will get truncated. We can calculate when this happens and properly adjust the section offset to be 64 bit safe. This means tools can get the correct section contents for large dSYM files and allows tools that parse DWARF, like llvm-gsymutil, to be able to load and convert these files correctly. --- llvm/include/llvm/Object/MachO.h | 2 +- llvm/lib/Object/MachOObjectFile.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index 01e7c6b07dd36..f4c1e30b097ee 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -447,7 +447,7 @@ class LLVM_ABI MachOObjectFile : public ObjectFile { uint64_t getSectionAddress(DataRefImpl Sec) const override; uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; - ArrayRef getSectionContents(uint32_t Offset, uint64_t Size) const; + ArrayRef getSectionContents(uint64_t Offset, uint64_t Size) const; Expected> getSectionContents(DataRefImpl Sec) const override; uint64_t getSectionAlignment(DataRefImpl Sec) const override; diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index e09dc947c2779..300a5f7ed2a48 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -1978,20 +1978,34 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { return SectSize; } -ArrayRef MachOObjectFile::getSectionContents(uint32_t Offset, +ArrayRef MachOObjectFile::getSectionContents(uint64_t Offset, uint64_t Size) 
const { return arrayRefFromStringRef(getData().substr(Offset, Size)); } Expected> MachOObjectFile::getSectionContents(DataRefImpl Sec) const { - uint32_t Offset; + uint64_t Offset; uint64_t Size; if (is64Bit()) { MachO::section_64 Sect = getSection64(Sec); Offset = Sect.offset; Size = Sect.size; + // Check for large mach-o files where the section contents might exceed + // 4GB. MachO::section_64 objects only have 32 bit file offsets to the + // section contents and can overflow in dSYM files. We can track this and + // adjust the section offset to be 64 bit safe. + uint64_t SectOffsetAdjust = 0; + for (uint32_t SectIdx=0; SectIdx(*this, Sections[SectIdx]); + const uint64_t EndSectFileOffset = + (uint64_t)CurrSect.offset + CurrSect.size; + if (EndSectFileOffset >= UINT32_MAX) + SectOffsetAdjust += EndSectFileOffset & 0xFFFFFFFF00000000ull; + } + Offset += SectOffsetAdjust; } else { MachO::section Sect = getSection(Sec); Offset = Sect.offset; From 350328aeb916bb36859d7e6ff77988981a81e56c Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Sun, 2 Nov 2025 16:37:02 -0800 Subject: [PATCH 2/5] Address user feedback to check for increasing section file offsets. We now return an error if a section file offset exceeds 4GB and the sections are not ordered in the mach-o file. If sections are not ordered, we can't assume the section file offset overflows make sense to apply to other sections, but we can if they are ordered. --- llvm/lib/Object/MachOObjectFile.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 300a5f7ed2a48..ddde073eb43d6 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -1996,14 +1996,20 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { // 4GB. MachO::section_64 objects only have 32 bit file offsets to the // section contents and can overflow in dSYM files. 
We can track this and // adjust the section offset to be 64 bit safe. + // Assumes the sections are ordered. + uint64_t PrevTrueOffset = 0; uint64_t SectOffsetAdjust = 0; for (uint32_t SectIdx=0; SectIdx(*this, Sections[SectIdx]); + uint64_t CurrTrueOffset = (uint64_t)CurrSect.offset + SectOffsetAdjust; + if ((SectOffsetAdjust > 0) && (PrevTrueOffset > CurrTrueOffset)) + return malformedError("section data exceeds 4GB and are not ordered"); const uint64_t EndSectFileOffset = (uint64_t)CurrSect.offset + CurrSect.size; - if (EndSectFileOffset >= UINT32_MAX) + if (EndSectFileOffset > UINT32_MAX) SectOffsetAdjust += EndSectFileOffset & 0xFFFFFFFF00000000ull; + PrevTrueOffset = CurrTrueOffset; } Offset += SectOffsetAdjust; } else { From 1391dd7df9c605242d9ff3ddcccd8997f8b127b0 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Sun, 2 Nov 2025 16:40:56 -0800 Subject: [PATCH 3/5] Fix comment to reflect new code. --- llvm/lib/Object/MachOObjectFile.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index ddde073eb43d6..82045b8464a27 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -1995,8 +1995,9 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { // Check for large mach-o files where the section contents might exceed // 4GB. MachO::section_64 objects only have 32 bit file offsets to the // section contents and can overflow in dSYM files. We can track this and - // adjust the section offset to be 64 bit safe. - // Assumes the sections are ordered. + // adjust the section offset to be 64 bit safe. If sections overflow then + // section ordering is enforced. If sections are not ordered, then an error + // will be returned stopping invalid section data from being returned. 
uint64_t PrevTrueOffset = 0; uint64_t SectOffsetAdjust = 0; for (uint32_t SectIdx=0; SectIdx Date: Sun, 2 Nov 2025 16:41:35 -0800 Subject: [PATCH 4/5] Run clang format. --- llvm/lib/Object/MachOObjectFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 82045b8464a27..d920437498507 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -2000,7 +2000,7 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { // will be returned stopping invalid section data from being returned. uint64_t PrevTrueOffset = 0; uint64_t SectOffsetAdjust = 0; - for (uint32_t SectIdx=0; SectIdx(*this, Sections[SectIdx]); uint64_t CurrTrueOffset = (uint64_t)CurrSect.offset + SectOffsetAdjust; From eac2e6af39e13464689794f9f7dcf17c4272f275 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 3 Nov 2025 14:05:37 -0800 Subject: [PATCH 5/5] Improve error message when section data exceeds 4GB. --- llvm/lib/Object/MachOObjectFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index d920437498507..c2f4560c06c0d 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -2005,7 +2005,8 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { getStruct(*this, Sections[SectIdx]); uint64_t CurrTrueOffset = (uint64_t)CurrSect.offset + SectOffsetAdjust; if ((SectOffsetAdjust > 0) && (PrevTrueOffset > CurrTrueOffset)) - return malformedError("section data exceeds 4GB and are not ordered"); + return malformedError("section data exceeds 4GB and section file " + "offsets are not ordered"); const uint64_t EndSectFileOffset = (uint64_t)CurrSect.offset + CurrSect.size; if (EndSectFileOffset > UINT32_MAX)