-
Notifications
You must be signed in to change notification settings - Fork 15k
[llvm-readobj, ELF] Support for reading binary has more than PN_XNUM segments #162648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
In an ELF file, an extended header may be present when phnum, shnum, or shstrndx is too large to fit in the 16-bit ELF header fields. This extended header uses section 0 to record these fields as 32-bit values. We implement this feature so that programs that rely on ELFFile::program_headers() get the correct number of segments. In addition, consumers don't have to check section 0 themselves; instead, they can use getPhNum() as an alternative.
|
@llvm/pr-subscribers-llvm-binary-utilities Author: None (aokblast) Changes: Some binaries, such as FreeBSD core dumps, use program headers to store mmaps… Full diff: https://github.com/llvm/llvm-project/pull/162648.diff 7 Files Affected:
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e619b186dfe3d..136f8cfbde818 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1123,6 +1123,8 @@ struct Elf64_Shdr {
Elf64_Xword sh_entsize;
};
+enum { PN_XNUM = 0xffff };
+
// Special section indices.
enum {
SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 59f63eb6b5bb6..3b96c0e2b9d1f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -278,9 +278,16 @@ class ELFFile {
std::vector<Elf_Shdr> FakeSections;
SmallString<0> FakeSectionStrings;
+ Elf_Word RealPhNum;
+ Elf_Word RealShNum;
+ Elf_Word RealShStrNdx;
+
ELFFile(StringRef Object);
public:
+ Elf_Word getPhNum() const { return RealPhNum; }
+ Elf_Word getShNum() const { return RealShNum; }
+ Elf_Word getShStrNdx() const { return RealShStrNdx; }
const Elf_Ehdr &getHeader() const {
return *reinterpret_cast<const Elf_Ehdr *>(base());
}
@@ -379,22 +386,21 @@ class ELFFile {
/// Iterate over program header table.
Expected<Elf_Phdr_Range> program_headers() const {
- if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
+ if (RealPhNum && getHeader().e_phentsize != sizeof(Elf_Phdr))
return createError("invalid e_phentsize: " +
Twine(getHeader().e_phentsize));
- uint64_t HeadersSize =
- (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+ uint64_t HeadersSize = (uint64_t)RealPhNum * getHeader().e_phentsize;
uint64_t PhOff = getHeader().e_phoff;
if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
return createError("program headers are longer than binary of size " +
Twine(getBufSize()) + ": e_phoff = 0x" +
Twine::utohexstr(getHeader().e_phoff) +
- ", e_phnum = " + Twine(getHeader().e_phnum) +
+ ", e_phnum = " + Twine(RealPhNum) +
", e_phentsize = " + Twine(getHeader().e_phentsize));
auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
- return ArrayRef(Begin, Begin + getHeader().e_phnum);
+ return ArrayRef(Begin, Begin + RealPhNum);
}
/// Get an iterator over notes in a program header.
@@ -772,18 +778,10 @@ template <class ELFT>
Expected<StringRef>
ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
WarningHandler WarnHandler) const {
- uint32_t Index = getHeader().e_shstrndx;
- if (Index == ELF::SHN_XINDEX) {
- // If the section name string table section index is greater than
- // or equal to SHN_LORESERVE, then the actual index of the section name
- // string table section is contained in the sh_link field of the section
- // header at index 0.
- if (Sections.empty())
- return createError(
- "e_shstrndx == SHN_XINDEX, but the section header table is empty");
-
- Index = Sections[0].sh_link;
- }
+ uint32_t Index = RealShStrNdx;
+ if (Index == ELF::SHN_XINDEX)
+ return createError(
+ "e_shstrndx == SHN_XINDEX, but the section header table is empty");
// There is no section name string table. Return FakeSectionStrings which
// is non-empty if we have created fake sections.
@@ -889,7 +887,31 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
return 0;
}
-template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
+template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {
+ const Elf_Ehdr &Header = getHeader();
+ RealPhNum = Header.e_phnum;
+ RealShNum = Header.e_shnum;
+ RealShStrNdx = Header.e_shstrndx;
+ if (!Header.hasPhdrNumExtension())
+ return;
+
+ // An ELF binary may report `hasExtendedHeader` as true but not actually
+ // include an extended header. For example, a core dump can contain 65,535
+ // segments but no sections at all. We defer reporting an error until section
+ // 0 is accessed. Consumers should handle and emit the error themselves when
+ // they attempt to access it.
+ auto SecOrErr = getSection(0);
+ if (!SecOrErr) {
+ consumeError(SecOrErr.takeError());
+ return;
+ }
+ if (RealPhNum == 0xFFFF)
+ RealPhNum = (*SecOrErr)->sh_info;
+ if (RealShNum == ELF::SHN_UNDEF)
+ RealShNum = (*SecOrErr)->sh_size;
+ if (RealShStrNdx == ELF::SHN_XINDEX)
+ RealShStrNdx = (*SecOrErr)->sh_link;
+}
template <class ELFT>
Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
@@ -956,7 +978,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
const Elf_Shdr *First =
reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
- uintX_t NumSections = getHeader().e_shnum;
+ uintX_t NumSections = RealShNum;
if (NumSections == 0)
NumSections = First->sh_size;
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2fc31458..b791f7486fe97 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -529,6 +529,11 @@ struct Elf_Ehdr_Impl {
unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+ bool hasPhdrNumExtension() const {
+ return (e_phnum == ELF::PN_XNUM || e_shnum == ELF::SHN_UNDEF ||
+ e_shstrndx == ELF::SHN_XINDEX) &&
+ e_shoff != 0;
+ }
};
template <endianness Endianness>
diff --git a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
index 6622db237026f..4c618acb5c951 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
@@ -6,8 +6,8 @@ RUN: llvm-readobj --file-headers --sections --symbols %t2 | FileCheck %s
RUN: llvm-readelf --symbols %t2 | FileCheck --check-prefix=SYMS %s
## The ELF header should have e_shnum == 0 and e_shstrndx == SHN_XINDEX.
-# CHECK: SectionHeaderCount: 0
-# CHECK-NEXT: StringTableSectionIndex: 65535
+# CHECK: SectionHeaderCount: 0 (65540)
+# CHECK-NEXT: StringTableSectionIndex: 65535 (65539)
## The first section header should store the real section header count and
## shstrndx in its fields.
diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz
new file mode 100644
index 0000000000000..0709ed1d6389e
Binary files /dev/null and b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz differ
diff --git a/llvm/test/tools/llvm-readobj/ELF/many-segments.test b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
new file mode 100644
index 0000000000000..20c31e97c8aca
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
@@ -0,0 +1,79 @@
+## Show that llvm-readelf can handle an input file with many segments.
+
+RUN: %python %p/../../llvm-objcopy/Inputs/ungzip.py %p/Inputs/many-segments.o.gz > %t
+RUN: llvm-readobj --file-headers --sections --segments %t2 | FileCheck %s
+RUN: llvm-readelf --segments %t2 | FileCheck --check-prefix=SYMS %s
+
+## The ELF header should have e_phnum == PN_XNUM
+# CHECK: ProgramHeaderCount: 65535 (66549)
+## The first section header should store the real program header count in its fields.
+# CHECK: Section {
+# CHECK-NEXT: Index: 0
+# CHECK-NEXT: Name:
+# CHECK-NEXT: Type: SHT_NULL
+# CHECK-NEXT: Flags [
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address:
+# CHECK-NEXT: Offset:
+# CHECK-NEXT: Size:
+# CHECK-NEXT: Link:
+# CHECK-NEXT: Info: 66549
+
+## Show that the program headers with indexes around PN_XNUM (65535) are
+## still dumped correctly.
+# 65535th segment
+# CHECK: Offset: 0x1183B000
+# CHECK-NEXT: VirtualAddress: 0x349139F3000
+# CHECK: }
+# CHECK-NEXT ProgramHeader {
+# CHECK-NEXT Type: PT_LOAD (0x1)
+# CHECK-NEXT Offset: 0x1183C000
+# CHECK-NEXT VirtualAddress: 0x349139F4000
+# CHECK-NEXT PhysicalAddress: 0x0
+# CHECK-NEXT FileSize: 4096
+# CHECK-NEXT MemSize: 4096
+# CHECK-NEXT Flags [ (0x4)
+# CHECK-NEXT PF_R (0x4)
+# CHECK-NEXT ]
+# CHECK-NEXT Alignment: 4096
+# CHECK-NEXT }
+# CHECK-NEXT ProgramHeader {
+# CHECK-NEXT Type: PT_LOAD (0x1)
+# CHECK-NEXT Offset: 0x1183D000
+# CHECK-NEXT VirtualAddress: 0x349139F5000
+# CHECK-NEXT PhysicalAddress: 0x0
+# CHECK-NEXT FileSize: 4096
+# CHECK-NEXT MemSize: 4096
+# CHECK-NEXT Flags [ (0x6)
+# CHECK-NEXT PF_R (0x4)
+# CHECK-NEXT PF_W (0x2)
+# CHECK-NEXT ]
+# CHECK-NEXT Alignment: 4096
+# CHECK-NEXT }
+# CHECK-NEXT ProgramHeader {
+# CHECK-NEXT Type: PT_LOAD (0x1)
+# CHECK-NEXT Offset: 0x1183E000
+# CHECK-NEXT VirtualAddress: 0x349139F6000
+# CHECK-NEXT PhysicalAddress: 0x0
+# CHECK-NEXT FileSize: 4096
+# CHECK-NEXT MemSize: 4096
+# CHECK-NEXT Flags [ (0x4)
+# CHECK-NEXT PF_R (0x4)
+# CHECK-NEXT ]
+# CHECK-NEXT Alignment: 4096
+# CHECK-NEXT }
+# CHECK ProgramHeader {
+# CHECK-NEXT Type: PT_LOAD (0x1)
+# CHECK-NEXT Offset: 0x11C31000
+# CHECK-NEXT VirtualAddress: 0x30D8E7868000
+# CHECK-NEXT PhysicalAddress: 0x0
+# CHECK-NEXT FileSize: 8192
+# CHECK-NEXT MemSize: 8192
+# CHECK-NEXT Flags [ (0x6)
+# CHECK-NEXT PF_R (0x4)
+# CHECK-NEXT PF_W (0x2)
+# CHECK-NEXT ]
+# CHECK-NEXT Alignment: 4096
+# CHECK-NEXT }
+
+# SYMS: There are 66549 program headers, starting at offset 64
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab93316907cc6..53d3a439f1e62 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3572,12 +3572,30 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
OS.flush();
}
+template <class ELFT>
+static std::string getProgramHeadersNumString(const ELFFile<ELFT> &Obj,
+ StringRef FileName) {
+ if (Obj.getHeader().e_phnum != ELF::PN_XNUM)
+ return to_string(Obj.getHeader().e_phnum);
+
+ Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
+ if (!ArrOrErr) {
+ // In this case we can ignore an error, because we have already reported a
+ // warning about the broken section header table earlier.
+ consumeError(ArrOrErr.takeError());
+ return "<?>";
+ }
+
+ if (Obj.getHeader().e_phnum == Obj.getPhNum())
+ return "65535";
+ return "65535 (" + to_string(Obj.getPhNum()) + ")";
+}
+
template <class ELFT>
static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
- const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
- if (ElfHeader.e_shnum != 0)
- return to_string(ElfHeader.e_shnum);
+ if (Obj.getHeader().e_shnum != 0)
+ return to_string(Obj.getHeader().e_shnum);
Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
if (!ArrOrErr) {
@@ -3587,17 +3605,16 @@ static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
return "<?>";
}
- if (ArrOrErr->empty())
+ if (Obj.getHeader().e_shnum == Obj.getShNum())
return "0";
- return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")";
+ return "0 (" + to_string(Obj.getShNum()) + ")";
}
template <class ELFT>
static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
- const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
- if (ElfHeader.e_shstrndx != SHN_XINDEX)
- return to_string(ElfHeader.e_shstrndx);
+ if (Obj.getHeader().e_shstrndx != SHN_XINDEX)
+ return to_string(Obj.getHeader().e_shstrndx);
Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
if (!ArrOrErr) {
@@ -3607,10 +3624,9 @@ static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
return "<?>";
}
- if (ArrOrErr->empty())
+ if (Obj.getHeader().e_shstrndx == Obj.getShStrNdx())
return "65535 (corrupt: out of range)";
- return to_string(ElfHeader.e_shstrndx) + " (" +
- to_string((*ArrOrErr)[0].sh_link) + ")";
+ return "65535 (" + to_string(Obj.getShStrNdx()) + ")";
}
static const EnumEntry<unsigned> *getObjectFileEnumEntry(unsigned Type) {
@@ -3765,7 +3781,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
printFields(OS, "Size of this header:", Str);
Str = to_string(e.e_phentsize) + " (bytes)";
printFields(OS, "Size of program headers:", Str);
- Str = to_string(e.e_phnum);
+ Str = getProgramHeadersNumString(this->Obj, this->FileName);
printFields(OS, "Number of program headers:", Str);
Str = to_string(e.e_shentsize) + " (bytes)";
printFields(OS, "Size of section headers:", Str);
@@ -4778,8 +4794,7 @@ void GNUELFDumper<ELFT>::printProgramHeaders(
return;
if (PrintProgramHeaders) {
- const Elf_Ehdr &Header = this->Obj.getHeader();
- if (Header.e_phnum == 0) {
+ if (this->Obj.getPhNum() == 0) {
OS << "\nThere are no program headers in this file.\n";
} else {
printProgramHeaders();
@@ -4798,7 +4813,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printProgramHeaders() {
OS << "\nElf file type is "
<< enumToString(Header.e_type, ArrayRef(ElfObjectFileType)) << "\n"
<< "Entry point " << format_hex(Header.e_entry, 3) << "\n"
- << "There are " << Header.e_phnum << " program headers,"
+ << "There are " << this->Obj.getPhNum() << " program headers,"
<< " starting at offset " << Header.e_phoff << "\n\n"
<< "Program Headers:\n";
if (ELFT::Is64Bits)
@@ -7470,7 +7485,8 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
W.printFlags("Flags", E.e_flags);
W.printNumber("HeaderSize", E.e_ehsize);
W.printNumber("ProgramHeaderEntrySize", E.e_phentsize);
- W.printNumber("ProgramHeaderCount", E.e_phnum);
+ W.printString("ProgramHeaderCount",
+ getProgramHeadersNumString(this->Obj, this->FileName));
W.printNumber("SectionHeaderEntrySize", E.e_shentsize);
W.printString("SectionHeaderCount",
getSectionHeadersNumString(this->Obj, this->FileName));
|
…ents Some binaries, such as FreeBSD core dumps, use program headers to store mmap information. It is possible for a program to use more than PN_XNUM program headers. Therefore, we implement support for PN_XNUM in readelf and objcopy.
ca911f0 to
70f9399
Compare
|
You probably want to use a stacked PR approach here. See https://llvm.org/docs/GitHub.html#stacked-pull-requests for details. |
|
I'd also recommend you have an llvm-readobj PR and an llvm-objcopy PR, since the two are independent (or at least, llvm-readobj shouldn't depend on llvm-objcopy as a minimum). |
|
Make the llvm-objcopy test depend on the llvm-readobj one. Use one of the stacked pull request tools to set this up. Alternatively, simply don't put the llvm-objcopy review up until the llvm-readobj one has landed. |
Can I recreate the PR using a user branch in the llvm repository instead of my private branch? I am unable to change the source branch to the user branch, and you have not left any comments on my code yet. |
I'm not sure I entirely follow what you're asking here, but as long as you follow one of the recommendations in https://llvm.org/docs/GitHub.html#stacked-pull-requests, it's fine to do what you want to do, including closing this PR and creating new ones. |
Some binaries, such as FreeBSD core dumps, use program headers to store mmap
information. It is possible for a program to use more than
PN_XNUM program headers. Therefore, we implement support for PN_XNUM
in readelf and objcopy.