-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[lldb] Make ELF files able to load section headers from memory. #129166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -556,6 +556,25 @@ static bool GetOsFromOSABI(unsigned char osabi_byte, | |
| return ostype != llvm::Triple::OSType::UnknownOS; | ||
| } | ||
|
|
||
| /// Read the bytes for the section headers from the ELF object file data. | ||
| /// | ||
| /// Builds an extractor over \p object_data that starts at header.e_shoff | ||
| /// and is requested to span header.GetSectionHeaderByteSize() bytes. | ||
| static DataExtractor GetSectionHeadersFromELFData( | ||
| const elf::ELFHeader &header, const DataExtractor &object_data) { | ||
| return DataExtractor(object_data, header.e_shoff, | ||
| header.GetSectionHeaderByteSize()); | ||
| } | ||
|
|
||
| /// Read the section data bytes for the section from the ELF object file data. | ||
| /// | ||
| /// If \p sh is of type SHT_NOBITS or has a zero sh_size, \p section_data is | ||
| /// cleared and false is returned. Otherwise \p section_data is set from | ||
| /// \p object_data at sh.sh_offset, and true is returned only when SetData() | ||
| /// reports exactly sh.sh_size bytes. | ||
| bool ObjectFileELF::GetSectionContentsFromELFData( | ||
| const elf::ELFSectionHeader &sh, const DataExtractor &object_data, | ||
| lldb_private::DataExtractor &section_data) { | ||
| if (sh.sh_type == SHT_NOBITS || sh.sh_size == 0) { | ||
| section_data.Clear(); | ||
| return false; | ||
| } | ||
| return section_data.SetData(object_data, | ||
| sh.sh_offset, sh.sh_size) == sh.sh_size; | ||
| } | ||
|
|
||
| size_t ObjectFileELF::GetModuleSpecifications( | ||
| const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, | ||
| lldb::offset_t data_offset, lldb::offset_t file_offset, | ||
|
|
@@ -633,9 +652,17 @@ size_t ObjectFileELF::GetModuleSpecifications( | |
| SectionHeaderColl section_headers; | ||
| lldb_private::UUID &uuid = spec.GetUUID(); | ||
|
|
||
| GetSectionHeaderInfo(section_headers, data, header, uuid, | ||
| gnu_debuglink_file, gnu_debuglink_crc, | ||
| spec.GetArchitecture()); | ||
| // Get the section header data from the object file. | ||
| DataExtractor sh_data = GetSectionHeadersFromELFData(header, data); | ||
|
|
||
| auto read_sect_callback = | ||
| [&](const elf::ELFSectionHeader &sh, | ||
| lldb_private::DataExtractor &sh_data) -> bool { | ||
| return GetSectionContentsFromELFData(sh, data, sh_data); | ||
| }; | ||
| GetSectionHeaderInfo(header, sh_data, section_headers, | ||
| read_sect_callback, uuid, gnu_debuglink_file, | ||
| gnu_debuglink_crc, spec.GetArchitecture()); | ||
|
|
||
| llvm::Triple &spec_triple = spec.GetArchitecture().GetTriple(); | ||
|
|
||
|
|
@@ -1284,10 +1311,10 @@ ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data, | |
| return error; | ||
| } | ||
|
|
||
| void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length, | ||
| void ObjectFileELF::ParseARMAttributes(DataExtractor &data, | ||
| ArchSpec &arch_spec) { | ||
| lldb::offset_t Offset = 0; | ||
|
|
||
| const uint64_t length = data.GetByteSize(); | ||
| uint8_t FormatVersion = data.GetU8(&Offset); | ||
| if (FormatVersion != llvm::ELFAttrs::Format_Version) | ||
| return; | ||
|
|
@@ -1355,9 +1382,10 @@ void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length, | |
| } | ||
|
|
||
| // GetSectionHeaderInfo | ||
| size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers, | ||
| DataExtractor &object_data, | ||
| const elf::ELFHeader &header, | ||
| size_t ObjectFileELF::GetSectionHeaderInfo(const elf::ELFHeader &header, | ||
| const DataExtractor &sh_data, | ||
| SectionHeaderColl &section_headers, | ||
| ReadSectionDataCallback read_sect, | ||
| lldb_private::UUID &uuid, | ||
| std::string &gnu_debuglink_file, | ||
| uint32_t &gnu_debuglink_crc, | ||
|
|
@@ -1459,14 +1487,12 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers, | |
|
|
||
| Log *log = GetLog(LLDBLog::Modules); | ||
|
|
||
| section_headers.resize(header.e_shnum); | ||
| if (section_headers.size() != header.e_shnum) | ||
| if (sh_data.GetByteSize() != header.GetSectionHeaderByteSize()) | ||
| return 0; | ||
|
|
||
| const size_t sh_size = header.e_shnum * header.e_shentsize; | ||
| const elf_off sh_offset = header.e_shoff; | ||
| DataExtractor sh_data; | ||
| if (sh_data.SetData(object_data, sh_offset, sh_size) != sh_size) | ||
| // Only resize our section headers if we got valid section header data. | ||
| section_headers.resize(header.e_shnum); | ||
| if (section_headers.size() != header.e_shnum) | ||
| return 0; | ||
|
|
||
| uint32_t idx; | ||
|
|
@@ -1477,32 +1503,41 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers, | |
| } | ||
| if (idx < section_headers.size()) | ||
| section_headers.resize(idx); | ||
| // Sometimes we are able to read the section header memory from an in memory | ||
| // ELF file, but all section header data has been set to zeroes. Remove any | ||
| // SHT_NULL sections if we have more than 1. The first entry in the section | ||
| // headers should always be a SHT_NULL section, but none of the others should | ||
| // be. | ||
| if (section_headers.size() > 1 && section_headers[1].sh_type == SHT_NULL) { | ||
| uint64_t null_count = std::count_if(section_headers.begin(), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can these ever be discontiguous? Here we count the whole list for SHT_NULL, but then erase based on that count. Would it be better for us to remove based on a |
||
| section_headers.end(), | ||
| [](const ELFSectionHeaderInfo &sh){ | ||
| return sh.sh_type == SHT_NULL; | ||
| }); | ||
| if (null_count == section_headers.size()) { | ||
| // Keep only 1 SHT_NULL section if they were all SHT_NULL types. | ||
| section_headers.erase(section_headers.begin() + 1); | ||
| } | ||
| } | ||
|
|
||
| const unsigned strtab_idx = header.e_shstrndx; | ||
| if (strtab_idx && strtab_idx < section_headers.size()) { | ||
| const ELFSectionHeaderInfo &sheader = section_headers[strtab_idx]; | ||
| const size_t byte_size = sheader.sh_size; | ||
| const Elf64_Off offset = sheader.sh_offset; | ||
| lldb_private::DataExtractor shstr_data; | ||
|
|
||
| if (shstr_data.SetData(object_data, offset, byte_size) == byte_size) { | ||
| if (read_sect(sheader, shstr_data)) { | ||
| for (SectionHeaderCollIter I = section_headers.begin(); | ||
| I != section_headers.end(); ++I) { | ||
| static ConstString g_sect_name_gnu_debuglink(".gnu_debuglink"); | ||
| const ELFSectionHeaderInfo &sheader = *I; | ||
| const uint64_t section_size = | ||
| sheader.sh_type == SHT_NOBITS ? 0 : sheader.sh_size; | ||
| ConstString name(shstr_data.PeekCStr(I->sh_name)); | ||
|
|
||
| I->section_name = name; | ||
|
|
||
| if (arch_spec.IsMIPS()) { | ||
| uint32_t arch_flags = arch_spec.GetFlags(); | ||
| DataExtractor data; | ||
| if (sheader.sh_type == SHT_MIPS_ABIFLAGS) { | ||
|
|
||
| if (section_size && (data.SetData(object_data, sheader.sh_offset, | ||
| section_size) == section_size)) { | ||
| DataExtractor data; | ||
| if (read_sect(sheader, data)) { | ||
| // MIPS ASE Mask is at offset 12 in MIPS.abiflags section | ||
| lldb::offset_t offset = 12; // MIPS ABI Flags Version: 0 | ||
| arch_flags |= data.GetU32(&offset); | ||
|
|
@@ -1565,16 +1600,14 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers, | |
| if (arch_spec.GetMachine() == llvm::Triple::arm || | ||
| arch_spec.GetMachine() == llvm::Triple::thumb) { | ||
| DataExtractor data; | ||
|
|
||
| if (sheader.sh_type == SHT_ARM_ATTRIBUTES && section_size != 0 && | ||
| data.SetData(object_data, sheader.sh_offset, section_size) == section_size) | ||
| ParseARMAttributes(data, section_size, arch_spec); | ||
| if (sheader.sh_type == SHT_ARM_ATTRIBUTES && | ||
| read_sect(sheader, data)) | ||
| ParseARMAttributes(data, arch_spec); | ||
| } | ||
|
|
||
| if (name == g_sect_name_gnu_debuglink) { | ||
| DataExtractor data; | ||
| if (section_size && (data.SetData(object_data, sheader.sh_offset, | ||
| section_size) == section_size)) { | ||
| if (read_sect(sheader, data)) { | ||
| lldb::offset_t gnu_debuglink_offset = 0; | ||
| gnu_debuglink_file = data.GetCStr(&gnu_debuglink_offset); | ||
| gnu_debuglink_offset = llvm::alignTo(gnu_debuglink_offset, 4); | ||
|
|
@@ -1594,8 +1627,7 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers, | |
| if (is_note_header) { | ||
| // Allow notes to refine module info. | ||
| DataExtractor data; | ||
| if (section_size && (data.SetData(object_data, sheader.sh_offset, | ||
| section_size) == section_size)) { | ||
| if (read_sect(sheader, data)) { | ||
| Status error = RefineModuleDetailsFromNote(data, arch_spec, uuid); | ||
| if (error.Fail()) { | ||
| LLDB_LOGF(log, "ObjectFileELF::%s ELF note processing failed: %s", | ||
|
|
@@ -1627,9 +1659,28 @@ ObjectFileELF::StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const { | |
|
|
||
| // ParseSectionHeaders | ||
| size_t ObjectFileELF::ParseSectionHeaders() { | ||
| return GetSectionHeaderInfo(m_section_headers, m_data, m_header, m_uuid, | ||
| m_gnu_debuglink_file, m_gnu_debuglink_crc, | ||
| m_arch_spec); | ||
| DataExtractor sh_data = GetSectionHeadersFromELFData(m_header, m_data); | ||
| const size_t sh_size = m_header.GetSectionHeaderByteSize(); | ||
| if (sh_data.GetByteSize() != sh_size) { | ||
| if (IsInMemory()) { | ||
| // We have an ELF file in process memory, read the section header data from | ||
| // the process. | ||
| if (ProcessSP process_sp = m_process_wp.lock()) { | ||
| const addr_t addr = m_memory_addr + m_header.e_shoff; | ||
| if (DataBufferSP data_sp = ReadMemory(process_sp, addr, sh_size)) | ||
| sh_data = DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); | ||
| } | ||
| } | ||
| } | ||
| auto read_sect_callback = [&](const elf::ELFSectionHeader &sh, | ||
| lldb_private::DataExtractor &sh_data) -> bool { | ||
| sh_data = this->GetSectionData(sh); | ||
| return sh_data.GetByteSize() == sh.sh_size; | ||
| }; | ||
|
|
||
| return GetSectionHeaderInfo(m_header, sh_data, m_section_headers, | ||
| read_sect_callback, m_uuid, m_gnu_debuglink_file, | ||
| m_gnu_debuglink_crc, m_arch_spec); | ||
| } | ||
|
|
||
| const ObjectFileELF::ELFSectionHeaderInfo * | ||
|
|
@@ -3805,6 +3856,27 @@ DataExtractor ObjectFileELF::GetSegmentData(const ELFProgramHeader &H) { | |
| return DataExtractor(); | ||
| } | ||
|
|
||
| DataExtractor ObjectFileELF::GetSectionData(const elf::ELFSectionHeader &sh) { | ||
| // Try and read the section contents from our cached m_data which can come | ||
| // from the file on disk being mmap'ed or from the initial part of the ELF | ||
| // file we read from memory and cached. | ||
| DataExtractor data; | ||
| if (GetSectionContentsFromELFData(sh, m_data, data)) | ||
| return data; | ||
| if (IsInMemory()) { | ||
| // We have an ELF file in process memory, read the section data from | ||
| // the process. | ||
| if (ProcessSP process_sp = m_process_wp.lock()) { | ||
| const addr_t addr = m_memory_addr + sh.sh_offset; | ||
| if (DataBufferSP data_sp = ReadMemory(process_sp, addr, sh.sh_size)) { | ||
| if (data_sp->GetByteSize() == sh.sh_size) | ||
| return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); | ||
| } | ||
| } | ||
| } | ||
| return DataExtractor(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we just return data here instead of creating a new extractor?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't want to in case |
||
| } | ||
|
|
||
| bool ObjectFileELF::AnySegmentHasPhysicalAddress() { | ||
| for (const ELFProgramHeader &H : ProgramHeaders()) { | ||
| if (H.p_paddr != 0) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm worried about this check as it's untested and it's basically impossible to prove it's safe to remove, ever. Can you explain how this comes to be and why leaving those SHT_NULL sections in the list is a problem? (I'm thinking whether e.g., we could leave those in, but change the code which consumes them to ignore them)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Normal ELF files, when mapped into a process, will sometimes, or maybe always, have the data for the section headers in readable memory, but they are all set to zero. So it doesn't make sense to show a ton of `SHT_NULL` sections. So we keep one `SHT_NULL` section for the first section and remove the rest. I can check to make sure they are all SHT_NULL sections and only remove them if they are all SHT_NULL if that would make everyone feel better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I think I understand what's going on. In the on-disk layout, the section headers are (typically) at the end of the file, after all the code and data and stuff. In the in-memory layout the section headers are not present. Instead, the data segment is (typically) followed by bss. So if you take the section header offset from the elf header, and apply it to the memory address, you're most likely going to be reading from the bss section:
This explains why you'd normally see the section headers as SHT_NULL, as `.bss` is zero at the beginning of the program. However, that's not guaranteed. If we attach to the program after it's been running, we'd just be reading random data from the program's bss section, and interpreting that as section headers. This is why I think we should just avoid reading section headers from the in-memory images altogether.
There's just one catch with that -- the VDSO pseudo-module actually does have its section headers in memory (I don't know how intentional that is -- I think it's mainly a side-effect of it not being loaded normally), and it'd be nice to be able to access those. However, the VDSO is unique enough that I think we can just special case that. For example, we could use the property that its section headers do not overlap with any segments (`m_memory_addr + ehdr->e_shoff` is beyond the last ELF segment). If that's not enough, we could also check that the section headers are within the same memory region as the elf header (the VDSO is loaded as a single page).
TL;DR: What I'm proposing is to skip reading of section headers for in-memory elf files, unless we are dealing with a VDSO. We could tell that we're dealing with the VDSO by looking at the location of the section headers relative to the elf segments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We actually want this to work for ELF files when we don't have the file and it is only in memory. GPU debugging often has images that are in memory only and we need those section headers. I agree with you that normal binaries loaded by the dynamic loader have this issue, but if we have JIT solutions that produce ELF files in memory, or GPU stuff that will compile things on the fly and point debuggers to ELF files in memory, then we should allow for this somehow.
I guess we will need to modify the SBModule from memory API to take a size so that it can be specified, though this can still be wrong for most linux binaries. My example's section headers were in the dynamic section:
Let me know what you think of being able to specify the size of the ELF data in memory as a fix? By default if anyone loads something from memory from say a core file, they shouldn't specify the size of the range and we would avoid reading the section headers. But if they do specify it, then we would try to read them, but only if they are contained within the data buffer for the object file? Ack, neither sounds great.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we need a boolean to specify the object file is fully mapped into memory? Or that the memory image is "complete"? We would then need to specify this when loading the image. JIT and GPU ELF images that are in memory only would use this flag, and ELF core files and loading an image from program memory from the dynamic loader would avoid setting this flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Having a flag for this would probably be the best solution (I was considering that myself). It basically tells the object file how the file was mapped, and that's something that's better done from the outside. The dynamic loader knows that it is loading a VDSO file, so it could easily set this flag when it is loading it.
I'm not particularly keen on the size argument, as it isn't very well defined. It's not uncommon to have holes in the memory image of an elf file (e.g. to have one chunk mapped from 0x10000->0x15000 and then another in 0x20000->0x25000). In theory, I think you could even end up mapping a different elf file into a hole in one of the previously mapped files.
That said, I'm not sure if the flag is completely necessary for this. I have a feeling it should still be possible to do this by looking at the segment data. In both of our examples, the section header offset pointed directly into a PT_LOAD segment. I think that shouldn't be the case for these GPU files (as otherwise, the section headers could be overwritten by some data in the binary). If that's true, then we could condition the reading of the section headers on this. The flag thing would likely still be better though, but I don't know how involved that change would be.