Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ struct ELFHeader {
/// The byte order of this ELF file as described by the header.
lldb::ByteOrder GetByteOrder() const;

/// Get the size in bytes of the section header data.
///
/// \return
/// The byte size of the section header data.
uint64_t GetSectionHeaderByteSize() const { return e_shnum * e_shentsize; }

/// The jump slot relocation type of this ELF.
unsigned GetRelocationJumpSlotType() const;

Expand Down
142 changes: 107 additions & 35 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,25 @@ static bool GetOsFromOSABI(unsigned char osabi_byte,
return ostype != llvm::Triple::OSType::UnknownOS;
}

/// Read the bytes for the section headers from the ELF object file data.
static DataExtractor GetSectionHeadersFromELFData(
const elf::ELFHeader &header, const DataExtractor &object_data) {
return DataExtractor(object_data, header.e_shoff,
header.GetSectionHeaderByteSize());;
}

/// Read the section data bytes for the section from the ELF object file data.
bool ObjectFileELF::GetSectionContentsFromELFData(
const elf::ELFSectionHeader &sh, const DataExtractor &object_data,
lldb_private::DataExtractor &section_data) {
if (sh.sh_type == SHT_NOBITS || sh.sh_size == 0) {
section_data.Clear();
return false;
}
return section_data.SetData(object_data,
sh.sh_offset, sh.sh_size) == sh.sh_size;
}

size_t ObjectFileELF::GetModuleSpecifications(
const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
lldb::offset_t data_offset, lldb::offset_t file_offset,
Expand Down Expand Up @@ -633,9 +652,17 @@ size_t ObjectFileELF::GetModuleSpecifications(
SectionHeaderColl section_headers;
lldb_private::UUID &uuid = spec.GetUUID();

GetSectionHeaderInfo(section_headers, data, header, uuid,
gnu_debuglink_file, gnu_debuglink_crc,
spec.GetArchitecture());
// Get the section header data from the object file.
DataExtractor sh_data = GetSectionHeadersFromELFData(header, data);

auto read_sect_callback =
[&](const elf::ELFSectionHeader &sh,
lldb_private::DataExtractor &sh_data) -> bool {
return GetSectionContentsFromELFData(sh, data, sh_data);
};
GetSectionHeaderInfo(header, sh_data, section_headers,
read_sect_callback, uuid, gnu_debuglink_file,
gnu_debuglink_crc, spec.GetArchitecture());

llvm::Triple &spec_triple = spec.GetArchitecture().GetTriple();

Expand Down Expand Up @@ -1284,10 +1311,10 @@ ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data,
return error;
}

void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length,
void ObjectFileELF::ParseARMAttributes(DataExtractor &data,
ArchSpec &arch_spec) {
lldb::offset_t Offset = 0;

const uint64_t length = data.GetByteSize();
uint8_t FormatVersion = data.GetU8(&Offset);
if (FormatVersion != llvm::ELFAttrs::Format_Version)
return;
Expand Down Expand Up @@ -1355,9 +1382,10 @@ void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length,
}

// GetSectionHeaderInfo
size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
DataExtractor &object_data,
const elf::ELFHeader &header,
size_t ObjectFileELF::GetSectionHeaderInfo(const elf::ELFHeader &header,
const DataExtractor &sh_data,
SectionHeaderColl &section_headers,
ReadSectionDataCallback read_sect,
lldb_private::UUID &uuid,
std::string &gnu_debuglink_file,
uint32_t &gnu_debuglink_crc,
Expand Down Expand Up @@ -1459,14 +1487,12 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,

Log *log = GetLog(LLDBLog::Modules);

section_headers.resize(header.e_shnum);
if (section_headers.size() != header.e_shnum)
if (sh_data.GetByteSize() != header.GetSectionHeaderByteSize())
return 0;

const size_t sh_size = header.e_shnum * header.e_shentsize;
const elf_off sh_offset = header.e_shoff;
DataExtractor sh_data;
if (sh_data.SetData(object_data, sh_offset, sh_size) != sh_size)
// Only resize our section headers if we got valid section header data.
section_headers.resize(header.e_shnum);
if (section_headers.size() != header.e_shnum)
return 0;

uint32_t idx;
Expand All @@ -1477,32 +1503,41 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
}
if (idx < section_headers.size())
section_headers.resize(idx);
// Sometimes we are able to read the section header memory from an in memory
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about this check as it's untested and its basically impossible to prove it's safe to remove, ever. Can explain how this comes to be and why leaving those SHT_NULL sections in the list is a problem? (I'm thinking whether e.g., we could leave those in, but change the code which consumes them to ignore them)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normal ELF files, when mapped into a process, will have sometimes or maybe always, have the data for the section headers in readable memory, but they are all set to zero. So it doesn't make sense to show a ton of SHT_NULL sections. So we keep one SHT_NULL section for the first section and remove the rest. I can check to make sure they are all SHT_NULL sections and only remove them if they are all SHT_NULL if that would make everyone feel better

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I think I understand what's going on. In the on-disk layout, the section headers are (typically) at the end of the file, after all the code and data and stuff. In the in-memory layout the section headers are not present. Instead, the data segment is (typically) followed bss. So if you take the section header offset from the elf header, and apply it to the memory address, you're most likely going to be reading from the bss section:

$ readelf -h /bin/ls | grep section
  Start of section headers:          144264 (bytes into file)
  Size of section headers:           64 (bytes)
  Number of section headers:         29
$ lldb /bin/ls
(lldb) target create "/bin/ls"
Current executable set to '/bin/ls' (x86_64).
(lldb) process la -s
Process 56286 stopped
* thread #1, name = 'ls', stop reason = signal SIGSTOP
    frame #0: 0x00007ffff7fe2bc0 ld-linux-x86-64.so.2`_start
ld-linux-x86-64.so.2`_start:
->  0x7ffff7fe2bc0 <+0>: movq   %rsp, %rdi
    0x7ffff7fe2bc3 <+3>: callq  0x7ffff7fe38c0 ; _dl_start

Process 56286 launched: '/bin/ls' (x86_64)
(lldb) image list /bin/ls
[  0] 770647CD 0x0000555555554000 /bin/ls 
(lldb) image lookup -a `0x0000555555554000+144264`
      Address: ls[0x0000000000023388] (ls.PT_LOAD[3]..bss + 264)
      Summary: 

This explains why you'd normally see the sections headers as SHT_NULL, as .bss is zero at the beginning of the program. However, that's not guaranteed. If we attach to the program after it's been running, we'd just be reading random data from the program's bss section, and interpreting that as section headers. This is why I think we should just avoid reading section headers from the in-memory images altogether.

There's just one catch with that -- the VDSO pseudo-module actually does have its sections headers in memory (I don't know how intentional is that -- I think it's mainly a side-effect of it not being loaded normally), and it'd be nice to be able to access those. However, the VDSO is unique enough that I think we can just special case that. For example, we could use the property that its section headers do not overlap with any segments (m_memory_addr + ehdr->e_shoff is beyond the last ELF segment). If that's not enough, we could also check that the address is section headers are within the same memory region as the elf header (the VDSO is loaded as a single page).

TL;DR: What I'm proposing is to skip reading of section headers for in-memory elf files, unless we are dealing with a VDSO. We could tell that we're dealing with the VDSO by looking at the location of the section headers relative to the elf segments.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually want this to work for ELF files when we don't have the file and it is only in memory. GPU debugging often has images that are in memory only and we need those section headers. I agree with you that normal binaries loaded by the dynamic loader have this issue, but if we have JIT solutions that produce ELF files in memory, or GPU stuff that will compile things on the fly and point debuggers to ELF files in memory, then we should allow for this somehow.

I guess we will need to modify the SBModule from memory API to take a size so that it can be specified, though this can still be wrong for most linux binaries. My example's section headers were in the dynamic section:

(lldb) image lookup --verbose --address 0x0000555555554000+0x0000000000003f88
      Address: a.out[0x0000000000003f88] (a.out.PT_LOAD[3]..dynamic + 448)
      Summary: a.out`_DYNAMIC + 448
       Module: file = "/home/gclayton/local/args/a.out", arch = "x86_64"
       Symbol: id = {0x00000033}, range = [0x0000555555557dc8-0x0000555555557fd8), name="_DYNAMIC"

Let me know what you think of being able to specify the size of the ELF data in memory as a fix? By default if anyone loads something from memory from say a core file, they shouldn't specify the size of the range and we would avoid reading the section headers. But if they do specify it, then we would try to read them, but only if they are contained within the data buffer for the object file? Ack, neither sounds great.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need a boolean to specify the object file is fully mapped into memory? Or that the memory image is "complete"? We would then need to specify this when loading the image. JIT and GPU ELF images that are in memory only would use this flag, and ELF core files and loading an image from program memory from the dynamic loader would avoid setting this flag.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a flag for this would probably be the best solution (I was considering that myself). It basically tells the object file how the file was mapped, and that's something that's better done from the outside. The dynamic loader knows that it is loading a VDSO file, so it could easily set this flag when it is loading it.

I'm not particularly keen on the size argument, as it isn't very well defined. It's not uncommon to have holes in the memory image of an elf file (e.g. to have one chunk mapped from 0x10000->0x15000 and then another in 0x20000->25000). In theory, I think you could even end up mapping a different elf file into a hole in one of the previously mapped files.

That said, I'm not sure if the flag is completely necessary for this. I have a feeling it should still be possible to by looking at the segment data. In both of our examples, the section header offset pointed directly into a PT_LOAD segment. I think that shouldn't be the case for these GPU files (as otherwise, the section headers could be overwritten by some data in the binary). If that's true, then we could condition the reading of the section headers on this. The flag thing would likely still be better though, but I don't know how involved that change would be.

// ELF file, but all section header data has been set to zeroes. Remove any
// SHT_NULL sections if we have more than 1. The first entry in the section
// headers should always be a SHT_NULL section, but none of the others should
// be.
if (section_headers.size() > 1 && section_headers[1].sh_type == SHT_NULL) {
uint64_t null_count = std::count_if(section_headers.begin(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these ever be discontiguous? Here we count the whole list for SHT_NULL, but then erase based on that count. Would it be better for us to remove based on a find_if iterator?

section_headers.end(),
[](const ELFSectionHeaderInfo &sh){
return sh.sh_type == SHT_NULL;
});
if (null_count == section_headers.size()) {
// Keep only 1 SHT_NULL section if they were all SHT_NULL types.
section_headers.erase(section_headers.begin() + 1);
}
}

const unsigned strtab_idx = header.e_shstrndx;
if (strtab_idx && strtab_idx < section_headers.size()) {
const ELFSectionHeaderInfo &sheader = section_headers[strtab_idx];
const size_t byte_size = sheader.sh_size;
const Elf64_Off offset = sheader.sh_offset;
lldb_private::DataExtractor shstr_data;

if (shstr_data.SetData(object_data, offset, byte_size) == byte_size) {
if (read_sect(sheader, shstr_data)) {
for (SectionHeaderCollIter I = section_headers.begin();
I != section_headers.end(); ++I) {
static ConstString g_sect_name_gnu_debuglink(".gnu_debuglink");
const ELFSectionHeaderInfo &sheader = *I;
const uint64_t section_size =
sheader.sh_type == SHT_NOBITS ? 0 : sheader.sh_size;
ConstString name(shstr_data.PeekCStr(I->sh_name));

I->section_name = name;

if (arch_spec.IsMIPS()) {
uint32_t arch_flags = arch_spec.GetFlags();
DataExtractor data;
if (sheader.sh_type == SHT_MIPS_ABIFLAGS) {

if (section_size && (data.SetData(object_data, sheader.sh_offset,
section_size) == section_size)) {
DataExtractor data;
if (read_sect(sheader, data)) {
// MIPS ASE Mask is at offset 12 in MIPS.abiflags section
lldb::offset_t offset = 12; // MIPS ABI Flags Version: 0
arch_flags |= data.GetU32(&offset);
Expand Down Expand Up @@ -1565,16 +1600,14 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
if (arch_spec.GetMachine() == llvm::Triple::arm ||
arch_spec.GetMachine() == llvm::Triple::thumb) {
DataExtractor data;

if (sheader.sh_type == SHT_ARM_ATTRIBUTES && section_size != 0 &&
data.SetData(object_data, sheader.sh_offset, section_size) == section_size)
ParseARMAttributes(data, section_size, arch_spec);
if (sheader.sh_type == SHT_ARM_ATTRIBUTES &&
read_sect(sheader, data))
ParseARMAttributes(data, arch_spec);
}

if (name == g_sect_name_gnu_debuglink) {
DataExtractor data;
if (section_size && (data.SetData(object_data, sheader.sh_offset,
section_size) == section_size)) {
if (read_sect(sheader, data)) {
lldb::offset_t gnu_debuglink_offset = 0;
gnu_debuglink_file = data.GetCStr(&gnu_debuglink_offset);
gnu_debuglink_offset = llvm::alignTo(gnu_debuglink_offset, 4);
Expand All @@ -1594,8 +1627,7 @@ size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
if (is_note_header) {
// Allow notes to refine module info.
DataExtractor data;
if (section_size && (data.SetData(object_data, sheader.sh_offset,
section_size) == section_size)) {
if (read_sect(sheader, data)) {
Status error = RefineModuleDetailsFromNote(data, arch_spec, uuid);
if (error.Fail()) {
LLDB_LOGF(log, "ObjectFileELF::%s ELF note processing failed: %s",
Expand Down Expand Up @@ -1627,9 +1659,28 @@ ObjectFileELF::StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const {

// ParseSectionHeaders
size_t ObjectFileELF::ParseSectionHeaders() {
return GetSectionHeaderInfo(m_section_headers, m_data, m_header, m_uuid,
m_gnu_debuglink_file, m_gnu_debuglink_crc,
m_arch_spec);
DataExtractor sh_data = GetSectionHeadersFromELFData(m_header, m_data);
const size_t sh_size = m_header.GetSectionHeaderByteSize();
if (sh_data.GetByteSize() != sh_size) {
if (IsInMemory()) {
// We have a ELF file in process memory, read the program header data from
// the process.
if (ProcessSP process_sp = m_process_wp.lock()) {
const addr_t addr = m_memory_addr + m_header.e_shoff;
if (DataBufferSP data_sp = ReadMemory(process_sp, addr, sh_size))
sh_data = DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
}
}
}
auto read_sect_callback = [&](const elf::ELFSectionHeader &sh,
lldb_private::DataExtractor &sh_data) -> bool {
sh_data = this->GetSectionData(sh);
return sh_data.GetByteSize() == sh.sh_size;
};

return GetSectionHeaderInfo(m_header, sh_data, m_section_headers,
read_sect_callback, m_uuid, m_gnu_debuglink_file,
m_gnu_debuglink_crc, m_arch_spec);
}

const ObjectFileELF::ELFSectionHeaderInfo *
Expand Down Expand Up @@ -3805,6 +3856,27 @@ DataExtractor ObjectFileELF::GetSegmentData(const ELFProgramHeader &H) {
return DataExtractor();
}

DataExtractor ObjectFileELF::GetSectionData(const elf::ELFSectionHeader &sh) {
// Try and read the section contents from our cached m_data which can come
// from the file on disk being mmap'ed or from the initial part of the ELF
// file we read from memory and cached.
DataExtractor data;
if (GetSectionContentsFromELFData(sh, m_data, data))
return data;
if (IsInMemory()) {
// We have a ELF file in process memory, read the program header data from
// the process.
if (ProcessSP process_sp = m_process_wp.lock()) {
const addr_t addr = m_memory_addr + sh.sh_offset;
if (DataBufferSP data_sp = ReadMemory(process_sp, addr, sh.sh_size)) {
if (data_sp->GetByteSize() == sh.sh_size)
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
}
}
}
return DataExtractor();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just return data here instead of creating a new extractor?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want to in case GetSectionContentsFromELFData() added some, but not all data to it.

}

bool ObjectFileELF::AnySegmentHasPhysicalAddress() {
for (const ELFProgramHeader &H : ProgramHeaders()) {
if (H.p_paddr != 0)
Expand Down
44 changes: 39 additions & 5 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class ObjectFileELF : public lldb_private::ObjectFile {

llvm::ArrayRef<elf::ELFProgramHeader> ProgramHeaders();
lldb_private::DataExtractor GetSegmentData(const elf::ELFProgramHeader &H);

lldb_private::DataExtractor GetSectionData(const elf::ELFSectionHeader &H);
llvm::StringRef
StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const override;

Expand Down Expand Up @@ -270,14 +270,48 @@ class ObjectFileELF : public lldb_private::ObjectFile {
lldb::SectionType GetSectionType(const ELFSectionHeaderInfo &H) const;

static void ParseARMAttributes(lldb_private::DataExtractor &data,
uint64_t length,
lldb_private::ArchSpec &arch_spec);

/// Read the section contents for a given section header.
///
/// Section contents are available in the ELF file data if we have all of the
/// data mapped into \a object_data.
///
/// \param[in] sh
/// The section header of the section we want to read.
///
/// \param[in] object_data
/// The object data that contains all of the contiguous bytes which starts
/// with the ELF header and also contains the program headers. This data
/// will contain the full ELF file if the ELF file is read from disk. If
/// the ELF file is read from memory, then it might only contain the ELF
/// header and program header data.
///
/// \param[out] section_data
/// The section data to fill in if we are able to read the section contents
/// completely.
///
/// \return True if we are able to read all of the section data from
/// \a object_data. False otherwise.
static bool GetSectionContentsFromELFData(
const elf::ELFSectionHeader &sh,
const lldb_private::DataExtractor &object_data,
lldb_private::DataExtractor &section_data);

/// Callback that can be used to read the data for a section.
///
/// \return True if the section has a size and all bytes can be read,
/// False otherwise.
using ReadSectionDataCallback =
llvm::function_ref<bool(const elf::ELFSectionHeader &sh,
lldb_private::DataExtractor &data)>;

/// Parses the elf section headers and returns the uuid, debug link name,
/// crc, archspec.
static size_t GetSectionHeaderInfo(SectionHeaderColl &section_headers,
lldb_private::DataExtractor &object_data,
const elf::ELFHeader &header,
static size_t GetSectionHeaderInfo(const elf::ELFHeader &header,
const lldb_private::DataExtractor &sh_data,
SectionHeaderColl &section_headers,
ReadSectionDataCallback read_sect_callback,
lldb_private::UUID &uuid,
std::string &gnu_debuglink_file,
uint32_t &gnu_debuglink_crc,
Expand Down
40 changes: 40 additions & 0 deletions lldb/test/Shell/ObjectFile/ELF/elf-memory.test
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@
// RUN: -o "script real_module = lldb.target.module[0]" \
// RUN: -o "script base_addr = real_module.GetObjectFileHeaderAddress().GetLoadAddress(lldb.target)" \
// RUN: -o "script mem_module = lldb.SBModule(lldb.process, base_addr)" \
// RUN: -o 'script vsdo_module = [m for m in lldb.target.modules if m.GetFileSpec().fullpath == "[vdso]"][0]' \
// RUN: -o "script target2 = lldb.debugger.CreateTarget('')" \
// RUN: -o "script target2.AddModule(mem_module)" \
// RUN: -o "target select 1" \
// RUN: -o "image dump objfile" \
// RUN: -o "script target3 = lldb.debugger.CreateTarget('')" \
// RUN: -o "script target3.AddModule(vsdo_module)" \
// RUN: -o "target select 2" \
// RUN: -o "image dump objfile" \
// RUN: | FileCheck %s --check-prefix=MAIN --dump-input=always
// MAIN: (lldb) image dump objfile
// MAIN: Dumping headers for 1 module(s).
Expand All @@ -33,6 +38,9 @@
// MAIN: Program Headers
// MAIN: ] PT_DYNAMIC

// Make sure there are no section headers for when they aren't loaded
// MAIN-NOT: Section Headers

// Make sure we see some sections created from the program headers
// MAIN: SectID
// MAIN: PT_LOAD[0]
Expand All @@ -53,3 +61,35 @@
// MAIN-DAG: SYMENT {{0x[0-9a-f]+}}
// MAIN-DAG: DEBUG {{0x[0-9a-f]+}}
// MAIN: NULL {{0x[0-9a-f]+}}

// Watch for the dump of the VSDO image. This should have section headers
// MAIN: ObjectFileELF, file = '', arch = {{.*, addr = 0x[0-9a-f]+}}
// MAIN: ELF Header

// Make sure we find the program headers and see a PT_DYNAMIC entry.
// MAIN: Program Headers
// MAIN: ] PT_DYNAMIC

// Make sure we find the section headers with at least a .text section to
// ensure we found the sections and their names.
// MAIN: Section Headers
// MAIN: .text

// Make sure we see some sections created from the program headers
// MAIN: SectID
// MAIN: PT_LOAD[0]

// Ensure we find some dependent modules as won't find these if we aren't able
// to load the .dynamic section from the PT_DYNAMIC program header.
// MAIN-NOT: Dependent Modules:

// Check for the .dynamic dump and ensure we find all dynamic entries that are
// required to be there and needed to get the .dynstr section and the symbol
// table, and the DT_DEBUG entry to find the list of shared libraries.
// MAIN: .dynamic:
// Make sure we found the .dynstr section by checking for valid strings after NEEDED
// MAIN-DAG: STRTAB {{0x[0-9a-f]+}}
// MAIN-DAG: SYMTAB {{0x[0-9a-f]+}}
// MAIN-DAG: STRSZ {{0x[0-9a-f]+}}
// MAIN-DAG: SYMENT {{0x[0-9a-f]+}}
// MAIN: NULL {{0x[0-9a-f]+}}
Loading