diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index 777b20e9bb0f6..492b441867205 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -71,6 +71,47 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { return std::string(toStringRef(llvm::ArrayRef(str_storage))); } +/// An "init expr" refers to a constant expression used to determine the initial +/// value of certain elements within a module during instantiation. These +/// expressions are restricted to operations that can be evaluated at module +/// instantiation time. Currently we only support simple constant opcodes. +static lldb::offset_t GetWasmOffsetFromInitExpr(DataExtractor &data, + lldb::offset_t &offset) { + lldb::offset_t init_expr_offset = LLDB_INVALID_OFFSET; + + uint8_t opcode = data.GetU8(&offset); + switch (opcode) { + case llvm::wasm::WASM_OPCODE_I32_CONST: + case llvm::wasm::WASM_OPCODE_I64_CONST: + init_expr_offset = data.GetSLEB128(&offset); + break; + case llvm::wasm::WASM_OPCODE_GLOBAL_GET: + init_expr_offset = data.GetULEB128(&offset); + break; + case llvm::wasm::WASM_OPCODE_F32_CONST: + case llvm::wasm::WASM_OPCODE_F64_CONST: + // Not a meaningful offset. + data.GetFloat(&offset); + break; + case llvm::wasm::WASM_OPCODE_REF_NULL: + // Not a meaningful offset. + data.GetULEB128(&offset); + break; + } + + // Make sure the opcodes we read aren't part of an extended init expr. + opcode = data.GetU8(&offset); + if (opcode == llvm::wasm::WASM_OPCODE_END) + return init_expr_offset; + + // Extended init expressions are not supported, but we still have to parse + // them to skip over them and read the next segment. + do { + opcode = data.GetU8(&offset); + } while (opcode != llvm::wasm::WASM_OPCODE_END); + return LLDB_INVALID_OFFSET; +} + /// Checks whether the data buffer starts with a valid Wasm module header. static bool ValidateModuleHeader(const DataBufferSP &data_sp) { if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) @@ -261,17 +302,20 @@ bool ObjectFileWasm::ParseHeader() { return true; } -static llvm::Expected> -ParseFunctions(SectionSP code_section_sp) { - DataExtractor data; - code_section_sp->GetSectionData(data); +struct WasmFunction { + lldb::offset_t section_offset = LLDB_INVALID_OFFSET; + uint32_t size = 0; +}; + +static llvm::Expected> +ParseFunctions(DataExtractor &data) { lldb::offset_t offset = 0; llvm::Expected function_count = GetULEB32(data, offset); if (!function_count) return function_count.takeError(); - std::vector functions; + std::vector functions; functions.reserve(*function_count); for (uint32_t i = 0; i < *function_count; ++i) { @@ -281,7 +325,7 @@ ParseFunctions(SectionSP code_section_sp) { // llvm-objdump considers the ULEB with the function size to be part of the // function. We can't do that here because that would break symbolic // breakpoints, as that address is never executed. - functions.emplace_back(code_section_sp, offset, *function_size); + functions.push_back({offset, *function_size}); std::optional next_offset = llvm::checkedAddUnsigned(offset, *function_size); @@ -294,17 +338,22 @@ ParseFunctions(SectionSP code_section_sp) { } struct WasmSegment { - WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size) - : address_range(section_sp, offset, size) {}; + enum SegmentType { + Active, + Passive, + }; + std::string name; - AddressRange address_range; -}; + SegmentType type = Passive; + lldb::offset_t section_offset = LLDB_INVALID_OFFSET; + uint32_t size = 0; + uint32_t memory_index = 0; + lldb::offset_t init_expr_offset = 0; -static llvm::Expected> -ParseData(SectionSP data_section_sp) { - DataExtractor data; - data_section_sp->GetSectionData(data); + lldb::offset_t GetFileOffset() const { return section_offset & 0xffffffff; } +}; +static llvm::Expected> ParseData(DataExtractor &data) { lldb::offset_t offset = 0; llvm::Expected segment_count = GetULEB32(data, offset); @@ -319,27 +368,34 @@ ParseData(SectionSP data_section_sp) { if (!flags) return flags.takeError(); + WasmSegment segment; + // Data segments have a mode that identifies them as either passive or // active. An active data segment copies its contents into a memory during // instantiation, as specified by a memory index and a constant expression // defining an offset into that memory. + segment.type = (*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) + ? WasmSegment::Passive + : WasmSegment::Active; + if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) { + assert(segment.type == WasmSegment::Active); llvm::Expected memidx = GetULEB32(data, offset); if (!memidx) return memidx.takeError(); + segment.memory_index = *memidx; } - if ((*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { - // Skip over the constant expression. - for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;) - b = data.GetU8(&offset); - } + if (segment.type == WasmSegment::Active) + segment.init_expr_offset = GetWasmOffsetFromInitExpr(data, offset); llvm::Expected segment_size = GetULEB32(data, offset); if (!segment_size) return segment_size.takeError(); - segments.emplace_back(data_section_sp, offset, *segment_size); + segment.section_offset = offset; + segment.size = *segment_size; + segments.push_back(segment); std::optional next_offset = llvm::checkedAddUnsigned(offset, *segment_size); @@ -352,13 +408,11 @@ ParseData(SectionSP data_section_sp) { } static llvm::Expected> -ParseNames(SectionSP name_section_sp, - const std::vector &function_ranges, +ParseNames(SectionSP code_section_sp, DataExtractor &name_data, + const std::vector &functions, std::vector &segments) { - DataExtractor name_section_data; - name_section_sp->GetSectionData(name_section_data); - llvm::DataExtractor data = name_section_data.GetAsLLVM(); + llvm::DataExtractor data = name_data.GetAsLLVM(); llvm::DataExtractor::Cursor c(0); std::vector symbols; while (c && c.tell() < data.size()) { @@ -380,12 +434,13 @@ ParseNames(SectionSP name_section_sp, llvm::Expected name = GetWasmString(data, c); if (!name) return name.takeError(); - if (*idx >= function_ranges.size()) + if (*idx >= functions.size()) continue; symbols.emplace_back( - symbols.size(), Mangled(*name), lldb::eSymbolTypeCode, + symbols.size(), *name, lldb::eSymbolTypeCode, /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, - /*is_artificial=*/false, function_ranges[*idx], + /*is_artificial=*/false, code_section_sp, + functions[i].section_offset, functions[i].size, /*size_is_valid=*/true, /*contains_linker_annotations=*/false, /*flags=*/0); } @@ -405,12 +460,6 @@ ParseNames(SectionSP name_section_sp, continue; // Update the segment name. segments[i].name = *name; - symbols.emplace_back( - symbols.size(), Mangled(*name), lldb::eSymbolTypeData, - /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, - /*is_artificial=*/false, segments[i].address_range, - /*size_is_valid=*/true, /*contains_linker_annotations=*/false, - /*flags=*/0); } } break; @@ -432,80 +481,11 @@ ParseNames(SectionSP name_section_sp, } void ObjectFileWasm::ParseSymtab(Symtab &symtab) { - assert(m_sections_up && "sections must be parsed"); - Log *log = GetLog(LLDBLog::Object); - - // The name section contains names and indexes. First parse the data from the - // relevant sections so we can access it by its index. - std::vector functions; - std::vector segments; - - // Parse the code section. - if (SectionSP code_section_sp = - m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) { - llvm::Expected> maybe_functions = - ParseFunctions(code_section_sp); - if (!maybe_functions) { - LLDB_LOG_ERROR(log, maybe_functions.takeError(), - "Failed to parse Wasm code section: {0}"); - return; - } - functions = *maybe_functions; - } - - // Parse the data section. - SectionSP data_section_sp = - m_sections_up->FindSectionByType(lldb::eSectionTypeData, false); - if (data_section_sp) { - llvm::Expected> maybe_segments = - ParseData(data_section_sp); - if (!maybe_segments) { - LLDB_LOG_ERROR(log, maybe_segments.takeError(), - "Failed to parse Wasm data section: {0}"); - return; - } - segments = *maybe_segments; - } - - // Parse the name section. - SectionSP name_section_sp = - m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false); - if (!name_section_sp) { - LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section"); - return; - } - - llvm::Expected> symbols = - ParseNames(name_section_sp, functions, segments); - if (!symbols) { - LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}"); - return; - } - - for (const Symbol &symbol : *symbols) + for (const Symbol &symbol : m_symbols) symtab.AddSymbol(symbol); - lldb::user_id_t segment_id = 0; - for (const WasmSegment &segment : segments) { - const lldb::addr_t segment_addr = - segment.address_range.GetBaseAddress().GetFileAddress(); - const size_t segment_size = segment.address_range.GetByteSize(); - SectionSP segment_sp = std::make_shared
( - /*parent_section_sp=*/data_section_sp, GetModule(), - /*obj_file=*/this, - ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid - // collision with section IDs. - ConstString(segment.name), eSectionTypeData, - /*file_vm_addr=*/segment_addr, - /*vm_size=*/segment_size, - /*file_offset=*/segment_addr, - /*file_size=*/segment_size, - /*log2align=*/0, /*flags=*/0); - m_sections_up->AddSection(segment_sp); - GetModule()->GetSectionList()->AddSection(segment_sp); - } - symtab.Finalize(); + m_symbols.clear(); } static SectionType GetSectionTypeFromName(llvm::StringRef Name) { @@ -516,7 +496,27 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) { return eSectionTypeOther; } +std::optional +ObjectFileWasm::GetSectionInfo(uint32_t section_id) { + for (const section_info §_info : m_sect_infos) { + if (sect_info.id == section_id) + return sect_info; + } + return std::nullopt; +} + +std::optional +ObjectFileWasm::GetSectionInfo(llvm::StringRef section_name) { + for (const section_info §_info : m_sect_infos) { + if (sect_info.name == section_name) + return sect_info; + } + return std::nullopt; +} + void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { + Log *log = GetLog(LLDBLog::Object); + if (m_sections_up) return; @@ -530,7 +530,7 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { SectionType section_type = eSectionTypeOther; ConstString section_name; offset_t file_offset = sect_info.offset & 0xffffffff; - addr_t vm_addr = file_offset; + addr_t vm_addr = sect_info.offset; size_t vm_size = sect_info.size; if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { @@ -542,9 +542,6 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { // For this reason Section::GetFileAddress() must return zero for the // Code section. vm_addr = 0; - } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) { - section_type = eSectionTypeData; - section_name = ConstString("data"); } else { section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); if (section_type == eSectionTypeOther) @@ -556,23 +553,107 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { } } - SectionSP section_sp( - new Section(GetModule(), // Module to which this section belongs. - this, // ObjectFile to which this section belongs and - // should read section data from. - section_type, // Section ID. - section_name, // Section name. - section_type, // Section type. - vm_addr, // VM address. - vm_size, // VM size in bytes of this section. - file_offset, // Offset of this section in the file. - sect_info.size, // Size of the section as found in the file. - 0, // Alignment of the section - 0, // Flags for this section. - 1)); // Number of host bytes per target byte + SectionSP section_sp = std::make_shared
( + GetModule(), // Module to which this section belongs. + this, // ObjectFile to which this section belongs and + // should read section data from. + section_type, // Section ID. + section_name, // Section name. + section_type, // Section type. + vm_addr, // VM address. + vm_size, // VM size in bytes of this section. + file_offset, // Offset of this section in the file. + sect_info.size, // Size of the section as found in the file. + 0, // Alignment of the section + 0, // Flags for this section. + 1); // Number of host bytes per target byte m_sections_up->AddSection(section_sp); unified_section_list.AddSection(section_sp); } + + // The name section contains names and indexes. First parse the data from the + // relevant sections so we can access it by its index. + std::vector functions; + std::vector segments; + + // Parse the code section. + if (std::optional info = + GetSectionInfo(llvm::wasm::WASM_SEC_CODE)) { + DataExtractor code_data = ReadImageData(info->offset, info->size); + llvm::Expected> maybe_functions = + ParseFunctions(code_data); + if (!maybe_functions) { + LLDB_LOG_ERROR(log, maybe_functions.takeError(), + "Failed to parse Wasm code section: {0}"); + } else { + functions = *maybe_functions; + } + } + + // Parse the data section. + std::optional data_info = + GetSectionInfo(llvm::wasm::WASM_SEC_DATA); + if (data_info) { + DataExtractor data_data = ReadImageData(data_info->offset, data_info->size); + llvm::Expected> maybe_segments = + ParseData(data_data); + if (!maybe_segments) { + LLDB_LOG_ERROR(log, maybe_segments.takeError(), + "Failed to parse Wasm data section: {0}"); + } else { + segments = *maybe_segments; + } + } + + if (std::optional info = GetSectionInfo("name")) { + DataExtractor names_data = ReadImageData(info->offset, info->size); + llvm::Expected> symbols = ParseNames( + m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false), + names_data, functions, segments); + if (!symbols) { + LLDB_LOG_ERROR(log, symbols.takeError(), + "Failed to parse Wasm names: {0}"); + } else { + m_symbols = *symbols; + } + } + + lldb::user_id_t segment_id = 0; + for (const WasmSegment &segment : segments) { + if (segment.type == WasmSegment::Active) { + // FIXME: Support segments with a memory index. + if (segment.memory_index != 0) { + LLDB_LOG(log, "Skipping segment {0}: non-zero memory index is " + "currently unsupported"); + continue; + } + + if (segment.init_expr_offset == LLDB_INVALID_OFFSET) { + LLDB_LOG(log, "Skipping segment {0}: unsupported init expression"); + continue; + } + } + + const lldb::addr_t file_vm_addr = + segment.type == WasmSegment::Active + ? segment.init_expr_offset + : data_info->offset + segment.section_offset; + const lldb::offset_t file_offset = + data_info->GetFileOffset() + segment.GetFileOffset(); + SectionSP segment_sp = std::make_shared
( + GetModule(), + /*obj_file=*/this, + ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid + // collision with section IDs. + ConstString(segment.name), eSectionTypeData, + /*file_vm_addr=*/file_vm_addr, + /*vm_size=*/segment.size, + /*file_offset=*/file_offset, + /*file_size=*/segment.size, + /*log2align=*/0, /*flags=*/0); + m_sections_up->AddSection(segment_sp); + GetModule()->GetSectionList()->AddSection(segment_sp); + } } bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, @@ -697,7 +778,7 @@ void ObjectFileWasm::Dump(Stream *s) { } void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, - const section_info_t &sh) { + const section_info &sh) { ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " << llvm::format_hex(sh.offset, 10) << " " << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h index 531b5f0437a43..86ecbf26803cf 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h @@ -128,20 +128,25 @@ class ObjectFileWasm : public ObjectFile { /// Read a range of bytes from the Wasm module. DataExtractor ReadImageData(lldb::offset_t offset, uint32_t size); - typedef struct section_info { + struct section_info { lldb::offset_t offset; uint32_t size; uint32_t id; ConstString name; - } section_info_t; + lldb::offset_t GetFileOffset() const { return offset & 0xffffffff; } + }; + + std::optional GetSectionInfo(uint32_t section_id); + std::optional GetSectionInfo(llvm::StringRef section_name); /// Wasm section header dump routines. /// \{ - void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info_t &sh); + void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info &sh); void DumpSectionHeaders(llvm::raw_ostream &ostream); /// \} - std::vector m_sect_infos; + std::vector m_sect_infos; + std::vector m_symbols; ArchSpec m_arch; UUID m_uuid; }; diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test index 4170d9aba9eea..524691b897322 100644 --- a/lldb/test/Shell/Symtab/symtab-wasm.test +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -1,15 +1,16 @@ # RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm -# RUN: %lldb %t.wasm -o 'image dump symtab' -o 'image dump sections' | FileCheck %s -CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors -CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add -CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main -CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main -CHECK: Data 0x0000000000000233 0x0000000000000009 0x00000000 .rodata -CHECK: Data 0x0000000000000242 0x0000000000000004 0x00000000 .data +# RUN: %lldb %t.wasm -o 'image dump symtab' | FileCheck %s --check-prefix SYMTAB +SYMTAB: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors +SYMTAB: Code 0x0000000000000005 0x0000000000000029 0x00000000 add +SYMTAB: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main +SYMTAB: Code 0x000000000000007c 0x0000000000000009 0x00000000 main -CHECK: 0x0000000000000001 code {{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code -CHECK: 0x0000000000000003 data {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data -CHECK: 0x0000000000000040 wasm-name {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name -CHECK: 0x0000000000000100 data {{.*}} 0x00000233 0x00000009 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata -CHECK: 0x0000000000000200 data {{.*}} 0x00000242 0x00000004 0x00000000 symtab-wasm.test.tmp.wasm.data..data +# RUN: %lldb %t.wasm -o 'image dump sections' | FileCheck %s --check-prefix SECTIONS +SECTIONS: 0x0000000000000001 code [0x0000000000000000-0x0000000000000085) --- 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code +SECTIONS: 0x0000000000000040 wasm-name --- 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name +SECTIONS: 0x0000000000000100 data [0x0000000000000400-0x0000000000000409) --- 0x00000233 0x00000009 0x00000000 symtab-wasm.test.tmp.wasm..rodata +SECTIONS: 0x0000000000000200 data [0x000000000000040c-0x0000000000000410) --- 0x00000242 0x00000004 0x00000000 symtab-wasm.test.tmp.wasm..data + +# RUN: %lldb %t.wasm -o 'x/s 0x0000000000000400' | FileCheck %s --check-prefix STR +STR: "data str"