[lldb] Build a Wasm symbol table from the code and "name" sections

Summary of the changes in this patch:

* Add a new section type, eSectionTypeWasmName (printed as "wasm-name"),
  and handle it in the SectionType switches in Section.cpp,
  ObjectFileMachO.cpp and ObjectFile.cpp. ObjectFileWasm assigns it to the
  custom section whose name is exactly "name".
* GetWasmString now returns std::optional<std::string> instead of
  std::optional<ConstString>; the callers build the ConstString / FileSpec
  themselves.
* Implement ObjectFileWasm::ParseSymtab:
  - ParseFunctions walks the code section and records one AddressRange per
    function body. The range starts after the ULEB128 size field, so the
    symbol address is the first byte of the body rather than the size
    prefix.
  - ParseNames walks the name section, creates one eSymbolTypeCode symbol
    for every entry of the function-name subsection whose index refers to a
    parsed function, and skips every other subsection by its size.
* Add a lit test that builds a small module with yaml2obj and verifies the
  output of "image dump symtab" with FileCheck.

diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index c63c1f03e58da..fec9fdef44df9 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -777,6 +777,7 @@ enum SectionType {
   eSectionTypeLLDBTypeSummaries,
   eSectionTypeLLDBFormatters,
   eSectionTypeSwiftModules,
+  eSectionTypeWasmName,
 };
 
 FLAGS_ENUM(EmulateInstructionOptions){
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index 27dcf987b0278..02d9d86fe5374 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -153,6 +153,8 @@ const char *Section::GetTypeAsCString() const {
     return "lldb-formatters";
   case eSectionTypeSwiftModules:
     return "swift-modules";
+  case eSectionTypeWasmName:
+    return "wasm-name";
   case eSectionTypeOther:
     return "regular";
   }
@@ -415,6 +417,7 @@ bool Section::ContainsOnlyDebugInfo() const {
   case eSectionTypeCompactUnwind:
   case eSectionTypeGoSymtab:
   case eSectionTypeAbsoluteAddress:
+  case eSectionTypeWasmName:
   case eSectionTypeOther:
     // Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug
     // information that we parse at all. This was causing system files with no
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 13df6e2f26b53..d7cb60e3f0c38 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1156,6 +1156,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
         case eSectionTypeDataObjCMessageRefs:
         case eSectionTypeDataObjCCFStrings:
         case eSectionTypeGoSymtab:
+        case eSectionTypeWasmName:
           return AddressClass::eData;
 
         case eSectionTypeDebug:
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index b1efd25949379..a489b05acfcb4 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Support/CheckedArithmetic.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Format.h"
 #include <optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
   return version == llvm::wasm::WasmVersion;
 }
 
-static std::optional<ConstString>
+// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
+static std::optional<std::string>
 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
   // A Wasm string is encoded as a vector of UTF-8 codes.
   // Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
     return std::nullopt;
   }
 
-  llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
-  return ConstString(str);
+  return std::string(toStringRef(llvm::ArrayRef(str_storage)));
 }
 
 char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
     // identifying the custom section, followed by an uninterpreted sequence
     // of bytes.
     lldb::offset_t prev_offset = c.tell();
-    std::optional<ConstString> sect_name = GetWasmString(data, c);
+    std::optional<std::string> sect_name = GetWasmString(data, c);
     if (!sect_name)
       return false;
 
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
 
     uint32_t section_length = payload_len - (c.tell() - prev_offset);
     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
-                                        section_id, *sect_name});
+                                        section_id, ConstString(*sect_name)});
     *offset_ptr += (c.tell() + section_length);
   } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
   return true;
 }
 
-void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
+static llvm::Expected<std::vector<AddressRange>>
+ParseFunctions(SectionSP code_section_sp) {
+  DataExtractor code_section_data;
+  code_section_sp->GetSectionData(code_section_data);
+  lldb::offset_t offset = 0;
+
+  const uint64_t function_count = code_section_data.GetULEB128(&offset);
+  if (function_count >= std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("function count overflows uint32_t");
+
+  std::vector<AddressRange> functions;
+  functions.reserve(function_count);
+
+  for (uint32_t i = 0; i < function_count; ++i) {
+    const uint64_t function_size = code_section_data.GetULEB128(&offset);
+    if (function_size >= std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("function size overflows uint32_t");
+    // llvm-objdump considers the ULEB with the function size to be part of the
+    // function. We can't do that here because that would break symbolic
+    // breakpoints, as that address is never executed.
+    functions.emplace_back(code_section_sp, offset, function_size);
+
+    std::optional<uint64_t> next_offset =
+        llvm::checkedAddUnsigned(offset, function_size);
+    if (!next_offset)
+      return llvm::createStringError("function offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return functions;
+}
+
+static llvm::Expected<std::vector<Symbol>>
+ParseNames(SectionSP name_section_sp,
+           const std::vector<AddressRange> &functions) {
+  DataExtractor name_section_data;
+  name_section_sp->GetSectionData(name_section_data);
+
+  llvm::DataExtractor data = name_section_data.GetAsLLVM();
+  llvm::DataExtractor::Cursor c(0);
+  std::vector<Symbol> symbols;
+  while (c && c.tell() < data.size()) {
+    const uint8_t type = data.getU8(c);
+    const uint64_t size = data.getULEB128(c);
+    if (size >= std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("size overflows uint32_t");
+
+    switch (type) {
+    case llvm::wasm::WASM_NAMES_FUNCTION: {
+      const uint64_t count = data.getULEB128(c);
+      if (count >= std::numeric_limits<uint32_t>::max())
+        return llvm::createStringError("function count overflows uint32_t");
+
+      for (uint64_t i = 0; c && i < count; ++i) {
+        const uint64_t idx = data.getULEB128(c);
+        const std::optional<std::string> name = GetWasmString(data, c);
+        if (!name || idx >= functions.size())
+          continue;
+        symbols.emplace_back(
+            symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
+            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+            /*is_artificial=*/false, functions[idx],
+            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+            /*flags=*/0);
+      }
+    } break;
+    case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+    case llvm::wasm::WASM_NAMES_GLOBAL:
+    case llvm::wasm::WASM_NAMES_LOCAL:
+    default:
+      std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), size);
+      if (!offset)
+        return llvm::createStringError("offset overflows uint64_t");
+      c.seek(*offset);
+    }
+  }
+
+  if (!c)
+    return c.takeError();
+
+  return symbols;
+}
+
+void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
+  assert(m_sections_up && "sections must be parsed");
+  Log *log = GetLog(LLDBLog::Object);
+
+  // The name section contains names and indexes. First parse the functions from
+  // the code section so we can access them by their index.
+  SectionSP code_section_sp =
+      m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
+  if (!code_section_sp) {
+    LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
+    return;
+  }
+
+  llvm::Expected<std::vector<AddressRange>> functions =
+      ParseFunctions(code_section_sp);
+  if (!functions) {
+    LLDB_LOG_ERROR(log, functions.takeError(),
+                   "Failed to parse Wasm functions: {0}");
+    return;
+  }
+
+  // Parse the name section.
+  SectionSP name_section_sp =
+      m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
+  if (!name_section_sp) {
+    LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
+    return;
+  }
+
+  llvm::Expected<std::vector<Symbol>> symbols =
+      ParseNames(name_section_sp, *functions);
+  if (!symbols) {
+    LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
+    return;
+  }
+
+  for (const Symbol &symbol : *symbols)
+    symtab.AddSymbol(symbol);
+
+  symtab.Finalize();
+}
 
 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
-  if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
+  if (Name == "name")
+    return lldb::eSectionTypeWasmName;
+  if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
     return ObjectFile::GetDWARFSectionTypeFromName(Name);
-  }
   return eSectionTypeOther;
 }
 
@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
           ReadImageData(sect_info.offset, kBufferSize);
       llvm::DataExtractor data = section_header_data.GetAsLLVM();
       llvm::DataExtractor::Cursor c(0);
-      std::optional<ConstString> symbols_url = GetWasmString(data, c);
+      std::optional<std::string> symbols_url = GetWasmString(data, c);
       if (symbols_url)
-        return FileSpec(symbols_url->GetStringRef());
+        return FileSpec(*symbols_url);
     }
   }
   return std::nullopt;
diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index 21daf7476b522..7efce2a035505 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) {
         case eSectionTypeELFDynamicSymbols:
         case eSectionTypeELFRelocationEntries:
         case eSectionTypeELFDynamicLinkInfo:
+        case eSectionTypeWasmName:
         case eSectionTypeOther:
           return AddressClass::eUnknown;
         case eSectionTypeAbsoluteAddress:
diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
new file mode 100644
index 0000000000000..165bb53662f40
--- /dev/null
+++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
@@ -0,0 +1,210 @@
+--- !WASM
+FileHeader:
+  Version: 0x1
+Sections:
+  - Type: TYPE
+    Signatures:
+      - Index: 0
+        ParamTypes: []
+        ReturnTypes: []
+      - Index: 1
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+      - Index: 2
+        ParamTypes: []
+        ReturnTypes:
+          - I32
+  - Type: FUNCTION
+    FunctionTypes: [ 0, 1, 2, 1 ]
+  - Type: TABLE
+    Tables:
+      - Index: 0
+        ElemType: FUNCREF
+        Limits:
+          Flags: [ HAS_MAX ]
+          Minimum: 0x1
+          Maximum: 0x1
+  - Type: MEMORY
+    Memories:
+      - Minimum: 0x2
+  - Type: GLOBAL
+    Globals:
+      - Index: 0
+        Type: I32
+        Mutable: true
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 66560
+      - Index: 1
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 1024
+      - Index: 2
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 1024
+      - Index: 3
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 1024
+      - Index: 4
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 66560
+      - Index: 5
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 1024
+      - Index: 6
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 66560
+      - Index: 7
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 131072
+      - Index: 8
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 0
+      - Index: 9
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 1
+      - Index: 10
+        Type: I32
+        Mutable: false
+        InitExpr:
+          Opcode: I32_CONST
+          Value: 65536
+  - Type: EXPORT
+    Exports:
+      - Name: memory
+        Kind: MEMORY
+        Index: 0
+      - Name: __wasm_call_ctors
+        Kind: FUNCTION
+        Index: 0
+      - Name: add
+        Kind: FUNCTION
+        Index: 1
+      - Name: __original_main
+        Kind: FUNCTION
+        Index: 2
+      - Name: main
+        Kind: FUNCTION
+        Index: 3
+      - Name: __main_void
+        Kind: FUNCTION
+        Index: 2
+      - Name: __indirect_function_table
+        Kind: TABLE
+        Index: 0
+      - Name: __dso_handle
+        Kind: GLOBAL
+        Index: 1
+      - Name: __data_end
+        Kind: GLOBAL
+        Index: 2
+      - Name: __stack_low
+        Kind: GLOBAL
+        Index: 3
+      - Name: __stack_high
+        Kind: GLOBAL
+        Index: 4
+      - Name: __global_base
+        Kind: GLOBAL
+        Index: 5
+      - Name: __heap_base
+        Kind: GLOBAL
+        Index: 6
+      - Name: __heap_end
+        Kind: GLOBAL
+        Index: 7
+      - Name: __memory_base
+        Kind: GLOBAL
+        Index: 8
+      - Name: __table_base
+        Kind: GLOBAL
+        Index: 9
+      - Name: __wasm_first_page_end
+        Kind: GLOBAL
+        Index: 10
+  - Type: CODE
+    Functions:
+      - Index: 0
+        Locals: []
+        Body: 0B
+      - Index: 1
+        Locals:
+          - Type: I32
+            Count: 1
+        Body: 23808080800041106B21022002200036020C20022001360208200228020C20022802086A0F0B
+      - Index: 2
+        Locals:
+          - Type: I32
+            Count: 2
+        Body: 23808080800041106B210020002480808080002000410036020C2000410136020820004102360204200028020820002802041081808080002101200041106A24808080800020010F0B
+      - Index: 3
+        Locals: []
+        Body: 1082808080000F0B
+  - Type: CUSTOM
+    Name: name
+    FunctionNames:
+      - Index: 0
+        Name: __wasm_call_ctors
+      - Index: 1
+        Name: add
+      - Index: 2
+        Name: __original_main
+      - Index: 3
+        Name: main
+    GlobalNames:
+      - Index: 0
+        Name: __stack_pointer
+  - Type: CUSTOM
+    Name: producers
+    Tools:
+      - Name: clang
+        Version: '22.0.0git'
+  - Type: CUSTOM
+    Name: target_features
+    Features:
+      - Prefix: USED
+        Name: bulk-memory
+      - Prefix: USED
+        Name: bulk-memory-opt
+      - Prefix: USED
+        Name: call-indirect-overlong
+      - Prefix: USED
+        Name: multivalue
+      - Prefix: USED
+        Name: mutable-globals
+      - Prefix: USED
+        Name: nontrapping-fptoint
+      - Prefix: USED
+        Name: reference-types
+      - Prefix: USED
+        Name: sign-ext
+...
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
new file mode 100644
index 0000000000000..fc185cd81a0ec
--- /dev/null
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -0,0 +1,7 @@
+# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
+# RUN: %lldb %t.wasm -o 'image dump symtab' | FileCheck %s
+
+# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
+# CHECK: Code 0x0000000000000005 {{.*}} add
+# CHECK: Code 0x000000000000002f {{.*}} __original_main
+# CHECK: Code 0x000000000000007c {{.*}} main