Skip to content

Commit 5be2063

Browse files
authored
[lldb] Support parsing the Wasm symbol table (#153093)
This PR adds support for parsing the WebAssembly symbol table. The symbol table is encoded in the custom "name" section and contains names and indexes into other sections. For now we only support parsing function (code) symbols. The result is that you can set breakpoints by symbol name, whereas previously breakpoints by name required debug info (DWARF). This is also necessary for Swift, which checks for the presence of `swift_release` as a heuristic to determine if there's a static Swift stdlib.
1 parent 2e9944a commit 5be2063

File tree

7 files changed

+358
-10
lines changed

7 files changed

+358
-10
lines changed

lldb/include/lldb/lldb-enumerations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,7 @@ enum SectionType {
777777
eSectionTypeLLDBTypeSummaries,
778778
eSectionTypeLLDBFormatters,
779779
eSectionTypeSwiftModules,
780+
eSectionTypeWasmName,
780781
};
781782

782783
FLAGS_ENUM(EmulateInstructionOptions){

lldb/source/Core/Section.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ const char *Section::GetTypeAsCString() const {
153153
return "lldb-formatters";
154154
case eSectionTypeSwiftModules:
155155
return "swift-modules";
156+
case eSectionTypeWasmName:
157+
return "wasm-name";
156158
case eSectionTypeOther:
157159
return "regular";
158160
}
@@ -415,6 +417,7 @@ bool Section::ContainsOnlyDebugInfo() const {
415417
case eSectionTypeCompactUnwind:
416418
case eSectionTypeGoSymtab:
417419
case eSectionTypeAbsoluteAddress:
420+
case eSectionTypeWasmName:
418421
case eSectionTypeOther:
419422
// Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug
420423
// information that we parse at all. This was causing system files with no

lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
11561156
case eSectionTypeDataObjCMessageRefs:
11571157
case eSectionTypeDataObjCCFStrings:
11581158
case eSectionTypeGoSymtab:
1159+
case eSectionTypeWasmName:
11591160
return AddressClass::eData;
11601161

11611162
case eSectionTypeDebug:

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Lines changed: 135 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/StringRef.h"
2323
#include "llvm/BinaryFormat/Magic.h"
2424
#include "llvm/BinaryFormat/Wasm.h"
25+
#include "llvm/Support/CheckedArithmetic.h"
2526
#include "llvm/Support/Endian.h"
2627
#include "llvm/Support/Format.h"
2728
#include <optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
5051
return version == llvm::wasm::WasmVersion;
5152
}
5253

53-
static std::optional<ConstString>
54+
// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55+
static std::optional<std::string>
5456
GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
5557
// A Wasm string is encoded as a vector of UTF-8 codes.
5658
// Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
7274
return std::nullopt;
7375
}
7476

75-
llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76-
return ConstString(str);
77+
return std::string(toStringRef(llvm::ArrayRef(str_storage)));
7778
}
7879

7980
char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
182183
// identifying the custom section, followed by an uninterpreted sequence
183184
// of bytes.
184185
lldb::offset_t prev_offset = c.tell();
185-
std::optional<ConstString> sect_name = GetWasmString(data, c);
186+
std::optional<std::string> sect_name = GetWasmString(data, c);
186187
if (!sect_name)
187188
return false;
188189

@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191192

192193
uint32_t section_length = payload_len - (c.tell() - prev_offset);
193194
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194-
section_id, *sect_name});
195+
section_id, ConstString(*sect_name)});
195196
*offset_ptr += (c.tell() + section_length);
196197
} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197198
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
248249
return true;
249250
}
250251

251-
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252+
/// Parse the function bodies stored in the Wasm code section.
///
/// The section data is read as a ULEB128 function count followed, for each
/// function, by a ULEB128 body size and then the body bytes themselves.
///
/// \param code_section_sp
///     The code section whose data is decoded.
///
/// \return
///     One AddressRange per function, in encoding order. Each range starts at
///     the section-relative offset of the first byte after the size field and
///     spans the body size. An error is returned if a count or size does not
///     fit the uint32_t limit or if the encoded data runs past the end of the
///     section.
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
  DataExtractor code_section_data;
  code_section_sp->GetSectionData(code_section_data);
  const lldb::offset_t section_size = code_section_data.GetByteSize();
  lldb::offset_t offset = 0;

  const uint64_t function_count = code_section_data.GetULEB128(&offset);
  if (function_count >= std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("function count overflows uint32_t");

  // Every function occupies at least one byte (its size field), so a count
  // larger than the remaining data is bogus. Rejecting it here keeps a
  // malformed module from forcing a huge allocation below.
  if (offset > section_size || function_count > section_size - offset)
    return llvm::createStringError(
        "function count exceeds the size of the code section");

  std::vector<AddressRange> functions;
  functions.reserve(function_count);

  for (uint64_t i = 0; i < function_count; ++i) {
    // Reading past the end would silently yield a size of 0 without advancing,
    // so detect truncation explicitly.
    if (offset >= section_size)
      return llvm::createStringError(
          "function entry starts past the end of the code section");

    const uint64_t function_size = code_section_data.GetULEB128(&offset);
    if (function_size >= std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("function size overflows uint32_t");
    if (offset > section_size || function_size > section_size - offset)
      return llvm::createStringError(
          "function extends past the end of the code section");

    // llvm-objdump considers the ULEB with the function size to be part of the
    // function. We can't do that here because that would break symbolic
    // breakpoints, as that address is never executed.
    functions.emplace_back(code_section_sp, offset, function_size);

    // Cannot overflow: offset + function_size <= section_size was checked
    // above.
    offset += function_size;
  }

  return functions;
}
283+
284+
/// Parse the Wasm name section and create a symbol for every named function.
///
/// The section is read as a sequence of subsections, each consisting of a
/// one-byte type, a ULEB128 payload size and the payload. Only the function
/// name subsection (WASM_NAMES_FUNCTION) is interpreted: it holds a ULEB128
/// entry count followed by (ULEB128 index, Wasm string) pairs. All other
/// subsection types (data segment, global, local, ...) are skipped.
///
/// \param name_section_sp
///     The section whose data is decoded.
/// \param functions
///     Function address ranges; an entry's index selects the range used for
///     its symbol. Entries whose index is out of range are ignored.
///
/// \return
///     The code symbols that were created, or an error if a size or count does
///     not fit the uint32_t limit, an offset overflows, or the data could not
///     be read.
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
           const std::vector<AddressRange> &functions) {
  DataExtractor name_section_data;
  name_section_sp->GetSectionData(name_section_data);

  llvm::DataExtractor data = name_section_data.GetAsLLVM();
  llvm::DataExtractor::Cursor c(0);
  std::vector<Symbol> symbols;
  while (c && c.tell() < data.size()) {
    const uint8_t type = data.getU8(c);
    const uint64_t size = data.getULEB128(c);
    if (!c)
      break;
    if (size >= std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("size overflows uint32_t");

    // Remember where this subsection ends so that parsing always resumes at
    // the next subsection header, even if the payload is shorter or longer
    // than what was consumed while interpreting it.
    const std::optional<uint64_t> subsection_end =
        llvm::checkedAddUnsigned(c.tell(), size);
    if (!subsection_end)
      return llvm::createStringError("offset overflows uint64_t");

    if (type == llvm::wasm::WASM_NAMES_FUNCTION) {
      const uint64_t count = data.getULEB128(c);
      if (count >= std::numeric_limits<uint32_t>::max())
        return llvm::createStringError("function count overflows uint32_t");

      for (uint64_t i = 0; c && i < count; ++i) {
        const uint64_t idx = data.getULEB128(c);
        const std::optional<std::string> name = GetWasmString(data, c);
        // A failed read leaves the cursor in an error state, which ends the
        // loop; an out-of-range index just skips this entry.
        if (!name || idx >= functions.size())
          continue;
        symbols.emplace_back(
            symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
            /*is_artificial=*/false, functions[idx],
            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
            /*flags=*/0);
      }
      if (!c)
        break;
    }

    // Skip whatever remains of this subsection (all of it for the types we
    // don't interpret).
    c.seek(*subsection_end);
  }

  if (!c)
    return c.takeError();

  return symbols;
}
334+
335+
/// Populate \p symtab with the function symbols described by the Wasm name
/// section.
///
/// Entries in the name section refer to functions by index, so the code
/// section is decoded first to obtain the function address ranges. If either
/// section is missing or fails to parse, the problem is logged and the symbol
/// table is left untouched.
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
  assert(m_sections_up && "sections must be parsed");
  Log *log = GetLog(LLDBLog::Object);

  // Step 1: locate the code section and decode the function ranges.
  SectionSP code_sp =
      m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
  if (!code_sp) {
    LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
    return;
  }

  llvm::Expected<std::vector<AddressRange>> function_ranges =
      ParseFunctions(code_sp);
  if (!function_ranges) {
    LLDB_LOG_ERROR(log, function_ranges.takeError(),
                   "Failed to parse Wasm functions: {0}");
    return;
  }

  // Step 2: locate the name section and turn its entries into symbols.
  SectionSP name_sp =
      m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
  if (!name_sp) {
    LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
    return;
  }

  llvm::Expected<std::vector<Symbol>> parsed_symbols =
      ParseNames(name_sp, *function_ranges);
  if (!parsed_symbols) {
    LLDB_LOG_ERROR(log, parsed_symbols.takeError(),
                   "Failed to parse Wasm names: {0}");
    return;
  }

  // Step 3: hand every symbol to the symbol table and finalize it.
  for (const Symbol &sym : *parsed_symbols)
    symtab.AddSymbol(sym);

  symtab.Finalize();
}
252376

253377
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254-
if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
378+
if (Name == "name")
379+
return lldb::eSectionTypeWasmName;
380+
if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
255381
return ObjectFile::GetDWARFSectionTypeFromName(Name);
256-
}
257382
return eSectionTypeOther;
258383
}
259384

@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397522
ReadImageData(sect_info.offset, kBufferSize);
398523
llvm::DataExtractor data = section_header_data.GetAsLLVM();
399524
llvm::DataExtractor::Cursor c(0);
400-
std::optional<ConstString> symbols_url = GetWasmString(data, c);
525+
std::optional<std::string> symbols_url = GetWasmString(data, c);
401526
if (symbols_url)
402-
return FileSpec(symbols_url->GetStringRef());
527+
return FileSpec(*symbols_url);
403528
}
404529
}
405530
return std::nullopt;

lldb/source/Symbol/ObjectFile.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) {
379379
case eSectionTypeELFDynamicSymbols:
380380
case eSectionTypeELFRelocationEntries:
381381
case eSectionTypeELFDynamicLinkInfo:
382+
case eSectionTypeWasmName:
382383
case eSectionTypeOther:
383384
return AddressClass::eUnknown;
384385
case eSectionTypeAbsoluteAddress:

0 commit comments

Comments
 (0)