2222#include " llvm/ADT/StringRef.h"
2323#include " llvm/BinaryFormat/Magic.h"
2424#include " llvm/BinaryFormat/Wasm.h"
25+ #include " llvm/Support/CheckedArithmetic.h"
2526#include " llvm/Support/Endian.h"
2627#include " llvm/Support/Format.h"
2728#include < optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
5051 return version == llvm::wasm::WasmVersion;
5152}
5253
53- static std::optional<ConstString>
54+ // FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55+ static std::optional<std::string>
5456GetWasmString (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
5557 // A Wasm string is encoded as a vector of UTF-8 codes.
5658 // Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
7274 return std::nullopt ;
7375 }
7476
75- llvm::StringRef str = toStringRef (llvm::ArrayRef (str_storage));
76- return ConstString (str);
77+ return std::string (toStringRef (llvm::ArrayRef (str_storage)));
7778}
7879
7980char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
182183 // identifying the custom section, followed by an uninterpreted sequence
183184 // of bytes.
184185 lldb::offset_t prev_offset = c.tell ();
185- std::optional<ConstString > sect_name = GetWasmString (data, c);
186+ std::optional<std::string > sect_name = GetWasmString (data, c);
186187 if (!sect_name)
187188 return false ;
188189
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191192
192193 uint32_t section_length = payload_len - (c.tell () - prev_offset);
193194 m_sect_infos.push_back (section_info{*offset_ptr + c.tell (), section_length,
194- section_id, *sect_name});
195+ section_id, ConstString ( *sect_name) });
195196 *offset_ptr += (c.tell () + section_length);
196197 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197198 m_sect_infos.push_back (section_info{*offset_ptr + c.tell (),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
248249 return true ;
249250}
250251
251- void ObjectFileWasm::ParseSymtab (Symtab &symtab) {}
252+ static llvm::Expected<std::vector<AddressRange>>
253+ ParseFunctions (SectionSP code_section_sp) {
254+ DataExtractor code_section_data;
255+ code_section_sp->GetSectionData (code_section_data);
256+ lldb::offset_t offset = 0 ;
257+
258+ const uint64_t function_count = code_section_data.GetULEB128 (&offset);
259+ if (function_count >= std::numeric_limits<uint32_t >::max ())
260+ return llvm::createStringError (" function count overflows uint32_t" );
261+
262+ std::vector<AddressRange> functions;
263+ functions.reserve (function_count);
264+
265+ for (uint32_t i = 0 ; i < function_count; ++i) {
266+ const uint64_t function_size = code_section_data.GetULEB128 (&offset);
267+ if (function_size >= std::numeric_limits<uint32_t >::max ())
268+ return llvm::createStringError (" function size overflows uint32_t" );
269+ // llvm-objdump considers the ULEB with the function size to be part of the
270+ // function. We can't do that here because that would break symbolic
271+ // breakpoints, as that address is never executed.
272+ functions.emplace_back (code_section_sp, offset, function_size);
273+
274+ std::optional<lldb::offset_t > next_offset =
275+ llvm::checkedAddUnsigned (offset, function_size);
276+ if (!next_offset)
277+ return llvm::createStringError (" function offset overflows uint64_t" );
278+ offset = *next_offset;
279+ }
280+
281+ return functions;
282+ }
283+
284+ static llvm::Expected<std::vector<Symbol>>
285+ ParseNames (SectionSP name_section_sp,
286+ const std::vector<AddressRange> &functions) {
287+ DataExtractor name_section_data;
288+ name_section_sp->GetSectionData (name_section_data);
289+
290+ llvm::DataExtractor data = name_section_data.GetAsLLVM ();
291+ llvm::DataExtractor::Cursor c (0 );
292+ std::vector<Symbol> symbols;
293+ while (c && c.tell () < data.size ()) {
294+ const uint8_t type = data.getU8 (c);
295+ const uint64_t size = data.getULEB128 (c);
296+ if (size >= std::numeric_limits<uint32_t >::max ())
297+ return llvm::createStringError (" size overflows uint32_t" );
298+
299+ switch (type) {
300+ case llvm::wasm::WASM_NAMES_FUNCTION: {
301+ const uint64_t count = data.getULEB128 (c);
302+ if (count >= std::numeric_limits<uint32_t >::max ())
303+ return llvm::createStringError (" function count overflows uint32_t" );
304+
305+ for (uint64_t i = 0 ; c && i < count; ++i) {
306+ const uint64_t idx = data.getULEB128 (c);
307+ const std::optional<std::string> name = GetWasmString (data, c);
308+ if (!name || idx >= functions.size ())
309+ continue ;
310+ symbols.emplace_back (
311+ symbols.size (), Mangled (*name), lldb::eSymbolTypeCode,
312+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
313+ /* is_artificial=*/ false , functions[idx],
314+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
315+ /* flags=*/ 0 );
316+ }
317+ } break ;
318+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
319+ case llvm::wasm::WASM_NAMES_GLOBAL:
320+ case llvm::wasm::WASM_NAMES_LOCAL:
321+ default :
322+ std::optional<uint64_t > offset = llvm::checkedAddUnsigned (c.tell (), size);
323+ if (!offset)
324+ return llvm::createStringError (" offset overflows uint64_t" );
325+ c.seek (*offset);
326+ }
327+ }
328+
329+ if (!c)
330+ return c.takeError ();
331+
332+ return symbols;
333+ }
334+
335+ void ObjectFileWasm::ParseSymtab (Symtab &symtab) {
336+ assert (m_sections_up && " sections must be parsed" );
337+ Log *log = GetLog (LLDBLog::Object);
338+
339+ // The name section contains names and indexes. First parse the functions from
340+ // the code section so we can access them by their index.
341+ SectionSP code_section_sp =
342+ m_sections_up->FindSectionByType (lldb::eSectionTypeCode, false );
343+ if (!code_section_sp) {
344+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no functions section" );
345+ return ;
346+ }
347+
348+ llvm::Expected<std::vector<AddressRange>> functions =
349+ ParseFunctions (code_section_sp);
350+ if (!functions) {
351+ LLDB_LOG_ERROR (log, functions.takeError (),
352+ " Failed to parse Wasm functions: {0}" );
353+ return ;
354+ }
355+
356+ // Parse the name section.
357+ SectionSP name_section_sp =
358+ m_sections_up->FindSectionByType (lldb::eSectionTypeWasmName, false );
359+ if (!name_section_sp) {
360+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no names section" );
361+ return ;
362+ }
363+
364+ llvm::Expected<std::vector<Symbol>> symbols =
365+ ParseNames (name_section_sp, *functions);
366+ if (!symbols) {
367+ LLDB_LOG_ERROR (log, symbols.takeError (), " Failed to parse Wasm names: {0}" );
368+ return ;
369+ }
370+
371+ for (const Symbol &symbol : *symbols)
372+ symtab.AddSymbol (symbol);
373+
374+ symtab.Finalize ();
375+ }
252376
253377static SectionType GetSectionTypeFromName (llvm::StringRef Name) {
254- if (Name.consume_front (" .debug_" ) || Name.consume_front (" .zdebug_" )) {
378+ if (Name == " name" )
379+ return lldb::eSectionTypeWasmName;
380+ if (Name.consume_front (" .debug_" ) || Name.consume_front (" .zdebug_" ))
255381 return ObjectFile::GetDWARFSectionTypeFromName (Name);
256- }
257382 return eSectionTypeOther;
258383}
259384
@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397522 ReadImageData (sect_info.offset , kBufferSize );
398523 llvm::DataExtractor data = section_header_data.GetAsLLVM ();
399524 llvm::DataExtractor::Cursor c (0 );
400- std::optional<ConstString > symbols_url = GetWasmString (data, c);
525+ std::optional<std::string > symbols_url = GetWasmString (data, c);
401526 if (symbols_url)
402- return FileSpec (symbols_url-> GetStringRef () );
527+ return FileSpec (* symbols_url);
403528 }
404529 }
405530 return std::nullopt ;
0 commit comments