22
22
#include " llvm/ADT/StringRef.h"
23
23
#include " llvm/BinaryFormat/Magic.h"
24
24
#include " llvm/BinaryFormat/Wasm.h"
25
+ #include " llvm/Support/CheckedArithmetic.h"
25
26
#include " llvm/Support/Endian.h"
26
27
#include " llvm/Support/Format.h"
27
28
#include < optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
50
51
return version == llvm::wasm::WasmVersion;
51
52
}
52
53
53
- static std::optional<ConstString>
54
+ // FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55
+ static std::optional<std::string>
54
56
GetWasmString (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55
57
// A Wasm string is encoded as a vector of UTF-8 codes.
56
58
// Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
72
74
return std::nullopt;
73
75
}
74
76
75
- llvm::StringRef str = toStringRef (llvm::ArrayRef (str_storage));
76
- return ConstString (str);
77
+ return std::string (toStringRef (llvm::ArrayRef (str_storage)));
77
78
}
78
79
79
80
char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
182
183
// identifying the custom section, followed by an uninterpreted sequence
183
184
// of bytes.
184
185
lldb::offset_t prev_offset = c.tell ();
185
- std::optional<ConstString > sect_name = GetWasmString (data, c);
186
+ std::optional<std::string > sect_name = GetWasmString (data, c);
186
187
if (!sect_name)
187
188
return false ;
188
189
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191
192
192
193
uint32_t section_length = payload_len - (c.tell () - prev_offset);
193
194
m_sect_infos.push_back (section_info{*offset_ptr + c.tell (), section_length,
194
- section_id, *sect_name});
195
+ section_id, ConstString ( *sect_name) });
195
196
*offset_ptr += (c.tell () + section_length);
196
197
} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197
198
m_sect_infos.push_back (section_info{*offset_ptr + c.tell (),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
248
249
return true ;
249
250
}
250
251
251
- void ObjectFileWasm::ParseSymtab (Symtab &symtab) {}
252
+ static llvm::Expected<std::vector<AddressRange>>
253
+ ParseFunctions (SectionSP code_section_sp) {
254
+ DataExtractor code_section_data;
255
+ code_section_sp->GetSectionData (code_section_data);
256
+ lldb::offset_t offset = 0 ;
257
+
258
+ const uint64_t function_count = code_section_data.GetULEB128 (&offset);
259
+ if (function_count >= std::numeric_limits<uint32_t >::max ())
260
+ return llvm::createStringError (" function count overflows uint32_t" );
261
+
262
+ std::vector<AddressRange> functions;
263
+ functions.reserve (function_count);
264
+
265
+ for (uint32_t i = 0 ; i < function_count; ++i) {
266
+ const uint64_t function_size = code_section_data.GetULEB128 (&offset);
267
+ if (function_size >= std::numeric_limits<uint32_t >::max ())
268
+ return llvm::createStringError (" function size overflows uint32_t" );
269
+ // llvm-objdump considers the ULEB with the function size to be part of the
270
+ // function. We can't do that here because that would break symbolic
271
+ // breakpoints, as that address is never executed.
272
+ functions.emplace_back (code_section_sp, offset, function_size);
273
+
274
+ std::optional<lldb::offset_t > next_offset =
275
+ llvm::checkedAddUnsigned (offset, function_size);
276
+ if (!next_offset)
277
+ return llvm::createStringError (" function offset overflows uint64_t" );
278
+ offset = *next_offset;
279
+ }
280
+
281
+ return functions;
282
+ }
283
+
284
+ static llvm::Expected<std::vector<Symbol>>
285
+ ParseNames (SectionSP name_section_sp,
286
+ const std::vector<AddressRange> &functions) {
287
+ DataExtractor name_section_data;
288
+ name_section_sp->GetSectionData (name_section_data);
289
+
290
+ llvm::DataExtractor data = name_section_data.GetAsLLVM ();
291
+ llvm::DataExtractor::Cursor c (0 );
292
+ std::vector<Symbol> symbols;
293
+ while (c && c.tell () < data.size ()) {
294
+ const uint8_t type = data.getU8 (c);
295
+ const uint64_t size = data.getULEB128 (c);
296
+ if (size >= std::numeric_limits<uint32_t >::max ())
297
+ return llvm::createStringError (" size overflows uint32_t" );
298
+
299
+ switch (type) {
300
+ case llvm::wasm::WASM_NAMES_FUNCTION: {
301
+ const uint64_t count = data.getULEB128 (c);
302
+ if (count >= std::numeric_limits<uint32_t >::max ())
303
+ return llvm::createStringError (" function count overflows uint32_t" );
304
+
305
+ for (uint64_t i = 0 ; c && i < count; ++i) {
306
+ const uint64_t idx = data.getULEB128 (c);
307
+ const std::optional<std::string> name = GetWasmString (data, c);
308
+ if (!name || idx >= functions.size ())
309
+ continue ;
310
+ symbols.emplace_back (
311
+ symbols.size (), Mangled (*name), lldb::eSymbolTypeCode,
312
+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
313
+ /* is_artificial=*/ false , functions[idx],
314
+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
315
+ /* flags=*/ 0 );
316
+ }
317
+ } break ;
318
+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
319
+ case llvm::wasm::WASM_NAMES_GLOBAL:
320
+ case llvm::wasm::WASM_NAMES_LOCAL:
321
+ default :
322
+ std::optional<uint64_t > offset = llvm::checkedAddUnsigned (c.tell (), size);
323
+ if (!offset)
324
+ return llvm::createStringError (" offset overflows uint64_t" );
325
+ c.seek (*offset);
326
+ }
327
+ }
328
+
329
+ if (!c)
330
+ return c.takeError ();
331
+
332
+ return symbols;
333
+ }
334
+
335
+ void ObjectFileWasm::ParseSymtab (Symtab &symtab) {
336
+ assert (m_sections_up && " sections must be parsed" );
337
+ Log *log = GetLog (LLDBLog::Object);
338
+
339
+ // The name section contains names and indexes. First parse the functions from
340
+ // the code section so we can access them by their index.
341
+ SectionSP code_section_sp =
342
+ m_sections_up->FindSectionByType (lldb::eSectionTypeCode, false );
343
+ if (!code_section_sp) {
344
+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no functions section" );
345
+ return ;
346
+ }
347
+
348
+ llvm::Expected<std::vector<AddressRange>> functions =
349
+ ParseFunctions (code_section_sp);
350
+ if (!functions) {
351
+ LLDB_LOG_ERROR (log, functions.takeError (),
352
+ " Failed to parse Wasm functions: {0}" );
353
+ return ;
354
+ }
355
+
356
+ // Parse the name section.
357
+ SectionSP name_section_sp =
358
+ m_sections_up->FindSectionByType (lldb::eSectionTypeWasmName, false );
359
+ if (!name_section_sp) {
360
+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no names section" );
361
+ return ;
362
+ }
363
+
364
+ llvm::Expected<std::vector<Symbol>> symbols =
365
+ ParseNames (name_section_sp, *functions);
366
+ if (!symbols) {
367
+ LLDB_LOG_ERROR (log, symbols.takeError (), " Failed to parse Wasm names: {0}" );
368
+ return ;
369
+ }
370
+
371
+ for (const Symbol &symbol : *symbols)
372
+ symtab.AddSymbol (symbol);
373
+
374
+ symtab.Finalize ();
375
+ }
252
376
253
377
/// Map a Wasm section name to an LLDB section type.
///
/// "name" maps to eSectionTypeWasmName. Names starting with ".debug_" or
/// ".zdebug_" have that prefix removed and are classified by
/// ObjectFile::GetDWARFSectionTypeFromName. Everything else is
/// eSectionTypeOther.
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
  if (Name == "name")
    return lldb::eSectionTypeWasmName;

  // consume_front() strips the prefix in place, so on a match the DWARF
  // lookup only sees the remainder of the name.
  const bool has_dwarf_prefix =
      Name.consume_front(".debug_") || Name.consume_front(".zdebug_");
  return has_dwarf_prefix ? ObjectFile::GetDWARFSectionTypeFromName(Name)
                          : eSectionTypeOther;
}
259
384
@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397
522
ReadImageData (sect_info.offset , kBufferSize );
398
523
llvm::DataExtractor data = section_header_data.GetAsLLVM ();
399
524
llvm::DataExtractor::Cursor c (0 );
400
- std::optional<ConstString > symbols_url = GetWasmString (data, c);
525
+ std::optional<std::string > symbols_url = GetWasmString (data, c);
401
526
if (symbols_url)
402
- return FileSpec (symbols_url-> GetStringRef () );
527
+ return FileSpec (* symbols_url);
403
528
}
404
529
}
405
530
return std::nullopt;
0 commit comments