Skip to content

Commit d0e40ff

Browse files
authored
[lldb] Support parsing data symbols from the Wasm name section (#153494)
This PR adds support for parsing data symbols from the WebAssembly name section. Each data symbol consists of a name and the address range of a segment in the Wasm data section. Unlike other object file formats, Wasm has no symbols for referencing items within those segments (i.e. symbols the user has defined).
1 parent 0ff92fe commit d0e40ff

File tree

3 files changed

+161
-45
lines changed

3 files changed

+161
-45
lines changed

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -251,19 +251,19 @@ bool ObjectFileWasm::ParseHeader() {
251251

252252
static llvm::Expected<std::vector<AddressRange>>
253253
ParseFunctions(SectionSP code_section_sp) {
254-
DataExtractor code_section_data;
255-
code_section_sp->GetSectionData(code_section_data);
254+
DataExtractor data;
255+
code_section_sp->GetSectionData(data);
256256
lldb::offset_t offset = 0;
257257

258-
const uint64_t function_count = code_section_data.GetULEB128(&offset);
258+
const uint64_t function_count = data.GetULEB128(&offset);
259259
if (function_count > std::numeric_limits<uint32_t>::max())
260260
return llvm::createStringError("function count overflows uint32_t");
261261

262262
std::vector<AddressRange> functions;
263263
functions.reserve(function_count);
264264

265265
for (uint32_t i = 0; i < function_count; ++i) {
266-
const uint64_t function_size = code_section_data.GetULEB128(&offset);
266+
const uint64_t function_size = data.GetULEB128(&offset);
267267
if (function_size > std::numeric_limits<uint32_t>::max())
268268
return llvm::createStringError("function size overflows uint32_t");
269269
// llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
281281
return functions;
282282
}
283283

284+
/// Parse the segments of the Wasm data section into address ranges.
///
/// Reads a ULEB128 segment count, then for every segment a ULEB128 flags
/// field followed by a ULEB128 size. Each segment is recorded as an
/// AddressRange in \p data_section_sp that starts at the offset right after
/// the size field and spans the decoded size.
///
/// \param[in] data_section_sp
///     The data section whose contents are decoded.
///
/// \return
///     One AddressRange per segment, in section order, or an error if the
///     segment count, flags or size do not fit in a uint32_t, or if advancing
///     past a segment overflows the offset.
static llvm::Expected<std::vector<AddressRange>>
ParseData(SectionSP data_section_sp) {
  DataExtractor data;
  data_section_sp->GetSectionData(data);

  lldb::offset_t offset = 0;

  const uint64_t segment_count = data.GetULEB128(&offset);
  if (segment_count > std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("segment count overflows uint32_t");

  std::vector<AddressRange> segments;
  segments.reserve(segment_count);

  for (uint32_t i = 0; i < segment_count; ++i) {
    const uint64_t flags = data.GetULEB128(&offset);
    if (flags > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment flags overflows uint32_t");

    // NOTE(review): the size is read immediately after the flags. Active
    // segments appear to carry an offset initializer expression between the
    // two (the test input uses I32_CONST offsets), which is not skipped here,
    // so the decoded size may actually be the expression's opcode. Confirm
    // against the Wasm binary format before relying on these ranges.
    const uint64_t segment_size = data.GetULEB128(&offset);
    // Validate the size itself; checking the flags again here would let an
    // oversized segment size slip through.
    if (segment_size > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment size overflows uint32_t");

    // The segment payload starts right after its size field.
    segments.emplace_back(data_section_sp, offset, segment_size);

    // Skip over the payload to reach the next segment header.
    std::optional<lldb::offset_t> next_offset =
        llvm::checkedAddUnsigned(offset, segment_size);
    if (!next_offset)
      return llvm::createStringError("segment offset overflows uint64_t");
    offset = *next_offset;
  }

  return segments;
}
318+
284319
static llvm::Expected<std::vector<Symbol>>
285320
ParseNames(SectionSP name_section_sp,
286-
const std::vector<AddressRange> &functions) {
321+
const std::vector<AddressRange> &function_ranges,
322+
const std::vector<AddressRange> &segment_ranges) {
287323
DataExtractor name_section_data;
288324
name_section_sp->GetSectionData(name_section_data);
289325

@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
305341
for (uint64_t i = 0; c && i < count; ++i) {
306342
const uint64_t idx = data.getULEB128(c);
307343
const std::optional<std::string> name = GetWasmString(data, c);
308-
if (!name || idx >= functions.size())
344+
if (!name || idx >= function_ranges.size())
309345
continue;
310346
symbols.emplace_back(
311347
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
312348
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
313-
/*is_artificial=*/false, functions[idx],
349+
/*is_artificial=*/false, function_ranges[idx],
314350
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
315351
/*flags=*/0);
316352
}
317353
} break;
318-
case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
354+
case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
355+
const uint64_t count = data.getULEB128(c);
356+
if (count > std::numeric_limits<uint32_t>::max())
357+
return llvm::createStringError("data count overflows uint32_t");
358+
for (uint64_t i = 0; c && i < count; ++i) {
359+
const uint64_t idx = data.getULEB128(c);
360+
const std::optional<std::string> name = GetWasmString(data, c);
361+
if (!name || idx >= segment_ranges.size())
362+
continue;
363+
symbols.emplace_back(
364+
symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
365+
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
366+
/*is_artificial=*/false, segment_ranges[idx],
367+
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
368+
/*flags=*/0);
369+
}
370+
371+
} break;
319372
case llvm::wasm::WASM_NAMES_GLOBAL:
320373
case llvm::wasm::WASM_NAMES_LOCAL:
321374
default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
336389
assert(m_sections_up && "sections must be parsed");
337390
Log *log = GetLog(LLDBLog::Object);
338391

339-
// The name section contains names and indexes. First parse the functions from
340-
// the code section so we can access them by their index.
341-
SectionSP code_section_sp =
342-
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
343-
if (!code_section_sp) {
344-
LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
345-
return;
392+
// The name section contains names and indexes. First parse the data from the
393+
// relevant sections so we can access it by its index.
394+
std::vector<AddressRange> function_ranges;
395+
std::vector<AddressRange> segment_ranges;
396+
397+
// Parse the code section.
398+
if (SectionSP code_section_sp =
399+
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
400+
llvm::Expected<std::vector<AddressRange>> functions =
401+
ParseFunctions(code_section_sp);
402+
if (!functions) {
403+
LLDB_LOG_ERROR(log, functions.takeError(),
404+
"Failed to parse Wasm code section: {0}");
405+
return;
406+
}
407+
function_ranges = *functions;
346408
}
347409

348-
llvm::Expected<std::vector<AddressRange>> functions =
349-
ParseFunctions(code_section_sp);
350-
if (!functions) {
351-
LLDB_LOG_ERROR(log, functions.takeError(),
352-
"Failed to parse Wasm functions: {0}");
353-
return;
410+
// Parse the data section.
411+
if (SectionSP data_section_sp =
412+
m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
413+
llvm::Expected<std::vector<AddressRange>> segments =
414+
ParseData(data_section_sp);
415+
if (!segments) {
416+
LLDB_LOG_ERROR(log, segments.takeError(),
417+
"Failed to parse Wasm data section: {0}");
418+
return;
419+
}
420+
segment_ranges = *segments;
354421
}
355422

356423
// Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
362429
}
363430

364431
llvm::Expected<std::vector<Symbol>> symbols =
365-
ParseNames(name_section_sp, *functions);
432+
ParseNames(name_section_sp, function_ranges, segment_ranges);
366433
if (!symbols) {
367434
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
368435
return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
408475
// For this reason Section::GetFileAddress() must return zero for the
409476
// Code section.
410477
vm_addr = 0;
478+
} else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
479+
section_type = eSectionTypeData;
480+
section_name = ConstString("data");
411481
} else {
412482
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
413483
if (section_type == eSectionTypeOther)

lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c
2+
# char* str = "data str";
3+
#
4+
# int add(int a, int b) {
5+
# return a + b;
6+
# }
7+
#
8+
# int main() {
9+
# int i = 1;
10+
# int j = 2;
11+
# return add(i, j);
12+
# }
113
--- !WASM
214
FileHeader:
315
Version: 0x1
@@ -37,13 +49,13 @@ Sections:
3749
Mutable: true
3850
InitExpr:
3951
Opcode: I32_CONST
40-
Value: 66560
52+
Value: 66576
4153
- Index: 1
4254
Type: I32
4355
Mutable: false
4456
InitExpr:
4557
Opcode: I32_CONST
46-
Value: 1024
58+
Value: 1036
4759
- Index: 2
4860
Type: I32
4961
Mutable: false
@@ -55,44 +67,50 @@ Sections:
5567
Mutable: false
5668
InitExpr:
5769
Opcode: I32_CONST
58-
Value: 1024
70+
Value: 1040
5971
- Index: 4
6072
Type: I32
6173
Mutable: false
6274
InitExpr:
6375
Opcode: I32_CONST
64-
Value: 66560
76+
Value: 1040
6577
- Index: 5
6678
Type: I32
6779
Mutable: false
6880
InitExpr:
6981
Opcode: I32_CONST
70-
Value: 1024
82+
Value: 66576
7183
- Index: 6
7284
Type: I32
7385
Mutable: false
7486
InitExpr:
7587
Opcode: I32_CONST
76-
Value: 66560
88+
Value: 1024
7789
- Index: 7
7890
Type: I32
7991
Mutable: false
8092
InitExpr:
8193
Opcode: I32_CONST
82-
Value: 131072
94+
Value: 66576
8395
- Index: 8
8496
Type: I32
8597
Mutable: false
8698
InitExpr:
8799
Opcode: I32_CONST
88-
Value: 0
100+
Value: 131072
89101
- Index: 9
90102
Type: I32
91103
Mutable: false
92104
InitExpr:
93105
Opcode: I32_CONST
94-
Value: 1
106+
Value: 0
95107
- Index: 10
108+
Type: I32
109+
Mutable: false
110+
InitExpr:
111+
Opcode: I32_CONST
112+
Value: 1
113+
- Index: 11
96114
Type: I32
97115
Mutable: false
98116
InitExpr:
@@ -115,6 +133,9 @@ Sections:
115133
- Name: main
116134
Kind: FUNCTION
117135
Index: 3
136+
- Name: str
137+
Kind: GLOBAL
138+
Index: 1
118139
- Name: __main_void
119140
Kind: FUNCTION
120141
Index: 2
@@ -123,34 +144,34 @@ Sections:
123144
Index: 0
124145
- Name: __dso_handle
125146
Kind: GLOBAL
126-
Index: 1
147+
Index: 2
127148
- Name: __data_end
128149
Kind: GLOBAL
129-
Index: 2
150+
Index: 3
130151
- Name: __stack_low
131152
Kind: GLOBAL
132-
Index: 3
153+
Index: 4
133154
- Name: __stack_high
134155
Kind: GLOBAL
135-
Index: 4
156+
Index: 5
136157
- Name: __global_base
137158
Kind: GLOBAL
138-
Index: 5
159+
Index: 6
139160
- Name: __heap_base
140161
Kind: GLOBAL
141-
Index: 6
162+
Index: 7
142163
- Name: __heap_end
143164
Kind: GLOBAL
144-
Index: 7
165+
Index: 8
145166
- Name: __memory_base
146167
Kind: GLOBAL
147-
Index: 8
168+
Index: 9
148169
- Name: __table_base
149170
Kind: GLOBAL
150-
Index: 9
171+
Index: 10
151172
- Name: __wasm_first_page_end
152173
Kind: GLOBAL
153-
Index: 10
174+
Index: 11
154175
- Type: CODE
155176
Functions:
156177
- Index: 0
@@ -169,6 +190,20 @@ Sections:
169190
- Index: 3
170191
Locals: []
171192
Body: 1082808080000F0B
193+
- Type: DATA
194+
Segments:
195+
- SectionOffset: 7
196+
InitFlags: 0
197+
Offset:
198+
Opcode: I32_CONST
199+
Value: 1024
200+
Content: '646174612073747200'
201+
- SectionOffset: 22
202+
InitFlags: 0
203+
Offset:
204+
Opcode: I32_CONST
205+
Value: 1036
206+
Content: '00040000'
172207
- Type: CUSTOM
173208
Name: name
174209
FunctionNames:
@@ -183,8 +218,17 @@ Sections:
183218
GlobalNames:
184219
- Index: 0
185220
Name: __stack_pointer
221+
DataSegmentNames:
222+
- Index: 0
223+
Name: .rodata
224+
- Index: 1
225+
Name: .data
186226
- Type: CUSTOM
227+
HeaderSecSizeEncodingLen: 2
187228
Name: producers
229+
Languages:
230+
- Name: C11
231+
Version: ''
188232
Tools:
189233
- Name: clang
190234
Version: '22.0.0git'
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
22
# RUN: %lldb %t.wasm -o 'image dump symtab'
33

4-
# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
5-
# CHECK: Code 0x0000000000000005 {{.*}} add
6-
# CHECK: Code 0x000000000000002f {{.*}} __original_main
7-
# CHECK: Code 0x000000000000007c {{.*}} main
4+
# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
5+
# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
6+
# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
7+
# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
8+
# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
9+
# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data

0 commit comments

Comments
 (0)