Skip to content

Commit cab5be0

Browse files
authored
Test source map support for emsymbolizer (#17568)
This adds source map tests for emsymbolizer. This also does some refactoring in the script.
- Created `LocationInfo` class, which is used to treat location info in the same way across information sources (now DWARF and source map). Currently its `print` method prints location info in the same format as `llvm-symbolizer` does.
- Removed the `name` field reference from the script. It's not currently used anywhere, it's unclear what it means, and our wasm-sourcemap.py doesn't put anything in that field either.
- Renamed `WasmSourceMap.mapping` to `WasmSourceMap.mappings`.
- Removed the warning message saying the command line and the output format are not finalized yet. What is? :)
- Added source map tests to `test_emsymbolizer`. Addresses are changed because now we build the binary with both DWARF and the source map enabled. In the DWARF case, we additionally check inlined info and function names.
1 parent 2acf4bd commit cab5be0

File tree

2 files changed

+79
-44
lines changed

2 files changed

+79
-44
lines changed

emsymbolizer.py

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
from collections import namedtuple
1616
import json
1717
import os
18+
import re
19+
import subprocess
1820
import sys
1921
from tools import shared
2022
from tools import webassembly
21-
from tools.shared import check_call
2223

2324
LLVM_SYMBOLIZER = os.path.expanduser(
2425
shared.build_llvm_tool_path(shared.exe_suffix('llvm-symbolizer')))
@@ -28,6 +29,20 @@ class Error(BaseException):
2829
pass
2930

3031

32+
# Class to treat location info in a uniform way across information sources.
class LocationInfo(object):
  """A source location resolved for a single address.

  Attributes:
    source: Path of the source file, or None when it is not known.
    line: Line number as an int; 0 when it is not known.
    column: Column number as an int; 0 when it is not known.
    func: Name of the enclosing function, or None when it is not known.
  """

  def __init__(self, source=None, line=0, column=0, func=None):
    self.source = source
    # Callers may hand in either ints or decimal strings (e.g. regex capture
    # groups); normalize so the stored type does not depend on the caller.
    self.line = int(line)
    self.column = int(column)
    self.func = func

  def __str__(self):
    """Return the two-line text form: function name, then source:line:column.

    A missing function name or source file is rendered as '??'.
    """
    source = self.source if self.source else '??'
    func = self.func if self.func else '??'
    return f'{func}\n{source}:{self.line}:{self.column}'

  def print(self):
    """Write the text form of this location to stdout."""
    # Inside a method body the bare name `print` still resolves to the builtin,
    # not to this method, so this is not recursive.
    print(self)
44+
45+
3146
def get_codesec_offset(module):
3247
sec = module.get_section(webassembly.SecType.CODE)
3348
if not sec:
@@ -46,7 +61,24 @@ def symbolize_address_dwarf(module, address):
4661
vma_adjust = get_codesec_offset(module)
4762
cmd = [LLVM_SYMBOLIZER, '-e', module.filename, f'--adjust-vma={vma_adjust}',
4863
str(address)]
49-
check_call(cmd)
64+
out = shared.run_process(cmd, stdout=subprocess.PIPE).stdout.strip()
65+
out_lines = out.splitlines()
66+
# Source location regex, e.g., /abc/def.c:3:5
67+
SOURCE_LOC_RE = re.compile(r'(.+):(\d+):(\d+)$')
68+
# llvm-symbolizer prints two lines per location. The first line contains a
69+
# function name, and the second contains a source location like
70+
# '/abc/def.c:3:5'. If the function or source info is not available, it will
71+
# be printed as '??', in which case we store None. If the line and column info
72+
# is not available, they will be printed as 0, which we store as is.
73+
for i in range(0, len(out_lines), 2):
74+
func, loc_str = out_lines[i], out_lines[i + 1]
75+
m = SOURCE_LOC_RE.match(loc_str)
76+
source, line, column = m.group(1), m.group(2), m.group(3)
77+
if func == '??':
78+
func = None
79+
if source == '??':
80+
source = None
81+
LocationInfo(source, line, column, func).print()
5082

5183

5284
def get_sourceMappingURL_section(module):
@@ -58,14 +90,12 @@ def get_sourceMappingURL_section(module):
5890

5991
class WasmSourceMap(object):
6092
# This implementation is derived from emscripten's sourcemap-support.js
61-
Location = namedtuple('Location',
62-
['source', 'line', 'column', 'name'])
93+
Location = namedtuple('Location', ['source', 'line', 'column'])
6394

6495
def __init__(self):
6596
self.version = None
6697
self.sources = []
67-
self.names = []
68-
self.mapping = {}
98+
self.mappings = {}
6999
self.offsets = []
70100

71101
def parse(self, filename):
@@ -76,7 +106,6 @@ def parse(self, filename):
76106

77107
self.version = source_map_json['version']
78108
self.sources = source_map_json['sources']
79-
self.names = source_map_json['names']
80109

81110
vlq_map = {}
82111
chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
@@ -106,7 +135,6 @@ def decodeVLQ(string):
106135
src = 0
107136
line = 1
108137
col = 1
109-
name = 0
110138
for segment in source_map_json['mappings'].split(','):
111139
data = decodeVLQ(segment)
112140
info = []
@@ -121,11 +149,9 @@ def decodeVLQ(string):
121149
if len(data) >= 4:
122150
col += data[3]
123151
info.append(col)
124-
if len(data) >= 5:
125-
name += data[4]
126-
info.append(name)
152+
# TODO: see if we need the name, which is the next field (data[4])
127153

128-
self.mapping[offset] = WasmSourceMap.Location(*info)
154+
self.mappings[offset] = WasmSourceMap.Location(*info)
129155
self.offsets.append(offset)
130156
self.offsets.sort()
131157

@@ -144,18 +170,13 @@ def find_offset(self, offset):
144170

145171
def lookup(self, offset):
146172
nearest = self.find_offset(offset)
147-
assert nearest in self.mapping, 'Sourcemap has an offset with no mapping'
148-
info = self.mapping[nearest]
149-
150-
# TODO: it's kind of icky to use Location for both the internal indexed
151-
# location and external string version. Once we have more uniform output
152-
# format and API for the various backends (e.g SM vs DWARF vs others), this
153-
# could be improved.
154-
return WasmSourceMap.Location(
173+
assert nearest in self.mappings, 'Sourcemap has an offset with no mapping'
174+
info = self.mappings[nearest]
175+
return LocationInfo(
155176
self.sources[info.source] if info.source is not None else None,
156177
info.line,
157-
info.column,
158-
self.names[info.name] if info.name is not None else None)
178+
info.column
179+
)
159180

160181

161182
def symbolize_address_sourcemap(module, address, force_file):
@@ -175,11 +196,11 @@ def symbolize_address_sourcemap(module, address, force_file):
175196
sm.parse(URL)
176197
if shared.DEBUG:
177198
csoff = get_codesec_offset(module)
178-
print(sm.mapping)
199+
print(sm.mappings)
179200
# Print with section offsets to easily compare against dwarf
180-
for k, v in sm.mapping.items():
201+
for k, v in sm.mappings.items():
181202
print(f'{k-csoff:x}: {v}')
182-
print(sm.lookup(address))
203+
sm.lookup(address).print()
183204

184205

185206
def main(args):
@@ -228,8 +249,6 @@ def get_args():
228249

229250

230251
if __name__ == '__main__':
231-
print('Warning: the command-line and output format of this tool are not '
232-
'finalized yet', file=sys.stderr)
233252
try:
234253
rv = main(get_args())
235254
except (Error, webassembly.InvalidWasmError, OSError) as e:

test/test_other.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8631,29 +8631,45 @@ def test(infile, source_map_added_dir=''):
86318631
test('inner/a.cpp', 'inner')
86328632

86338633
def test_emsymbolizer(self):
8634-
# Test DWARF output
8635-
self.run_process([EMCC, test_file('core/test_dwarf.c'),
8636-
'-g', '-O1', '-o', 'test_dwarf.js'])
8634+
def check_loc_info(address, source, funcs, locs):
8635+
out = self.run_process(
8636+
[emsymbolizer, '-tcode', '-s', source, 'test_dwarf.wasm', address],
8637+
stdout=PIPE).stdout
8638+
for func in funcs:
8639+
self.assertIn(func, out)
8640+
for loc in locs:
8641+
self.assertIn(loc, out)
86378642

86388643
# Use hard-coded addresses. This is potentially brittle, but LLVM's
86398644
# O1 output is pretty minimal so hopefully it won't break too much?
86408645
# Another option would be to disassemble the binary to look for certain
86418646
# instructions or code sequences.
86428647

8643-
def get_addr(address):
8644-
return self.run_process(
8645-
[emsymbolizer, 'test_dwarf.wasm', '-tcode', address], stdout=PIPE).stdout
8646-
8647-
# Check a location in foo(), not inlined.
8648-
# If the output binary size changes use `wasm-objdump -d` on the binary
8649-
# look for the offset of the first call to `out_to_js`.
8650-
self.assertIn('test_dwarf.c:6:3', get_addr('0x8'))
8651-
# Check that both bar (inlined) and main (inlinee) are in the output,
8652-
# as described by the DWARF.
8653-
# TODO: consider also checking the function names once the output format
8654-
# stabilizes more
8655-
self.assertRegex(get_addr('0x1f').replace('\n', ''),
8656-
'test_dwarf.c:13:3.*test_dwarf.c:18:3')
8648+
# 1. Test DWARF + source map together
8649+
self.run_process([EMCC, test_file('core/test_dwarf.c'),
8650+
'-g', '-gsource-map', '-O1', '-o', 'test_dwarf.js'])
8651+
# 0x8 corresponds to out_to_js(0) within foo(), uninlined
8652+
# DWARF info provides function names, but source maps don't
8653+
check_loc_info('0x8', 'dwarf', ['foo'], ['test_dwarf.c:6:3'])
8654+
check_loc_info('0x8', 'sourcemap', [], ['test_dwarf.c:6:3'])
8655+
# 0x1f corresponds to __builtin_trap() within bar(), inlined into main()
8656+
# DWARF info provides inlined info, but source maps don't
8657+
check_loc_info('0x1f', 'dwarf', ['bar', 'main'],
8658+
['test_dwarf.c:13:3', 'test_dwarf.c:18:3'])
8659+
check_loc_info('0x1f', 'sourcemap', [], ['test_dwarf.c:13:3'])
8660+
8661+
# 2. Test source map only
8662+
self.run_process([EMCC, test_file('core/test_dwarf.c'),
8663+
'-gsource-map', '-O1', '-o', 'test_dwarf.js'])
8664+
check_loc_info('0x8', 'sourcemap', [], ['test_dwarf.c:6:3'])
8665+
check_loc_info('0x1f', 'sourcemap', [], ['test_dwarf.c:13:3'])
8666+
8667+
# 3. Test DWARF only
8668+
self.run_process([EMCC, test_file('core/test_dwarf.c'),
8669+
'-g', '-O1', '-o', 'test_dwarf.js'])
8670+
check_loc_info('0x8', 'dwarf', ['foo'], ['test_dwarf.c:6:3'])
8671+
check_loc_info('0x1f', 'dwarf', ['bar', 'main'],
8672+
['test_dwarf.c:13:3', 'test_dwarf.c:18:3'])
86578673

86588674
def test_separate_dwarf(self):
86598675
self.run_process([EMCC, test_file('hello_world.c'), '-g'])

0 commit comments

Comments
 (0)