Skip to content

Commit bce7571

Browse files
authored
[empath-split] Add --print-sources option (#25386)
The 'paths' file should contain paths that match those in the `sources` field of the source map, which can be hard to figure out because they can be relative to some build directory. This adds a `--print-sources` option, which prints the contents of the `sources` field. You can manually open source map files to obtain the same information, but they are usually hard to read because they contain no newlines. You can also use a general JSON pretty-printing tool, but it doesn't hurt to have the same information readily available here.
1 parent 25fa2a4 commit bce7571

File tree

2 files changed

+67
-12
lines changed

2 files changed

+67
-12
lines changed

test/test_other.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15655,3 +15655,9 @@ def has_defined_function(file, func):
1565515655
# /emsdk/emscripten/system/lib/libcxx
1565615656
self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
1565715657
self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29'))
15658+
15659+
# Check --print-sources option
15660+
out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout
15661+
self.assertIn('main.cpp', out)
15662+
self.assertIn('foo.cpp', out)
15663+
self.assertIn('/emsdk/emscripten/system/lib/libc/musl/src/string/strcmp.c', out)

tools/empath-split.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,24 @@
2828
split as the inner path's module, and the rest of the functions will be split as
2929
the outer path's module. Functions that do not belong to any of the specified
3030
paths will remain in the primary module.
31+
32+
The paths in the paths file can be either absolute or relative, but they should
33+
match those in the 'sources' field of the source map file. Sometimes a source map's
34+
'sources' field contains paths relative to a build directory, so source files
35+
may be recorded as '../src/subdir/test.c', for example. In this case, if you
36+
want to split the directory src/subdir, you should list it as ../src/subdir. You
37+
can manually open the source map file and check the 'sources' field, but we also
38+
provide an option to help with that. You can run
39+
$ empath-split --print-sources test.wasm
40+
or
41+
$ empath-split --print-sources --sourcemap test.wasm.map
42+
to print the list of sources in the 'sources' field of the source map. Note that
43+
emscripten's libraries' source files have /emsdk/emscripten prefix, which is a
44+
fake deterministic prefix to produce reproducible builds across platforms.
3145
"""
3246

3347
import argparse
48+
import json
3449
import os
3550
import sys
3651
import tempfile
@@ -59,23 +74,35 @@ def parse_args():
5974
enabling/disabling options. Run 'wasm-split -h' for the list of options. But you
6075
should NOT add --manifest, because this will be generated from this script.
6176
""")
62-
parser.add_argument('wasm', help='Path to the input wasm file')
63-
parser.add_argument('paths_file', help='Path to the input file containing paths')
77+
parser.add_argument('wasm', nargs='?', help='Path to the input wasm file')
78+
parser.add_argument('paths_file', nargs='?', help='Path to the input file containing paths')
6479
parser.add_argument('-s', '--sourcemap', help='Force source map file')
6580
parser.add_argument('-v', '--verbose', action='store_true',
6681
help='Print verbose info for debugging this script')
6782
parser.add_argument('--wasm-split', help='Path to wasm-split executable')
6883
parser.add_argument('--preserve-manifest', action='store_true',
6984
help='Preserve generated manifest file. This sets --verbose too.')
70-
args, forwarded_args = parser.parse_known_args()
85+
parser.add_argument('--print-sources', action='store_true',
86+
help='Print the list of sources in the source map to help figure out splitting boundaries. Does NOT perform the splitting.')
7187

88+
args, forwarded_args = parser.parse_known_args()
7289
if args.preserve_manifest:
7390
args.verbose = True
7491
if not args.wasm_split:
7592
args.wasm_split = os.path.join(building.get_binaryen_bin(), shared.exe_suffix('wasm-split'))
7693

7794
if '--manifest' in forwarded_args:
7895
parser.error('manifest file will be generated by this script and should not be given')
96+
97+
if args.print_sources:
98+
if not args.wasm and not args.sourcemap:
99+
parser.error('--print-sources requires either wasm or --sourcemap')
100+
return args, forwarded_args
101+
102+
if not args.wasm and not args.paths_file:
103+
parser.error("the following arguments are required: wasm, paths_file")
104+
if not args.paths_file:
105+
parser.error("the following arguments are required: paths_file")
79106
if '-o' not in forwarded_args and '--output' not in forwarded_args:
80107
parser.error('-o (--output) is required')
81108
return args, forwarded_args
@@ -88,22 +115,33 @@ def check_errors(args):
88115
exit_with_error(f"'{args.paths_file}' was not found or not a file")
89116

90117
if args.sourcemap:
91-
if not os.path.isfile(args.sourcemap):
92-
exit_with_error(f"'{args.sourcemap}' was not found or not a file")
118+
sourcemap = args.sourcemap
93119

94120
if args.wasm:
95121
with webassembly.Module(args.wasm) as module:
96-
if not args.sourcemap and not emsymbolizer.get_sourceMappingURL_section(module):
97-
exit_with_error('sourceMappingURL section does not exist')
98-
sourcemap = module.get_sourceMappingURL()
99-
if not os.path.isfile(sourcemap):
100-
exit_with_error(f"'{sourcemap}' was not found or not a file")
122+
if not args.sourcemap:
123+
if not emsymbolizer.get_sourceMappingURL_section(module):
124+
exit_with_error('sourceMappingURL section does not exist')
125+
sourcemap = module.get_sourceMappingURL()
101126
if not module.has_name_section():
102-
exit_with_error('Name section does not eixst')
127+
exit_with_error('Name section does not exist')
103128

129+
if not os.path.isfile(sourcemap):
130+
exit_with_error(f"'{sourcemap}' was not found or not a file")
104131
if not os.path.isfile(args.wasm_split):
105132
exit_with_error(f"'{args.wasm_split}' was not found or not a file")
106133

134+
# Check source map validity. Just perform simple checks to make sure mandatory
135+
# fields exist.
136+
try:
137+
with open(sourcemap) as f:
138+
source_map_data = json.load(f)
139+
except json.JSONDecodeError:
140+
exit_with_error(f'Invalid JSON format in file {args.sourcemap}')
141+
for field in ['version', 'sources', 'mappings']:
142+
if field not in source_map_data:
143+
exit_with_error(f"Field '{field}' is missing in the source map")
144+
107145

108146
def get_sourceMappingURL(wasm, arg_sourcemap):
109147
if arg_sourcemap:
@@ -112,6 +150,14 @@ def get_sourceMappingURL(wasm, arg_sourcemap):
112150
return module.get_sourceMappingURL()
113151

114152

153+
def print_sources(sourcemap):
  """Print every entry of the source map's 'sources' field, one per line.

  Args:
    sourcemap: Path to a source map (JSON) file.

  Raises:
    ValueError: If the 'sources' field is missing or is not a list.
  """
  # Source maps are JSON and JSON text is UTF-8; do not depend on the
  # platform's default encoding, which may not decode non-ASCII paths.
  with open(sourcemap, encoding='utf-8') as f:
    sources = json.load(f).get('sources')
  # Validate explicitly instead of using 'assert': asserts are stripped under
  # 'python -O' and give no hint about what is wrong with the input file.
  if not isinstance(sources, list):
    raise ValueError(f"'sources' field in '{sourcemap}' is missing or not a list")
  for src in sources:
    print(src)
159+
160+
115161
def get_path_to_functions_map(wasm, sourcemap, paths):
116162
def is_synthesized_func(func):
117163
# TODO There can be more
@@ -202,6 +248,9 @@ def main():
202248
check_errors(args)
203249

204250
sourcemap = get_sourceMappingURL(args.wasm, args.sourcemap)
251+
if args.print_sources:
252+
print_sources(sourcemap)
253+
return
205254

206255
paths = utils.read_file(args.paths_file).splitlines()
207256
paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()]
@@ -221,7 +270,7 @@ def main():
221270
if not path_to_funcs[path]:
222271
diagnostics.warn(f'{path} does not match any functions')
223272
if args.verbose:
224-
print(path)
273+
print(f'{path}: {len(path_to_funcs[path])} functions')
225274
for func in path_to_funcs[path]:
226275
print(' ' + func)
227276
print()

0 commit comments

Comments
 (0)