Skip to content

Commit 1fab6e2

Browse files
authored
[empath-split] Support multi-paths modules (#25577)
This changes the format of the user input "paths" file so that multiple paths can be split into a single module. The new paths file structure is now very similar to wasm-split's manifest file, with functions replaced with paths. For example, the format will be like
```
module1
path/to/a
path/to/b

module2
path/to/c
```
where `module1` and `module2` are module names.
1 parent fae1074 commit 1fab6e2

File tree

2 files changed

+104
-33
lines changed

2 files changed

+104
-33
lines changed

test/test_other.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15169,16 +15169,21 @@ def test_empath_split(self):
1516915169
#include <iostream>
1517015170
void foo() { std::cout << "foo" << std::endl; }
1517115171
''')
15172-
create_file('path_list', r'''
15172+
create_file('path_list.txt', r'''
15173+
myapp
1517315174
main.cpp
1517415175
foo.cpp
15176+
15177+
lib1
1517515178
/emsdk/emscripten/system
15179+
15180+
lib2
1517615181
/emsdk/emscripten/system/lib/libc/musl
1517715182
/emsdk/emscripten/system/lib/libcxx
1517815183
''')
1517915184

1518015185
self.run_process([EMCC, 'main.cpp', 'foo.cpp', '-gsource-map', '-g2', '-o', 'test.js'])
15181-
self.run_process([empath_split, 'test.wasm', 'path_list', '-g', '-o', 'test_primary.wasm', '--out-prefix=test_'])
15186+
self.run_process([empath_split, 'test.wasm', 'path_list.txt', '-g', '-o', 'test_primary.wasm', '--out-prefix=test_'])
1518215187

1518315188
# Check if functions are correctly assigned and split with the specified
1518415189
# paths. When one path contains another, the inner path should take its
@@ -15190,17 +15195,17 @@ def has_defined_function(file, func):
1519015195
return pattern.search(f.read()) is not None
1519115196

1519215197
# main.cpp
15193-
self.assertTrue(has_defined_function('test_0.wasm', '__original_main'))
15198+
self.assertTrue(has_defined_function('test_myapp.wasm', '__original_main'))
1519415199
# foo.cpp
15195-
self.assertTrue(has_defined_function('test_1.wasm', r'foo\\28\\29'))
15200+
self.assertTrue(has_defined_function('test_myapp.wasm', r'foo\\28\\29'))
1519615201
# /emsdk/emscripten/system
15197-
self.assertTrue(has_defined_function('test_2.wasm', '__abort_message'))
15198-
self.assertTrue(has_defined_function('test_2.wasm', 'pthread_cond_wait'))
15202+
self.assertTrue(has_defined_function('test_lib1.wasm', '__abort_message'))
15203+
self.assertTrue(has_defined_function('test_lib1.wasm', 'pthread_cond_wait'))
1519915204
# /emsdk/emscripten/system/lib/libc/musl
15200-
self.assertTrue(has_defined_function('test_3.wasm', 'strcmp'))
15205+
self.assertTrue(has_defined_function('test_lib2.wasm', 'strcmp'))
1520115206
# /emsdk/emscripten/system/lib/libcxx
15202-
self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
15203-
self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29'))
15207+
self.assertTrue(has_defined_function('test_lib2.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
15208+
self.assertTrue(has_defined_function('test_lib2.wasm', r'std::uncaught_exceptions\\28\\29'))
1520415209

1520515210
# Check --print-sources option
1520615211
out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout

tools/empath-split.py

Lines changed: 90 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,26 @@
2121
$ emcc -g2 -gsource-map a.o b.o -o result.js
2222
See https://emscripten.org/docs/porting/Debugging.html for more details.
2323
24-
This takes a wasm file and a paths file, which is a text file containing a list
25-
of paths as inputs. The paths file should contain a single path per line. A
26-
single split module will be generated per specified path. If a specified path
27-
contains another specified path, functions contained in the inner path will be
28-
split as the inner path's module, and the rest of the functions will be split as
29-
the outer path's module. Functions that do not belong to any of the specified
30-
paths will remain in the primary module.
24+
This takes a wasm file and a paths file as inputs. The paths file defines how
25+
to split modules. The format is similar to the manifest file for wasm-split, but
26+
with paths instead of function names. A module is defined by a name on a line,
27+
followed by paths on subsequent lines. Modules are separated by empty lines.
28+
For example:
29+
module1
30+
path/to/a
31+
path/to/b
32+
33+
module2
34+
path/to/c
35+
36+
This will create two modules, 'module1' and 'module2'. 'module1' will contain
37+
functions from source files under path/to/a and path/to/b. 'module2' will
38+
contain functions from source files under path/to/c.
39+
40+
If a specified path contains another specified path, functions contained in the
41+
inner path will be split as the inner path's module, and the rest of the
42+
functions will be split as the outer path's module. Functions that do not belong
43+
to any of the specified paths will remain in the primary module.
3144
3245
The paths in the paths file can be either absolute or relative, but they should
3346
match those of 'sources' field in the source map file. Sometimes a source map's
@@ -238,6 +251,50 @@ def is_synthesized_func(func):
238251
return path_to_funcs
239252

240253

254+
def normalize_path(path):
  """Return `path` in the canonical form used to compare paths-file entries.

  Three steps are applied in order:
    1. Surrounding whitespace is stripped.
    2. The result is passed through utils.normalize_path.
    3. Trailing os.sep characters are removed, so that /a/b/c and /a/b/c/
       are treated as the same path.
  """
  trimmed = path.strip()
  normalized = utils.normalize_path(trimmed)
  return normalized.rstrip(os.sep)
259+
260+
261+
def parse_paths_file(paths_file_content):
  """Parse the text of a paths file into a {module name: [paths]} dict.

  The content is processed line by line, with each line stripped of
  surrounding whitespace. Blank lines separate modules. The first non-blank
  line of a module is its name; every following non-blank line is a path,
  which is stored after being passed through normalize_path().

  Args:
    paths_file_content: The whole paths file as a single string.

  Returns:
    A dict mapping each module name to the list of its normalized paths, in
    the order they appear in the file.

  Diagnostics:
    - diagnostics.warn() is called for a module that lists no paths. The
      module is still recorded, with an empty list.
    - exit_with_error() is called when a path has already been assigned to a
      module earlier in the file, and when no module was found at all.
  """
  module_to_paths = {}
  # Reverse map, used only to detect a path that is listed more than once.
  path_to_module = {}
  cur_module = None
  cur_paths = []

  # The extra empty string is a sentinel blank line: it finalizes the last
  # module through the same code path as every other module, even when the
  # file does not end with a blank line.
  for raw_line in [*paths_file_content.splitlines(), '']:
    line = raw_line.strip()

    if not line:
      # A blank line ends the module being collected, if any. Consecutive
      # blank lines are harmless because cur_module is already None.
      if cur_module:
        if not cur_paths:
          diagnostics.warn(f"Module '{cur_module}' has no paths specified.")
        # NOTE(review): a module name that appears twice replaces the earlier
        # entry here - confirm whether that should be rejected instead.
        module_to_paths[cur_module] = cur_paths
        cur_module = None
        cur_paths = []
      continue

    if not cur_module:
      # First non-blank line after a separator names the module.
      cur_module = line
      continue

    path = normalize_path(line)
    if path in path_to_module:
      # This must be an f-string, otherwise the placeholders are printed
      # literally instead of the offending path and module names.
      exit_with_error(f"Path '{path}' cannot be assigned to module '{cur_module}'; it is already assigned to module '{path_to_module[path]}'")
    cur_paths.append(path)
    path_to_module[path] = cur_module

  if not module_to_paths:
    exit_with_error('The paths file is empty or invalid.')

  return module_to_paths
297+
241298
def main():
242299
args, forwarded_args = parse_args()
243300
check_errors(args)
@@ -247,32 +304,41 @@ def main():
247304
print_sources(sourcemap)
248305
return
249306

250-
paths = utils.read_file(args.paths_file).splitlines()
251-
paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()]
252-
# To make /a/b/c and /a/b/c/ equivalent
253-
paths = [path.rstrip(os.sep) for path in paths]
254-
# Remove duplicates
255-
paths = list(dict.fromkeys(paths))
307+
content = utils.read_file(args.paths_file)
308+
module_to_paths = parse_paths_file(content)
256309

257310
# Compute {path: list of functions} map
258-
path_to_funcs = get_path_to_functions_map(args.wasm, sourcemap, paths)
311+
all_paths = []
312+
for paths in module_to_paths.values():
313+
all_paths.extend(paths)
314+
path_to_funcs = get_path_to_functions_map(args.wasm, sourcemap, all_paths)
259315

260316
# Write .manifest file
261317
with tempfile.NamedTemporaryFile(suffix=".manifest", mode='w+', delete=args.preserve_manifest) as f:
262318
manifest = f.name
263-
for i, path in enumerate(paths):
264-
f.write(f'{i}\n')
265-
if not path_to_funcs[path]:
266-
diagnostics.warn(f'{path} does not match any functions')
319+
for i, (module, paths) in enumerate(module_to_paths.items()):
320+
if i != 0: # Unless we are the first entry add a newline separator
321+
f.write('\n')
322+
funcs = []
323+
for path in paths:
324+
if not path_to_funcs[path]:
325+
diagnostics.warn(f'{path} does not match any functions')
326+
funcs += path_to_funcs[path]
327+
if not funcs:
328+
diagnostics.warn(f"Module '{module}' does not match any functions")
329+
267330
if args.verbose:
268-
print(f'{path}: {len(path_to_funcs[path])} functions')
269-
for func in path_to_funcs[path]:
270-
print(' ' + func)
331+
print(f'{module}: {len(funcs)} functions')
332+
for path in paths:
333+
if path in path_to_funcs:
334+
print(f' {path}: {len(path_to_funcs[path])} functions')
335+
for func in path_to_funcs[path]:
336+
print(' ' + func)
271337
print()
272-
for func in path_to_funcs[path]:
338+
339+
f.write(f'{module}\n')
340+
for func in funcs:
273341
f.write(func + '\n')
274-
if i < len(paths) - 1:
275-
f.write('\n')
276342
f.flush()
277343

278344
cmd = [args.wasm_split, '--multi-split', args.wasm, '--manifest', manifest]

0 commit comments

Comments
 (0)