diff --git a/.circleci/config.yml b/.circleci/config.yml index ca64502b3bc31..2fd4fb0e9de81 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,12 +68,23 @@ commands: install-rust: steps: - run: - name: install rust + name: install rust and wasm-bindgen + # rust and wasm-bindgen are always installed together so there is no + # CI environment with one but not the other. The wasm-bindgen-cli + # version is pinned to match the library the test crate depends on; + # wasm-bindgen requires the CLI and the library to be the exact same + # version. + # TODO: revert to the published `--version 0.2.126` crate once the + # bracket-access export fix lands and releases + # (wasm-bindgen/wasm-bindgen#5217). The branch is 0.2.126 plus that + # fix, so its schema still matches the `=0.2.126` library the test + # crates depend on. command: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y export PATH=${HOME}/.cargo/bin:${PATH} rustup target add wasm32-unknown-emscripten echo "export PATH=\"\$HOME/.cargo/bin:\$PATH\"" >> $BASH_ENV + cargo install wasm-bindgen-cli --git https://github.com/guybedford/wasm-bindgen --branch emscripten-identifier --locked install-node-version: description: "install a specific version of node" parameters: diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index b5fc2d858d109..03bca14c35a1b 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -3352,7 +3352,12 @@ Default value: [] WASM_BINDGEN ============ -Run wasm-bindgen and integrate the rust-exported symbols into the rest of Emscripten's JS output. +Run wasm-bindgen and integrate the rust-exported symbols into the rest of +Emscripten's JS output. +Set to 1 to always run wasm-bindgen (e.g. a C/C++ build linking a Rust +staticlib). 
Set to 'auto' to run it only when the linked wasm carries +wasm-bindgen's marker section, which is how cargo/rustc opts in when driving +emcc as the linker; otherwise 'auto' is a no-op. Default value: 0 diff --git a/src/jsifier.mjs b/src/jsifier.mjs index 84dc251827b75..e607fcd324199 100644 --- a/src/jsifier.mjs +++ b/src/jsifier.mjs @@ -934,6 +934,10 @@ var proxiedFunctionTable = [ '//FORWARDED_DATA:' + JSON.stringify({ librarySymbols, + // The final EXPORTED_FUNCTIONS set, including additions made by JS + // libraries (e.g. wasm-bindgen self-registering its exports), so the + // caller can re-derive which library symbols were exported. + exportedFunctions: Array.from(EXPORTED_FUNCTIONS), nativeAliases, warnings: warningOccured(), asyncFuncs, diff --git a/src/postamble.js b/src/postamble.js index f57f24b76d773..83293777b9882 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -238,7 +238,14 @@ function checkUnflushedContent() { #endif // EXIT_RUNTIME #endif // ASSERTIONS +#if WASM_ESM_INTEGRATION +// Provide the aggregate exports object for code that reaches the wasm exports by +// name (e.g. wasm-bindgen's glue) via a namespace import. Emscripten's own named +// imports are unaffected and remain tree-shakable. +import * as wasmExports from './{{{ WASM_BINARY_FILE }}}'; +#else var wasmExports; +#endif #if SPLIT_MODULE var wasmRawExports; #endif diff --git a/src/settings.js b/src/settings.js index 1262f8e7e51a3..b2689575b7467 100644 --- a/src/settings.js +++ b/src/settings.js @@ -2221,7 +2221,12 @@ var LEGACY_RUNTIME = false; // [link] var SIGNATURE_CONVERSIONS = []; -// Run wasm-bindgen and integrate the rust-exported symbols into the rest of Emscripten's JS output. +// Run wasm-bindgen and integrate the rust-exported symbols into the rest of +// Emscripten's JS output. +// Set to 1 to always run wasm-bindgen (e.g. a C/C++ build linking a Rust +// staticlib). 
Set to 'auto' to run it only when the linked wasm carries +// wasm-bindgen's marker section, which is how cargo/rustc opts in when driving +// emcc as the linker; otherwise 'auto' is a no-op. // [link] var WASM_BINDGEN = 0; diff --git a/test/rust/bindgen_greeter/.cargo/config.toml b/test/rust/bindgen_greeter/.cargo/config.toml new file mode 100644 index 0000000000000..c9be0d51ea09a --- /dev/null +++ b/test/rust/bindgen_greeter/.cargo/config.toml @@ -0,0 +1,7 @@ +[build] +target = "wasm32-unknown-emscripten" +rustflags = [ + "-Cllvm-args=-enable-emscripten-cxx-exceptions=0", + "-Cpanic=abort", + "-Crelocation-model=static", +] diff --git a/test/rust/bindgen_greeter/Cargo.toml b/test/rust/bindgen_greeter/Cargo.toml new file mode 100644 index 0000000000000..ae11b6329c60a --- /dev/null +++ b/test/rust/bindgen_greeter/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "bindgen_greeter" +edition = "2021" + +[[bin]] +name = "bindgen_greeter" +path = "src/main.rs" + +[dependencies] +wasm-bindgen = "=0.2.126" diff --git a/test/rust/bindgen_greeter/src/main.rs b/test/rust/bindgen_greeter/src/main.rs new file mode 100644 index 0000000000000..e4afb889b0c19 --- /dev/null +++ b/test/rust/bindgen_greeter/src/main.rs @@ -0,0 +1,23 @@ +use wasm_bindgen::prelude::*; + +#[wasm_bindgen] +pub struct Greeter { + greeting: String, +} + +#[wasm_bindgen] +impl Greeter { + #[wasm_bindgen(constructor)] + pub fn new(greeting: String) -> Greeter { + Greeter { greeting } + } + + pub fn greet(&self, name: String) -> String { + format!("{}, {}!", self.greeting, name) + } +} + +fn main() { + // Matches the emscripten idiom: main runs automatically on init. 
+ println!("main ran"); +} diff --git a/test/test_other.py b/test/test_other.py index 111557ed32e6c..c4e68dd59eb0c 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -271,6 +271,11 @@ def requires_rust(func): return requires_tool('cargo', 'RUST')(func) +def requires_wasm_bindgen(func): + assert callable(func) + return requires_tool('wasm-bindgen', 'WASM_BINDGEN')(func) + + def requires_pkg_config(func): assert callable(func) @@ -15017,20 +15022,106 @@ def test_rust_integration_basics(self): self.do_runf('main.cpp', 'Hello from rust!', cflags=[lib]) @requires_rust - def test_wasm_bindgen_integration(self): + @requires_wasm_bindgen + @parameterized({ + '': ([],), + # -O3 turns on binaryen's metadce pass, which must keep the wasm-bindgen + # glue the generated library reaches by name (e.g. __wbindgen_start, + # __wbindgen_externrefs) alive rather than DCE-ing it as an unused export. + 'O3': (['-O3'],), + }) + def test_wasm_bindgen_integration(self, opt): copytree(test_file('rust/bindgen_integration'), '.') - self.run_process(['cargo', 'add', 'wasm-bindgen']) + # Pin the library to the (managed) wasm-bindgen-cli version on PATH; + # wasm-bindgen requires the CLI and the library to match exactly. + self.run_process(['cargo', 'add', 'wasm-bindgen@=0.2.126']) self.run_process(['cargo', 'build']) lib = 'target/wasm32-unknown-emscripten/debug/libbindgen_integration.a' self.assertExists(lib) - create_file('empty.c', '') + # A hand-written EMSCRIPTEN_KEEPALIVE C export must remain surfaced + # alongside wasm-bindgen's self-registered API; the wasm-bindgen glue + # suppression must not drop it. 
+ create_file('native.c', ''' + #include <emscripten.h> + EMSCRIPTEN_KEEPALIVE int em_double(int x) { return x * 2; } + ''') create_file('post.js', ''' - Module.onRuntimeInitialized = () => out(Module.rs_add(17, 25)); + Module.onRuntimeInitialized = () => { + out('rs_add=' + Module.rs_add(17, 25)); + out('em_double=' + Module._em_double(20)); + }; ''') - self.run_process(['cargo', 'install', 'wasm-bindgen-cli']) - self.do_runf('empty.c', '42', cflags=[lib, '-sWASM_BINDGEN', '--post-js=post.js', '-lexports.js']) + output = self.do_runf('native.c', cflags=[lib, '-sWASM_BINDGEN', '--post-js=post.js', '-lexports.js'] + opt) + self.assertContained('rs_add=42', output) + self.assertContained('em_double=40', output) + + # ESM-integration and factory (MODULARIZE) surface the clean wasm-bindgen API + # differently (named ESM exports vs `Module.<name>`). Both must expose exactly + # the `Greeter` class and none of the raw wasm exports rustc lists. + @requires_rust + @requires_wasm_bindgen + @parameterized({ + 'esm': (['-sWASM_ESM_INTEGRATION'], False), + 'factory': (['-sMODULARIZE', '-sEXPORT_ES6'], False), + # -O3 turns on binaryen's import/export minifier, which renames the wasm + # export names; the wasm-bindgen glue reaches them by name from JS, so the + # JS call sites must be renamed in lockstep. Built with --release because a + # cargo debug (-g) build at -O3 trips an unrelated binaryen DWARF assertion. + 'factory_optimized': (['-sMODULARIZE', '-sEXPORT_ES6', '-O3'], True), + }) + def test_wasm_bindgen_rustc_driven(self, cflags, release): + if '-sWASM_ESM_INTEGRATION' in cflags: + prelude = ''' + import init, * as mod from './bindgen_greeter.js'; + await init(); + ''' + else: + prelude = ''' + import Module from './bindgen_greeter.js'; + const mod = await Module(); + ''' + # cargo/rustc links via emcc; pass -sWASM_BINDGEN=auto (plus the output-mode + # settings) through so emcc detects wasm-bindgen's marker section in the + # linked wasm and runs wasm-bindgen as a post-link step. 
+ copytree(test_file('rust/bindgen_greeter'), '.') + # rustc invokes emcc as the linker; ensure it uses *this* emcc and pass the + # link settings through. + cargo_cmd = ['cargo', 'build'] + profile_dir = 'debug' + if release: + cargo_cmd.append('--release') + profile_dir = 'release' + with env_modify({'CARGO_TARGET_WASM32_UNKNOWN_EMSCRIPTEN_LINKER': EMCC, + 'EMCC_CFLAGS': ' '.join(['-sWASM_BINDGEN=auto'] + cflags)}): + self.run_process(cargo_cmd) + + # cargo copies only the .js and .wasm; the ESM support module and snippets + # stay in deps/, so run from there. + out_dir = f'target/wasm32-unknown-emscripten/{profile_dir}/deps' + create_file(os.path.join(out_dir, 'run.mjs'), prelude + ''' + const greeting = new mod.Greeter('Hello').greet('world'); + if (greeting !== 'Hello, world!') throw new Error('unexpected greeting: ' + greeting); + // None of the raw wasm exports leak into the user-facing API. + for (const name of ['_main', 'greeter_greet', '_greeter_greet', + '__wbindgen_malloc', '___wbindgen_malloc']) { + if (mod[name] !== undefined) throw new Error('leaked export: ' + name); + } + console.log(greeting); + ''') + self.node_args += ['--experimental-wasm-modules', '--no-warnings'] + output = self.run_js(os.path.join(out_dir, 'run.mjs')) + self.assertContained('Hello, world!', output) + # `main` runs automatically on init (matching the emscripten C++ idiom), + # even though `_main` is not surfaced as a user-facing export. + self.assertContained('main ran', output) + + def test_wasm_bindgen_auto_no_marker(self): + # -sWASM_BINDGEN=auto is a no-op for an ordinary build with no wasm-bindgen + # marker section: wasm-bindgen is never invoked (so it need not be installed) + # and the program builds and runs normally. 
+ self.do_runf('hello_world.c', 'Hello, world!', cflags=['-sWASM_BINDGEN=auto']) def test_relative_em_cache(self): with env_modify({'EM_CACHE': 'foo'}): diff --git a/tools/building.py b/tools/building.py index ea95f2e7d66ba..f1e7ab1d75cb5 100644 --- a/tools/building.py +++ b/tools/building.py @@ -24,7 +24,7 @@ utils, webassembly, ) -from .settings import settings +from .settings import settings, user_settings from .shared import ( CLANG_CC, CLANG_CXX, @@ -57,6 +57,14 @@ _is_ar_cache: dict[str, bool] = {} # the exports the user requested user_requested_exports: set[str] = set() +# JS library symbols that were exported (MODULARIZE=instance), derived from the +# JS compiler's librarySymbols and EXPORTED_FUNCTIONS; the WASM_ESM_INTEGRATION +# wrapper re-exports them. +exported_js_library_symbols: set[str] = set() +# The raw wasm exports wasm-bindgen's generated bindings reach by name (the +# supplied/expansion glue), mangled. These are suppressed from the public +# surface; EMSCRIPTEN_KEEPALIVE exports are not in this set and remain. +wasm_bindgen_internal_exports: set[str] = set() # A list of feature flags to pass to each binaryen invocation (like `wasm-opt`, # etc.). This is received by the first call to binaryen (e.g. `wasm-emscripten-finalize`) # which reads it using `--detect-features`. @@ -300,7 +308,12 @@ def lld_flags(args, linker_inputs=None): # grouping. args = [a for a in args if a not in {'--start-group', '--end-group'}] - if settings.WASM_BINDGEN: + # Retain the wasm exports wasm-bindgen's glue reaches by name. This is the + # emcc-driven staticlib flow (explicit -sWASM_BINDGEN), where nobody else + # computed the export set, so we discover it here. The cargo/rustc-driven flow + # (-sWASM_BINDGEN=auto, still unresolved at link time) supplies EXPORTED_FUNCTIONS + # itself and never needs this. 
+ if settings.WASM_BINDGEN == 1 and 'EXPORTED_FUNCTIONS' not in user_settings: exported_symbols = get_wasm_bindgen_exported_symbols(linker_inputs) args.extend(f'--export={e}' for e in exported_symbols) @@ -814,6 +827,11 @@ def metadce(js_file, wasm_file, debug_info, last): graph = json.loads(txt) # ensure that functions expected to be exported to the outside are roots required_symbols = user_requested_exports.union(set(settings.SIDE_MODULE_IMPORTS)) + # wasm-bindgen's generated JS library reaches some wasm exports by name only + # indirectly (e.g. __wbindgen_start / __wbindgen_externrefs, invoked from the + # wasm start function), which emitDCEGraph can't trace to a root. They are kept + # off the public JS surface but must stay live in the wasm, so root them here. + required_symbols |= wasm_bindgen_internal_exports for item in graph: if 'export' in item: export = asmjs_mangle(item['export']) @@ -1285,6 +1303,14 @@ def run_wasm_opt(infile, outfile=None, args=[], **kwargs): # noqa return run_binaryen_command('wasm-opt', infile, outfile, args=args, **kwargs) +def is_wasm_bindgen_module(wasm_file): + # wasm-bindgen marks modules built for the emscripten target with this custom + # section so emcc, when used as the linker (e.g. by cargo/rustc), can detect + # under `-sWASM_BINDGEN=auto` that wasm-bindgen needs to run as a post-link step. + with webassembly.Module(wasm_file) as module: + return module.get_custom_section('__wasm_bindgen_emscripten_marker') is not None + + def run_wasm_bindgen(infile): bindgen_out_dir = os.path.join(get_emscripten_temp_dir(), 'bindgen_out') @@ -1299,16 +1325,36 @@ def run_wasm_bindgen(infile): '--out-dir', bindgen_out_dir, ] + exports_before = {e.name for e in webassembly.get_exports(infile)} + check_call(cmd) # Don't try to predict the .wasm filename that wasm-bindgen outputs. Instead # just grab the .wasm file itself. 
all_output_files = os.listdir(bindgen_out_dir) new_wasm_file = [x for x in all_output_files if x.endswith('.wasm')][0] - - shutil.copyfile(os.path.join(bindgen_out_dir, new_wasm_file), infile) - - return os.path.join(bindgen_out_dir, 'library_bindgen.js') + new_wasm_path = os.path.join(bindgen_out_dir, new_wasm_file) + + exports_after = {e.name for e in webassembly.get_exports(new_wasm_path)} + # Report which placeholder exports wasm-bindgen consumed so the caller can + # drop them from EXPORTED_FUNCTIONS, and which exports its expansion added so + # the caller can keep them off the public surface. + removed_exports = exports_before - exports_after + added_exports = exports_after - exports_before + + shutil.copyfile(new_wasm_path, infile) + + # wasm-bindgen emits imported JS snippets into `snippets/` and the `import` + # statements referencing them into `library_bindgen.extern-pre.js`, only when + # the crate actually imports JS. + extern_pre_js = os.path.join(bindgen_out_dir, 'library_bindgen.extern-pre.js') + if not os.path.exists(extern_pre_js): + extern_pre_js = None + snippets_dir = os.path.join(bindgen_out_dir, 'snippets') + if not os.path.isdir(snippets_dir): + snippets_dir = None + + return os.path.join(bindgen_out_dir, 'library_bindgen.js'), removed_exports, added_exports, extern_pre_js, snippets_dir intermediate_counter = 0 diff --git a/tools/emscripten.py b/tools/emscripten.py index fd3f53f93db33..998bc227edf58 100644 --- a/tools/emscripten.py +++ b/tools/emscripten.py @@ -440,6 +440,13 @@ def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True, base_metadat pre += "}\n" report_missing_exports(forwarded_json['librarySymbols']) + # A JS library symbol is exported (MODULARIZE=instance) when it is in + # EXPORTED_FUNCTIONS; derive that set rather than tracking it separately. The + # forwarded EXPORTED_FUNCTIONS includes additions made by JS libraries (e.g. + # wasm-bindgen self-registering its exports). 
+ exported_functions = set(forwarded_json['exportedFunctions']) + building.exported_js_library_symbols.update( + s for s in forwarded_json['librarySymbols'] if s in exported_functions) asm_const_pairs = ['%s: %s' % (key, value) for key, value in asm_consts] if asm_const_pairs or settings.MAIN_MODULE: @@ -610,8 +617,15 @@ def finalize_wasm(infile, outfile, js_syms): unexpected_exports = [asmjs_mangle(e) for e in unexpected_exports] unexpected_exports = [e for e in unexpected_exports if e not in expected_exports] + # Under WASM_BINDGEN, `main` runs automatically on init via the entry but + # `_main` is not surfaced as a public export (it is part of wasm-bindgen's + # internal export set, see below). if (not settings.STANDALONE_WASM and 'main' in metadata.all_exports) or '__main_argc_argv' in metadata.all_exports: - if 'EXPORTED_FUNCTIONS' in user_settings and '_main' not in settings.USER_EXPORTS: + if settings.WASM_BINDGEN: + # `main` stays a wasm export so it runs automatically on init, but `_main` + # is internal and is not surfaced. + pass + elif 'EXPORTED_FUNCTIONS' in user_settings and '_main' not in settings.USER_EXPORTS: # If `_main` was unexpectedly exported we assume it was added to # EXPORT_IF_DEFINED by `phase_linker_setup` in order that we can detect # it and report this warning. After reporting the warning we explicitly @@ -626,6 +640,13 @@ def finalize_wasm(infile, outfile, js_syms): else: unexpected_exports.append('_main') + # Keep wasm-bindgen's internal glue exports (the raw symbols its generated + # bindings reach by name, including `_main`) off the public surface. Genuine + # EMSCRIPTEN_KEEPALIVE exports are not in this set and remain. 
+ if settings.WASM_BINDGEN: + unexpected_exports = [e for e in unexpected_exports + if e not in building.wasm_bindgen_internal_exports] + building.user_requested_exports.update(unexpected_exports) settings.EXPORTED_FUNCTIONS.extend(unexpected_exports) diff --git a/tools/link.py b/tools/link.py index 5750443ebb33f..f56697a2508d5 100644 --- a/tools/link.py +++ b/tools/link.py @@ -1912,9 +1912,38 @@ def phase_post_link(options, in_wasm, wasm_target, target, js_syms, base_metadat settings.TARGET_JS_NAME = os.path.basename(js_target) + # -sWASM_BINDGEN=auto runs wasm-bindgen only when the linked wasm carries + # wasm-bindgen's marker section (cargo/rustc linking via emcc); otherwise it is + # a no-op. This lets rustc opt in without emcc guessing for ordinary builds. + if settings.WASM_BINDGEN == 'auto': + settings.WASM_BINDGEN = 1 if building.is_wasm_bindgen_module(in_wasm) else 0 + if settings.WASM_BINDGEN: - bindgen_jslib = building.run_wasm_bindgen(in_wasm) + bindgen_jslib, removed_exports, added_exports, extern_pre_js, snippets_dir = building.run_wasm_bindgen(in_wasm) settings.JS_LIBRARIES.append(bindgen_jslib) + # The exports the wasm-bindgen expansion reaches by name (the supplied + # EXPORTED_FUNCTIONS - method shims, the __wbindgen_* runtime, the marker, + # main - plus anything its expansion added) are internal, not a user-facing + # API: wasm-bindgen self-registers the real API via its JS library. Capture + # that set so it can be kept off every export layer - the ESM wrapper + # (user_requested_exports), the factory Module attachment (EXPORTED_FUNCTIONS, + # via should_export), and the keepalive pass in finalize_wasm. A genuine + # EMSCRIPTEN_KEEPALIVE C/C++ export is not in this set and is still surfaced. 
+ removed = {shared.asmjs_mangle(e) for e in removed_exports} + building.wasm_bindgen_internal_exports = ( + set(settings.USER_EXPORTS) | {shared.asmjs_mangle(e) for e in added_exports}) + # Also drop the placeholder symbols wasm-bindgen consumed (__wbindgen_describe*, + # __externref_*, ...) so they aren't reported as undefined exports. + drop = removed | building.wasm_bindgen_internal_exports + settings.EXPORTED_FUNCTIONS = [e for e in settings.EXPORTED_FUNCTIONS if e not in drop] + settings.USER_EXPORTS = [] + building.user_requested_exports.clear() + # Imported JS: emit wasm-bindgen's `import` statements as extern-pre-js and + # place the snippet files alongside the output so relative imports resolve. + if extern_pre_js: + options.extern_pre_js.append(extern_pre_js) + if snippets_dir: + shutil.copytree(snippets_dir, os.path.join(os.path.dirname(js_target), 'snippets'), dirs_exist_ok=True) metadata = phase_emscript(in_wasm, wasm_target, js_syms, base_metadata) @@ -2149,6 +2178,9 @@ def node_detection_code(): def create_esm_wrapper(wrapper_file, support_target, wasm_target): js_exports = building.user_requested_exports.union(settings.EXPORTED_RUNTIME_METHODS) + # JS library symbols the support module exports at declaration (e.g. + # wasm-bindgen's); the wrapper must forward these too. + js_exports |= building.exported_js_library_symbols js_exports = ', '.join(sorted(js_exports)) wrapper = [] diff --git a/tools/settings.py b/tools/settings.py index 8b75d5cd5bb09..06aa86e147864 100644 --- a/tools/settings.py +++ b/tools/settings.py @@ -411,7 +411,7 @@ def __setattr__(self, name, value): def check_type(self, name, value): # These settings have a variable type so cannot be easily type checked. 
- if name in {'EXECUTABLE', 'SUPPORT_LONGJMP', 'PTHREAD_POOL_SIZE', 'SEPARATE_DWARF', 'LTO', 'MODULARIZE'}: + if name in {'EXECUTABLE', 'SUPPORT_LONGJMP', 'PTHREAD_POOL_SIZE', 'SEPARATE_DWARF', 'LTO', 'MODULARIZE', 'WASM_BINDGEN'}: return expected_type = self.types.get(name) if not expected_type: