diff --git a/pyproject.toml b/pyproject.toml
index ba2a20533640a..72314ccd37a15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ lint.ignore = [
   "PLW0603",
   "PLW1510",
   "PLW2901",
+  "PLC0415",
   "UP030", # TODO
   "UP031", # TODO
   "UP032", # TODO
diff --git a/src/settings_internal.js b/src/settings_internal.js
index ae89b4b194730..d475180b40954 100644
--- a/src/settings_internal.js
+++ b/src/settings_internal.js
@@ -260,6 +260,11 @@ var ASYNCIFY_IMPORTS_EXCEPT_JS_LIBS = [];
 
 var WARN_DEPRECATED = true;
 
+// Enable fast math optimizations in wasm-opt when -ffast-math or -Ofast is
+// passed. This enables aggressive floating-point optimizations that may
+// violate IEEE 754 semantics but can improve performance.
+var FAST_MATH = 0;
+
 // WebGL 2 provides new garbage-free entry points to call to WebGL. Use
 // those always when possible.
 // We currently set this to false for certain browser when large memory sizes
diff --git a/test/other/test_fast_math.c b/test/other/test_fast_math.c
new file mode 100644
index 0000000000000..787dd999b7d3c
--- /dev/null
+++ b/test/other/test_fast_math.c
@@ -0,0 +1,47 @@
+#include <math.h>
+#include <stdio.h>
+
+static double mandelbrot_iter(double cx, double cy, int max_iter) {
+  double x = 0.0, y = 0.0;
+  for (int i = 0; i < max_iter; i++) {
+    if (x*x + y*y > 4.0) return (double)i;
+    double tx = x*x - y*y + cx;
+    y = 2.0*x*y + cy;
+    x = tx;
+  }
+  return (double)max_iter;
+}
+
+static double newton_raphson(double x, int iterations) {
+  for (int i = 0; i < iterations; i++) {
+    double fx = x*x*x - x - 1.0;
+    double fpx = 3.0*x*x - 1.0;
+    if (fabs(fpx) < 1e-10) break;
+    x = x - fx / fpx;
+  }
+  return x;
+}
+
+int main() {
+  double result = 0.0;
+
+  for (int i = 0; i < 100; i++) {
+    double x = (i - 50) * 0.02;
+    for (int j = 0; j < 100; j++) {
+      double y = (j - 50) * 0.02;
+      result += mandelbrot_iter(x, y, 50);
+    }
+  }
+
+  for (int i = 0; i < 50; i++) {
+    result += newton_raphson(1.5 + i * 0.1, 20);
+  }
+
+  for (int i = 0; i < 1000; i++) {
+    double angle = i * 0.01;
+    result += sin(angle) * cos(angle) + tan(angle);
+  }
+
+  printf("Result: %f\n", result);
+  return 0;
+}
diff --git a/test/test_other.py b/test/test_other.py
index 2089a7da5e79e..4e38e0a210d5f 100644
--- a/test/test_other.py
+++ b/test/test_other.py
@@ -15799,3 +15799,24 @@ def has_defined_function(file, func):
     self.assertIn('main.cpp', out)
     self.assertIn('foo.cpp', out)
     self.assertIn('/emsdk/emscripten/system/lib/libc/musl/src/string/strcmp.c', out)
+
+  def test_binaryen_fast_math(self):
+    # Use a simple input; contents don't matter for -v flag inspection
+    err = self.run_process([EMCC, test_file('hello_world.c'), '-v', '-O2', '-ffast-math'], stderr=PIPE).stderr
+    self.assertContained('--fast-math', err)
+
+    err_no_fast = self.run_process([EMCC, test_file('hello_world.c'), '-v', '-O2'], stderr=PIPE).stderr
+    self.assertNotContained('--fast-math', err_no_fast)
+
+    # A later -fno-fast-math overrides an earlier -ffast-math (last flag wins)
+    err_overridden = self.run_process([EMCC, test_file('hello_world.c'), '-v', '-O2', '-ffast-math', '-fno-fast-math'], stderr=PIPE).stderr
+    self.assertNotContained('--fast-math', err_overridden)
+
+  def test_fast_math(self):
+    self.run_process([EMCC, test_file('other/test_fast_math.c'), '-O2', '-o', 'no_fast.wasm'])
+    no_fast_size = os.path.getsize('no_fast.wasm')
+    self.run_process([EMCC, test_file('other/test_fast_math.c'), '-O2', '-ffast-math', '-o', 'with_fast.wasm'])
+    with_fast_size = os.path.getsize('with_fast.wasm')
+    print(f'no_fast_size={no_fast_size} with_fast_size={with_fast_size}')
+
+    self.assertLessEqual(with_fast_size, no_fast_size)
diff --git a/tools/cmdline.py b/tools/cmdline.py
index f263110faf3d7..e96da1d8e496a 100644
--- a/tools/cmdline.py
+++ b/tools/cmdline.py
@@ -294,9 +294,8 @@ def consume_arg_file():
         settings.SHRINK_LEVEL = 0
         settings.DEBUG_LEVEL = max(settings.DEBUG_LEVEL, 1)
       elif requested_level == 'fast':
-        # TODO(https://github.com/emscripten-core/emscripten/issues/21497):
-        # If we ever map `-ffast-math` to `wasm-opt --fast-math` then
-        # then we should enable that too here.
+        # -Ofast typically includes -ffast-math semantics
+        settings.FAST_MATH = 1
         requested_level = 3
         settings.SHRINK_LEVEL = 0
       else:
@@ -545,6 +544,10 @@ def consume_arg_file():
       settings.WASM_EXCEPTIONS = 1
     elif arg == '-fignore-exceptions':
       settings.DISABLE_EXCEPTION_CATCHING = 1
+    elif arg == '-ffast-math':
+      settings.FAST_MATH = 1
+    elif arg == '-fno-fast-math':
+      settings.FAST_MATH = 0
     elif check_arg('--default-obj-ext'):
       exit_with_error('--default-obj-ext is no longer supported by emcc')
     elif arg.startswith('-fsanitize=cfi'):
diff --git a/tools/link.py b/tools/link.py
index f06b05d650f83..7a24b252ee8dc 100644
--- a/tools/link.py
+++ b/tools/link.py
@@ -373,6 +373,8 @@ def get_binaryen_passes():
     passes += ['--pass-arg=post-emscripten-side-module']
   if optimizing:
     passes += [building.opt_level_to_str(settings.OPT_LEVEL, settings.SHRINK_LEVEL)]
+    if settings.FAST_MATH:
+      passes += ['--fast-math']
     # when optimizing, use the fact that low memory is never used (1024 is a
     # hardcoded value in the binaryen pass). we also cannot do it when the stack
     # is first, as then the stack is in the low memory that should be unused.