Skip to content

Commit 54b171d

Browse files
authored
Merge branch 'main' into cw-audio-memory64
2 parents 21f7761 + db69527 commit 54b171d

26 files changed

+3222
-113
lines changed

ChangeLog.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,17 @@ to browse the changes between the tags.
1818

1919
See docs/process.md for more on how version tagging works.
2020

21-
4.0.1 (in development)
21+
4.0.2 (in development)
2222
----------------------
23+
- Added support for compiling AVX2 intrinsics; 256-bit wide intrinsics are emulated
24+
on top of the 128-bit Wasm SIMD instruction set (#23035). Pass `-msimd128 -mavx2`
25+
to enable targeting AVX2.
2326
- The system JS libraries in `src/` were renamed from `library_foo.js` to
2427
`lib/libfoo.js`. They are still included via the same `-lfoo.js` flag so
2528
this should not be a user-visible change. (#23348)
29+
30+
4.0.1 - 01/17/25
31+
----------------
2632
- The minimum version of node required to run emscripten was bumped from v16.20
2733
to v18. Version 4.0 was mistakenly shipped with a change that required v20,
2834
but that was reverted. (#23410)

emcc.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
'fetchSettings'
7777
]
7878

79-
SIMD_INTEL_FEATURE_TOWER = ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-msse4', '-mavx']
79+
SIMD_INTEL_FEATURE_TOWER = ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-msse4', '-mavx', '-mavx2']
8080
SIMD_NEON_FLAGS = ['-mfpu=neon']
8181
LINK_ONLY_FLAGS = {
8282
'--bind', '--closure', '--cpuprofiler', '--embed-file',
@@ -474,6 +474,9 @@ def array_contains_any_of(hay, needles):
474474
if array_contains_any_of(user_args, SIMD_INTEL_FEATURE_TOWER[7:]):
475475
cflags += ['-D__AVX__=1']
476476

477+
if array_contains_any_of(user_args, SIMD_INTEL_FEATURE_TOWER[8:]):
478+
cflags += ['-D__AVX2__=1']
479+
477480
if array_contains_any_of(user_args, SIMD_NEON_FLAGS):
478481
cflags += ['-D__ARM_NEON__=1']
479482

@@ -738,11 +741,11 @@ def phase_parse_arguments(state):
738741

739742

740743
def separate_linker_flags(state, newargs):
741-
"""Process argument list separating out intput files, compiler flags
744+
"""Process argument list separating out input files, compiler flags
742745
and linker flags.
743746
744747
- Linker flags are stored in state.link_flags
745-
- Input files and compiler-only flags are return as two separate lists.
748+
- Input files and compiler-only flags are returned as two separate lists.
746749
747750
Both linker flags and input files are stored as pairs of (i, entry) where
748751
`i` is the original index in the command line arguments. This allows the two

emscripten-version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
4.0.1-git
1+
4.0.2-git

eslint.config.mjs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ export default [{
5454
'src/settings_internal.js',
5555
'src/growableHeap.js',
5656
'src/emrun_prejs.js',
57-
'src/arrayUtils.js',
5857
'src/deterministic.js',
5958
'src/base64Decode.js',
6059
'src/proxyWorker.js',

site/source/docs/porting/connecting_cpp_and_javascript/embind.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ Class properties can be defined several ways as seen below.
860860
class_<Person>("Person")
861861
.constructor<>()
862862
// Bind directly to a class member with automatically generated getters/setters using a
863-
// reference return policy so the object does not need to be deleted JS.
863+
// reference return policy so the object does not need to be deleted from JS.
864864
.property("location", &Person::location, return_value_policy::reference())
865865
// Same as above, but this will return a copy and the object must be deleted or it will
866866
// leak!

site/source/docs/porting/simd.rst

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Emscripten supports the `WebAssembly SIMD <https://github.com/webassembly/simd/>
1212
1. Enable LLVM/Clang SIMD autovectorizer to automatically target WebAssembly SIMD, without requiring changes to C/C++ source code.
1313
2. Write SIMD code using the GCC/Clang SIMD Vector Extensions (``__attribute__((vector_size(16)))``)
1414
3. Write SIMD code using the WebAssembly SIMD intrinsics (``#include <wasm_simd128.h>``)
15-
4. Compile existing SIMD code that uses the x86 SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 or AVX intrinsics (``#include <*mmintrin.h>``)
15+
4. Compile existing SIMD code that uses the x86 SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX or AVX2 intrinsics (``#include <*mmintrin.h>``)
1616
5. Compile existing SIMD code that uses the ARM NEON intrinsics (``#include <arm_neon.h>``)
1717

1818
These techniques can be freely combined in a single program.
@@ -153,6 +153,7 @@ Emscripten supports compiling existing codebases that use x86 SSE instructions b
153153
* **SSE4.1**: pass ``-msse4.1`` and ``#include <smmintrin.h>``. Use ``#ifdef __SSE4_1__`` to gate code.
154154
* **SSE4.2**: pass ``-msse4.2`` and ``#include <nmmintrin.h>``. Use ``#ifdef __SSE4_2__`` to gate code.
155155
* **AVX**: pass ``-mavx`` and ``#include <immintrin.h>``. Use ``#ifdef __AVX__`` to gate code.
156+
* **AVX2**: pass ``-mavx2`` and ``#include <immintrin.h>``. Use ``#ifdef __AVX2__`` to gate code.
156157

157158
Currently only the SSE1, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, and AVX2 instruction sets are supported. Each of these instruction sets adds on top of the previous ones, so e.g. when targeting SSE3, the instruction sets SSE1 and SSE2 are also available.
158159

@@ -1145,6 +1146,90 @@ The following table highlights the availability and expected performance of diff
11451146

11461147
Only the 128-bit wide instructions from the AVX instruction set are listed. The 256-bit wide AVX instructions are emulated by two 128-bit wide instructions.
11471148

1149+
The following table highlights the availability and expected performance of different AVX2 intrinsics. Refer to `Intel Intrinsics Guide on AVX2 <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avxnewtechs=AVX2>`_.
1150+
1151+
.. list-table:: x86 AVX2 intrinsics available via #include <immintrin.h> and -mavx2
1152+
:widths: 20 30
1153+
:header-rows: 1
1154+
1155+
* - Intrinsic name
1156+
- WebAssembly SIMD support
1157+
* - _mm_broadcastss_ps
1158+
- 💡 emulated with a general shuffle
1159+
* - _mm_broadcastsd_pd
1160+
- 💡 emulated with a general shuffle
1161+
* - _mm_blend_epi32
1162+
- 💡 emulated with a general shuffle
1163+
* - _mm_broadcastb_epi8
1164+
- 💡 emulated with a general shuffle
1165+
* - _mm_broadcastw_epi16
1166+
- 💡 emulated with a general shuffle
1167+
* - _mm_broadcastd_epi32
1168+
- 💡 emulated with a general shuffle
1169+
* - _mm_broadcastq_epi64
1170+
- 💡 emulated with a general shuffle
1171+
* - _mm256_permutevar8x32_epi32
1172+
- ❌ scalarized
1173+
* - _mm256_permute4x64_pd
1174+
- 💡 emulated with two general shuffles
1175+
* - _mm256_permutevar8x32_ps
1176+
- ❌ scalarized
1177+
* - _mm256_permute4x64_epi64
1178+
- 💡 emulated with two general shuffles
1179+
* - _mm_maskload_epi32
1180+
- ❌ scalarized
1181+
* - _mm_maskload_epi64
1182+
- ❌ scalarized
1183+
* - _mm_maskstore_epi32
1184+
- ❌ scalarized
1185+
* - _mm_maskstore_epi64
1186+
- ❌ scalarized
1187+
* - _mm_sllv_epi32
1188+
- ❌ scalarized
1189+
* - _mm_sllv_epi64
1190+
- ❌ scalarized
1191+
* - _mm_srav_epi32
1192+
- ❌ scalarized
1193+
* - _mm_srlv_epi32
1194+
- ❌ scalarized
1195+
* - _mm_srlv_epi64
1196+
- ❌ scalarized
1197+
* - _mm_mask_i32gather_pd
1198+
- ❌ scalarized
1199+
* - _mm_mask_i64gather_pd
1200+
- ❌ scalarized
1201+
* - _mm_mask_i32gather_ps
1202+
- ❌ scalarized
1203+
* - _mm_mask_i64gather_ps
1204+
- ❌ scalarized
1205+
* - _mm_mask_i32gather_epi32
1206+
- ❌ scalarized
1207+
* - _mm_mask_i64gather_epi32
1208+
- ❌ scalarized
1209+
* - _mm_mask_i32gather_epi64
1210+
- ❌ scalarized
1211+
* - _mm_mask_i64gather_epi64
1212+
- ❌ scalarized
1213+
* - _mm_i32gather_pd
1214+
- ❌ scalarized
1215+
* - _mm_i64gather_pd
1216+
- ❌ scalarized
1217+
* - _mm_i32gather_ps
1218+
- ❌ scalarized
1219+
* - _mm_i64gather_ps
1220+
- ❌ scalarized
1221+
* - _mm_i32gather_epi32
1222+
- ❌ scalarized
1223+
* - _mm_i64gather_epi32
1224+
- ❌ scalarized
1225+
* - _mm_i32gather_epi64
1226+
- ❌ scalarized
1227+
* - _mm_i64gather_epi64
1228+
- ❌ scalarized
1229+
1230+
All the 128-bit wide instructions from the AVX2 instruction set are listed.
1231+
Only a small part of the 256-bit AVX2 instruction set is listed; most of the
1232+
256-bit wide AVX2 instructions are emulated by two 128-bit wide instructions.
11481233

11491234
======================================================
11501235
Compiling SIMD code targeting ARM NEON instruction set

src/arrayUtils.js

Lines changed: 0 additions & 29 deletions
This file was deleted.

src/lib/libstrings.js

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
* SPDX-License-Identifier: MIT
55
*/
66

7-
#include "arrayUtils.js"
8-
97
addToLibrary({
108
// TextDecoder constructor defaults to UTF-8
119
#if TEXTDECODER == 2
@@ -256,8 +254,28 @@ addToLibrary({
256254
257255
$intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',
258256
$intArrayFromString__deps: ['$lengthBytesUTF8', '$stringToUTF8Array'],
259-
$intArrayFromString: intArrayFromString,
260-
$intArrayToString: intArrayToString,
257+
$intArrayFromString: (stringy, dontAddNull, length) => {
258+
var len = length > 0 ? length : lengthBytesUTF8(stringy)+1;
259+
var u8array = new Array(len);
260+
var numBytesWritten = stringToUTF8Array(stringy, u8array, 0, u8array.length);
261+
if (dontAddNull) u8array.length = numBytesWritten;
262+
return u8array;
263+
},
264+
265+
$intArrayToString: (array) => {
266+
var ret = [];
267+
for (var i = 0; i < array.length; i++) {
268+
var chr = array[i];
269+
if (chr > 0xFF) {
270+
#if ASSERTIONS
271+
assert(false, `Character code ${chr} (${String.fromCharCode(chr)}) at offset ${i} not in 0x00-0xFF.`);
272+
#endif
273+
chr &= 0xFF;
274+
}
275+
ret.push(String.fromCharCode(chr));
276+
}
277+
return ret.join('');
278+
},
261279

262280
// Given a pointer 'ptr' to a null-terminated ASCII-encoded string in the
263281
// emscripten HEAP, returns a copy of that string as a JavaScript String

src/preamble.js

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,6 @@ function initRuntime() {
225225
function preMain() {
226226
#if STACK_OVERFLOW_CHECK
227227
checkStackCookie();
228-
#endif
229-
#if PTHREADS
230-
if (ENVIRONMENT_IS_PTHREAD) return; // PThreads reuse the runtime from the main thread.
231228
#endif
232229
<<< ATMAINS >>>
233230
callRuntimeCallbacks(__ATMAIN__);
@@ -630,11 +627,7 @@ function getBinarySync(file) {
630627
async function getWasmBinary(binaryFile) {
631628
#if !SINGLE_FILE
632629
// If we don't have the binary yet, load it asynchronously using readAsync.
633-
if (!wasmBinary
634-
#if SUPPORT_BASE64_EMBEDDING
635-
|| isDataURI(binaryFile)
636-
#endif
637-
) {
630+
if (!wasmBinary) {
638631
// Fetch the binary using readAsync
639632
try {
640633
var response = await readAsync(binaryFile);

src/shell.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ if (ENVIRONMENT_IS_NODE) {
122122
#endif // ENVIRONMENT_MAY_BE_NODE
123123

124124
#if WASM_WORKERS
125-
var ENVIRONMENT_IS_WASM_WORKER = Module['$ww'];
125+
var ENVIRONMENT_IS_WASM_WORKER = !!Module['$ww'];
126126
#endif
127127

128128
// --pre-jses are emitted after the Module integration code, so that they can

0 commit comments

Comments
 (0)