@@ -267,6 +267,8 @@ simde_vld1q_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
267267 r_ .v128 = wasm_v128_load (ptr );
268268 #elif defined(SIMDE_RISCV_V_NATIVE ) && defined(SIMDE_ARCH_RISCV_ZVFH )
269269 r_ .sv128 = __riscv_vle16_v_f16m1 ((_Float16 * )ptr , 8 );
270+ #elif defined(SIMDE_X86_AVX512FP16_NATIVE ) && defined(SIMDE_X86_AVX512VL_NATIVE )
271+ r_ .m128h = _mm_loadu_ph (SIMDE_ALIGN_CAST (__m128h const * , ptr ));
270272 #else
271273 simde_memcpy (& r_ , ptr , sizeof (r_ ));
272274 #endif
@@ -289,6 +291,8 @@ simde_vld1q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) {
289291 r_ .v128 = wasm_v128_load (ptr );
290292 #elif defined(SIMDE_RISCV_V_NATIVE )
291293 r_ .sv128 = __riscv_vle32_v_f32m1 (ptr , 4 );
294+ #elif defined(SIMDE_X86_SSE_NATIVE )
295+ r_ .m128 = _mm_loadu_ps (ptr );
292296 #else
293297 simde_memcpy (& r_ , ptr , sizeof (r_ ));
294298 #endif
@@ -311,6 +315,8 @@ simde_vld1q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) {
311315 r_ .v128 = wasm_v128_load (ptr );
312316 #elif defined(SIMDE_RISCV_V_NATIVE )
313317 r_ .sv128 = __riscv_vle64_v_f64m1 (ptr , 2 );
318+ #elif defined(SIMDE_X86_SSE2_NATIVE )
319+ r_ .m128d = _mm_loadu_pd (ptr );
314320 #else
315321 simde_memcpy (& r_ , ptr , sizeof (r_ ));
316322 #endif
@@ -333,6 +339,10 @@ simde_vld1q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
333339 r_ .v128 = wasm_v128_load (ptr );
334340 #elif defined(SIMDE_RISCV_V_NATIVE )
335341 r_ .sv128 = __riscv_vle8_v_i8m1 (ptr , 16 );
342+ #elif defined(SIMDE_X86_SSE3_NATIVE )
343+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
344+ #elif defined(SIMDE_X86_SSE2_NATIVE )
345+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
336346 #else
337347 simde_memcpy (& r_ , ptr , sizeof (r_ ));
338348 #endif
@@ -355,6 +365,10 @@ simde_vld1q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
355365 r_ .v128 = wasm_v128_load (ptr );
356366 #elif defined(SIMDE_RISCV_V_NATIVE )
357367 r_ .sv128 = __riscv_vle16_v_i16m1 (ptr , 8 );
368+ #elif defined(SIMDE_X86_SSE3_NATIVE )
369+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
370+ #elif defined(SIMDE_X86_SSE2_NATIVE )
371+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
358372 #else
359373 simde_memcpy (& r_ , ptr , sizeof (r_ ));
360374 #endif
@@ -377,6 +391,10 @@ simde_vld1q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
377391 r_ .v128 = wasm_v128_load (ptr );
378392 #elif defined(SIMDE_RISCV_V_NATIVE )
379393 r_ .sv128 = __riscv_vle32_v_i32m1 (ptr , 4 );
394+ #elif defined(SIMDE_X86_SSE3_NATIVE )
395+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
396+ #elif defined(SIMDE_X86_SSE2_NATIVE )
397+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
380398 #else
381399 simde_memcpy (& r_ , ptr , sizeof (r_ ));
382400 #endif
@@ -399,6 +417,10 @@ simde_vld1q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
399417 r_ .v128 = wasm_v128_load (ptr );
400418 #elif defined(SIMDE_RISCV_V_NATIVE )
401419 r_ .sv128 = __riscv_vle64_v_i64m1 (ptr , 2 );
420+ #elif defined(SIMDE_X86_SSE3_NATIVE )
421+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
422+ #elif defined(SIMDE_X86_SSE2_NATIVE )
423+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
402424 #else
403425 simde_memcpy (& r_ , ptr , sizeof (r_ ));
404426 #endif
@@ -421,6 +443,10 @@ simde_vld1q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
421443 r_ .v128 = wasm_v128_load (ptr );
422444 #elif defined(SIMDE_RISCV_V_NATIVE )
423445 r_ .sv128 = __riscv_vle8_v_u8m1 (ptr , 16 );
446+ #elif defined(SIMDE_X86_SSE3_NATIVE )
447+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
448+ #elif defined(SIMDE_X86_SSE2_NATIVE )
449+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
424450 #else
425451 simde_memcpy (& r_ , ptr , sizeof (r_ ));
426452 #endif
@@ -443,6 +469,10 @@ simde_vld1q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
443469 r_ .v128 = wasm_v128_load (ptr );
444470 #elif defined(SIMDE_RISCV_V_NATIVE )
445471 r_ .sv128 = __riscv_vle16_v_u16m1 (ptr , 8 );
472+ #elif defined(SIMDE_X86_SSE3_NATIVE )
473+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
474+ #elif defined(SIMDE_X86_SSE2_NATIVE )
475+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
446476 #else
447477 simde_memcpy (& r_ , ptr , sizeof (r_ ));
448478 #endif
@@ -465,6 +495,10 @@ simde_vld1q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) {
465495 r_ .v128 = wasm_v128_load (ptr );
466496 #elif defined(SIMDE_RISCV_V_NATIVE )
467497 r_ .sv128 = __riscv_vle32_v_u32m1 (ptr , 4 );
498+ #elif defined(SIMDE_X86_SSE3_NATIVE )
499+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
500+ #elif defined(SIMDE_X86_SSE2_NATIVE )
501+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
468502 #else
469503 simde_memcpy (& r_ , ptr , sizeof (r_ ));
470504 #endif
@@ -487,6 +521,10 @@ simde_vld1q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) {
487521 r_ .v128 = wasm_v128_load (ptr );
488522 #elif defined(SIMDE_RISCV_V_NATIVE )
489523 r_ .sv128 = __riscv_vle64_v_u64m1 (ptr , 2 );
524+ #elif defined(SIMDE_X86_SSE3_NATIVE )
525+ r_ .m128i = _mm_lddqu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
526+ #elif defined(SIMDE_X86_SSE2_NATIVE )
527+ r_ .m128i = _mm_loadu_si128 (SIMDE_ALIGN_CAST (__m128i const * , ptr ));
490528 #else
491529 simde_memcpy (& r_ , ptr , sizeof (r_ ));
492530 #endif
0 commit comments