@@ -380,7 +380,7 @@ namespace xsimd
         template <class A>
         inline batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
         {
-            return wasm_f32x4_eq(self, other);
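+            // a true lane in the mask is all-ones, which is a NaN bit pattern; wasm_f32x4_eq on NaN yields false, so compare the bits as integers instead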
+            return wasm_i32x4_eq(self, other);
         }
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
@@ -440,7 +440,7 @@ namespace xsimd
         template <class A>
         inline batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
         {
-            return wasm_f64x2_eq(self, other);
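+            // same fix as for the f32 mask: an integer compare avoids the NaN semantics of a floating-point compare on all-ones lanes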
+            return wasm_i64x2_eq(self, other);
         }
 
         // fast_cast
@@ -579,6 +579,30 @@ namespace xsimd
                 0xFFFFFF00,
                 0xFFFFFFFF,
             };
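+            // lut16: each 4-bit mask value maps to four 32-bit lanes, all-ones where the corresponding bit is set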
+            alignas(A::alignment()) static const uint32_t lut16[][4] = {
+                { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+                { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
+                { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
+                { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
+                { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
+                { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
+                { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
+                { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
+                { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
+                { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
+                { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
+                { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
+                { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
+                { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
+                { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
+                { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
+            };
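+            // lut8: each 2-bit mask value maps to two 64-bit lanes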
+            alignas(A::alignment()) static const uint64_t lut8[][2] = {
+                { 0x0000000000000000ul, 0x0000000000000000ul },
+                { 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
+                { 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
+                { 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
+            };
             XSIMD_IF_CONSTEXPR (sizeof(T) == 1)
             {
                 assert(!(mask & ~0xFFFF) && "inbound mask");
@@ -587,15 +611,17 @@ namespace xsimd
             else XSIMD_IF_CONSTEXPR (sizeof(T) == 2)
             {
                 assert(!(mask & ~0xFF) && "inbound mask");
-                return wasm_i64x2_make(lut64[mask >> 4], lut64[mask & 0xF]);
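+                // wasm_i64x2_make takes (lane 0, lane 1), so the low nibble of the mask must come first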
+                return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
             }
             else XSIMD_IF_CONSTEXPR (sizeof(T) == 4)
             {
-                return batch_bool_cast<T>(from_mask(batch_bool<float, A> {}, mask, wasm {}));
+                assert(!(mask & ~0xFul) && "inbound mask");
+                return wasm_v128_load((const v128_t*)lut16[mask]);
             }
             else XSIMD_IF_CONSTEXPR (sizeof(T) == 8)
             {
-                return batch_bool_cast<T>(from_mask(batch_bool<double, A> {}, mask, wasm {}));
+                assert(!(mask & ~0x3ul) && "inbound mask");
+                return wasm_v128_load((const v128_t*)lut8[mask]);
             }
         }
 
@@ -1114,44 +1140,6 @@ namespace xsimd
             return wasm_f64x2_extract_lane(tmp2, 0);
         }
 
-        // reduce_max
-        template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
-        inline T reduce_max(batch<T, A> const& self, requires_arch<wasm>) noexcept
-        {
-            batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
-            batch<T, A> acc0 = max(self, step0);
-
-            batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
-            batch<T, A> acc1 = max(acc0, step1);
-
-            batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
-            batch<T, A> acc2 = max(acc1, step2);
-            if (sizeof(T) == 2)
-                return acc2.get(0);
-            batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
-            batch<T, A> acc3 = max(acc2, step3);
-            return acc3.get(0);
-        }
-
-        // reduce_min
-        template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
-        inline T reduce_min(batch<T, A> const& self, requires_arch<wasm>) noexcept
-        {
-            batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
-            batch<T, A> acc0 = min(self, step0);
-
-            batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
-            batch<T, A> acc1 = min(acc0, step1);
-
-            batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
-            batch<T, A> acc2 = min(acc1, step2);
-            if (sizeof(T) == 2)
-                return acc2.get(0);
-            batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
-            batch<T, A> acc3 = min(acc2, step3);
-            return acc3.get(0);
-        }
-
         // rsqrt
         template <class A>
         inline batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1259,29 +1247,15 @@ namespace xsimd
 
         // shuffle
         template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
-        inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<wasm>) noexcept
+        inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
         {
-            // shuffle within lane
-            if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
-                return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
-
-            // shuffle within opposite lane
-            if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
-                return wasm_i32x4_shuffle(y, x, I0, I1, I2, I3);
-            return shuffle(x, y, mask, generic {});
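+            // wasm_i32x4_shuffle indexes across both operands (0-3 select from x, 4-7 from y), so no per-lane special-casing is needed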
+            return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
         }
 
         template <class A, class ITy, ITy I0, ITy I1>
-        inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1> mask, requires_arch<wasm>) noexcept
+        inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1>, requires_arch<wasm>) noexcept
         {
-            // shuffle within lane
-            if (I0 < 2 && I1 >= 2)
-                return wasm_i64x2_shuffle(x, y, I0, I1);
-
-            // shuffle within opposite lane
-            if (I0 >= 2 && I1 < 2)
-                return wasm_i64x2_shuffle(y, x, I0, I1);
-            return shuffle(x, y, mask, generic {});
+            return wasm_i64x2_shuffle(x, y, I0, I1);
         }
 
         // set
@@ -1500,7 +1474,6 @@ namespace xsimd
         }
 
         // swizzle
-
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
         inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
         {
@@ -1516,7 +1489,7 @@ namespace xsimd
         template <class A, uint64_t V0, uint64_t V1>
         inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1>, requires_arch<wasm>) noexcept
         {
-            return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1);
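+            // a 64-bit self-shuffle picks lanes V0 and V1 directly instead of reassembling them from 32-bit halves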
+            return wasm_i64x2_shuffle(self, self, V0, V1);
         }
 
         template <class A, uint64_t V0, uint64_t V1>
@@ -1528,7 +1501,7 @@ namespace xsimd
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
         inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
         {
-            return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), V0, V1, V2, V3);
+            return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
         }
 
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
@@ -1537,6 +1510,32 @@ namespace xsimd
             return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
         }
 
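+        // swizzle for 16-bit and 8-bit lanes: compile-time self-shuffles; the signed overloads reuse the unsigned ones through bitwise_cast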
+        template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
+        inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
+        {
+            return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
+        }
+
+        template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
+        inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
+        {
+            return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
+        }
+
+        template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
+                  uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
+        inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
+        {
+            return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
+        }
+
+        template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
+                  uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
+        inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
+        {
+            return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
+        }
+
         // trunc
         template <class A>
         inline batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1625,4 +1624,4 @@ namespace xsimd
     }
 }
 
-#endif
+#endif