Skip to content

Commit 058bf3b

Browse files
Fix wasm implementation
1 parent: 2d7ef25; commit: 058bf3b

File tree

2 files changed: 66 additions (+66) and 69 deletions (-69).

.github/workflows/emscripten.yml

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ concurrency:
66
jobs:
77
test:
88
runs-on: ubuntu-latest
9-
9+
1010
steps:
1111
- name: Checkout
1212
uses: actions/checkout@v3
@@ -19,11 +19,9 @@ jobs:
1919
python
2020
init-shell: bash
2121

22-
23-
2422
- name: Build script
2523
shell: bash -el {0}
2624
run: |
2725
echo "Build script for wasm"
2826
playwright install
29-
./test/test_wasm/test_wasm.sh
27+
./test/test_wasm/test_wasm.sh

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 64 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,7 @@ namespace xsimd
380380
template <class A>
381381
inline batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
382382
{
383-
return wasm_f32x4_eq(self, other);
383+
return wasm_i32x4_eq(self, other);
384384
}
385385
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
386386
inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
@@ -440,7 +440,7 @@ namespace xsimd
440440
template <class A>
441441
inline batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
442442
{
443-
return wasm_f64x2_eq(self, other);
443+
return wasm_i64x2_eq(self, other);
444444
}
445445

446446
// fast_cast
@@ -579,6 +579,30 @@ namespace xsimd
579579
0xFFFFFF00,
580580
0xFFFFFFFF,
581581
};
582+
alignas(A::alignment()) static const uint32_t lut16[][4] = {
583+
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
584+
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
585+
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
586+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
587+
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
588+
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
589+
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
590+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
591+
{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
592+
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
593+
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
594+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
595+
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
596+
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
597+
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
598+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
599+
};
600+
alignas(A::alignment()) static const uint64_t lut8[][4] = {
601+
{ 0x0000000000000000ul, 0x0000000000000000ul },
602+
{ 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
603+
{ 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
604+
{ 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
605+
};
582606
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
583607
{
584608
assert(!(mask & ~0xFFFF) && "inbound mask");
@@ -587,15 +611,17 @@ namespace xsimd
587611
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
588612
{
589613
assert(!(mask & ~0xFF) && "inbound mask");
590-
return wasm_i64x2_make(lut64[mask >> 4], lut64[mask & 0xF]);
614+
return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
591615
}
592616
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
593617
{
594-
return batch_bool_cast<T>(from_mask(batch_bool<float, A> {}, mask, wasm {}));
618+
assert(!(mask & ~0xFul) && "inbound mask");
619+
return wasm_v128_load((const v128_t*)lut16[mask]);
595620
}
596621
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
597622
{
598-
return batch_bool_cast<T>(from_mask(batch_bool<double, A> {}, mask, wasm {}));
623+
assert(!(mask & ~0x3ul) && "inbound mask");
624+
return wasm_v128_load((const v128_t*)lut8[mask]);
599625
}
600626
}
601627

@@ -1114,44 +1140,6 @@ namespace xsimd
11141140
return wasm_f64x2_extract_lane(tmp2, 0);
11151141
}
11161142

1117-
// reduce_max
1118-
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
1119-
inline T reduce_max(batch<T, A> const& self, requires_arch<wasm>) noexcept
1120-
{
1121-
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1122-
batch<T, A> acc0 = max(self, step0);
1123-
1124-
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
1125-
batch<T, A> acc1 = max(acc0, step1);
1126-
1127-
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
1128-
batch<T, A> acc2 = max(acc1, step2);
1129-
if (sizeof(T) == 2)
1130-
return acc2.get(0);
1131-
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
1132-
batch<T, A> acc3 = max(acc2, step3);
1133-
return acc3.get(0);
1134-
}
1135-
1136-
// reduce_min
1137-
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
1138-
inline T reduce_min(batch<T, A> const& self, requires_arch<wasm>) noexcept
1139-
{
1140-
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1141-
batch<T, A> acc0 = min(self, step0);
1142-
1143-
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
1144-
batch<T, A> acc1 = min(acc0, step1);
1145-
1146-
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
1147-
batch<T, A> acc2 = min(acc1, step2);
1148-
if (sizeof(T) == 2)
1149-
return acc2.get(0);
1150-
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
1151-
batch<T, A> acc3 = min(acc2, step3);
1152-
return acc3.get(0);
1153-
}
1154-
11551143
// rsqrt
11561144
template <class A>
11571145
inline batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1259,29 +1247,15 @@ namespace xsimd
12591247

12601248
// shuffle
12611249
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
1262-
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<wasm>) noexcept
1250+
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
12631251
{
1264-
// shuffle within lane
1265-
if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
1266-
return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
1267-
1268-
// shuffle within opposite lane
1269-
if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
1270-
return wasm_i32x4_shuffle(y, x, I0, I1, I2, I3);
1271-
return shuffle(x, y, mask, generic {});
1252+
return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
12721253
}
12731254

12741255
template <class A, class ITy, ITy I0, ITy I1>
1275-
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1> mask, requires_arch<wasm>) noexcept
1256+
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1>, requires_arch<wasm>) noexcept
12761257
{
1277-
// shuffle within lane
1278-
if (I0 < 2 && I1 >= 2)
1279-
return wasm_i64x2_shuffle(x, y, I0, I1);
1280-
1281-
// shuffle within opposite lane
1282-
if (I0 >= 2 && I1 < 2)
1283-
return wasm_i64x2_shuffle(y, x, I0, I1);
1284-
return shuffle(x, y, mask, generic {});
1258+
return wasm_i64x2_shuffle(x, y, I0, I1);
12851259
}
12861260

12871261
// set
@@ -1500,7 +1474,6 @@ namespace xsimd
15001474
}
15011475

15021476
// swizzle
1503-
15041477
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
15051478
inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
15061479
{
@@ -1516,7 +1489,7 @@ namespace xsimd
15161489
template <class A, uint64_t V0, uint64_t V1>
15171490
inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1>, requires_arch<wasm>) noexcept
15181491
{
1519-
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1);
1492+
return wasm_i64x2_shuffle(self, self, V0, V1);
15201493
}
15211494

15221495
template <class A, uint64_t V0, uint64_t V1>
@@ -1528,7 +1501,7 @@ namespace xsimd
15281501
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
15291502
inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
15301503
{
1531-
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), V0, V1, V2, V3);
1504+
return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
15321505
}
15331506

15341507
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
@@ -1537,6 +1510,32 @@ namespace xsimd
15371510
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
15381511
}
15391512

1513+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1514+
inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
1515+
{
1516+
return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
1517+
}
1518+
1519+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1520+
inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
1521+
{
1522+
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
1523+
}
1524+
1525+
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1526+
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1527+
inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
1528+
{
1529+
return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
1530+
}
1531+
1532+
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1533+
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1534+
inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
1535+
{
1536+
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
1537+
}
1538+
15401539
// trunc
15411540
template <class A>
15421541
inline batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1625,4 +1624,4 @@ namespace xsimd
16251624
}
16261625
}
16271626

1628-
#endif
1627+
#endif

0 commit comments

Comments (0)