Skip to content

Commit c5c2101

Browse files
authored
Merge pull request #987 from xtensor-stack/feature/fix-version-values
Fix various problems with architecture version handling
2 parents 27ec4ff + 7941abf commit c5c2101

File tree

8 files changed

+93
-109
lines changed

8 files changed

+93
-109
lines changed

.github/workflows/emscripten.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ concurrency:
66
jobs:
77
test:
88
runs-on: ubuntu-latest
9-
9+
1010
steps:
1111
- name: Checkout
1212
uses: actions/checkout@v3
@@ -19,11 +19,9 @@ jobs:
1919
python
2020
init-shell: bash
2121

22-
23-
2422
- name: Build script
2523
shell: bash -el {0}
2624
run: |
2725
echo "Build script for wasm"
2826
playwright install
29-
./test/test_wasm/test_wasm.sh
27+
./test/test_wasm/test_wasm.sh

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 64 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ namespace xsimd
380380
template <class A>
381381
inline batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
382382
{
383-
return wasm_f32x4_eq(self, other);
383+
return wasm_i32x4_eq(self, other);
384384
}
385385
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
386386
inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
@@ -440,7 +440,7 @@ namespace xsimd
440440
template <class A>
441441
inline batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
442442
{
443-
return wasm_f64x2_eq(self, other);
443+
return wasm_i64x2_eq(self, other);
444444
}
445445

446446
// fast_cast
@@ -579,6 +579,30 @@ namespace xsimd
579579
0xFFFFFF00,
580580
0xFFFFFFFF,
581581
};
582+
alignas(A::alignment()) static const uint32_t lut16[][4] = {
583+
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
584+
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
585+
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
586+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
587+
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
588+
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
589+
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
590+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
591+
{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
592+
{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
593+
{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
594+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
595+
{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
596+
{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
597+
{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
598+
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
599+
};
600+
alignas(A::alignment()) static const uint64_t lut8[][2] = { // row m = 128-bit mask for the 2-bit value m (bit i selects 64-bit lane i); two columns so a row holds exactly the two lanes that are loaded
601+
{ 0x0000000000000000ul, 0x0000000000000000ul },
602+
{ 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
603+
{ 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
604+
{ 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
605+
};
582606
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
583607
{
584608
assert(!(mask & ~0xFFFF) && "inbound mask");
@@ -587,15 +611,17 @@ namespace xsimd
587611
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
588612
{
589613
assert(!(mask & ~0xFF) && "inbound mask");
590-
return wasm_i64x2_make(lut64[mask >> 4], lut64[mask & 0xF]);
614+
return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
591615
}
592616
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
593617
{
594-
return batch_bool_cast<T>(from_mask(batch_bool<float, A> {}, mask, wasm {}));
618+
assert(!(mask & ~0xFul) && "inbound mask");
619+
return wasm_v128_load((const v128_t*)lut16[mask]);
595620
}
596621
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
597622
{
598-
return batch_bool_cast<T>(from_mask(batch_bool<double, A> {}, mask, wasm {}));
623+
assert(!(mask & ~0x3ul) && "inbound mask");
624+
return wasm_v128_load((const v128_t*)lut8[mask]);
599625
}
600626
}
601627

@@ -1114,44 +1140,6 @@ namespace xsimd
11141140
return wasm_f64x2_extract_lane(tmp2, 0);
11151141
}
11161142

1117-
// reduce_max
1118-
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
1119-
inline T reduce_max(batch<T, A> const& self, requires_arch<wasm>) noexcept
1120-
{
1121-
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1122-
batch<T, A> acc0 = max(self, step0);
1123-
1124-
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
1125-
batch<T, A> acc1 = max(acc0, step1);
1126-
1127-
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
1128-
batch<T, A> acc2 = max(acc1, step2);
1129-
if (sizeof(T) == 2)
1130-
return acc2.get(0);
1131-
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
1132-
batch<T, A> acc3 = max(acc2, step3);
1133-
return acc3.get(0);
1134-
}
1135-
1136-
// reduce_min
1137-
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
1138-
inline T reduce_min(batch<T, A> const& self, requires_arch<wasm>) noexcept
1139-
{
1140-
batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1141-
batch<T, A> acc0 = min(self, step0);
1142-
1143-
batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
1144-
batch<T, A> acc1 = min(acc0, step1);
1145-
1146-
batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
1147-
batch<T, A> acc2 = min(acc1, step2);
1148-
if (sizeof(T) == 2)
1149-
return acc2.get(0);
1150-
batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
1151-
batch<T, A> acc3 = min(acc2, step3);
1152-
return acc3.get(0);
1153-
}
1154-
11551143
// rsqrt
11561144
template <class A>
11571145
inline batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1259,29 +1247,15 @@ namespace xsimd
12591247

12601248
// shuffle
12611249
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
1262-
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<wasm>) noexcept
1250+
inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
12631251
{
1264-
// shuffle within lane
1265-
if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
1266-
return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
1267-
1268-
// shuffle within opposite lane
1269-
if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
1270-
return wasm_i32x4_shuffle(y, x, I0, I1, I2, I3);
1271-
return shuffle(x, y, mask, generic {});
1252+
return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
12721253
}
12731254

12741255
template <class A, class ITy, ITy I0, ITy I1>
1275-
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1> mask, requires_arch<wasm>) noexcept
1256+
inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1>, requires_arch<wasm>) noexcept
12761257
{
1277-
// shuffle within lane
1278-
if (I0 < 2 && I1 >= 2)
1279-
return wasm_i64x2_shuffle(x, y, I0, I1);
1280-
1281-
// shuffle within opposite lane
1282-
if (I0 >= 2 && I1 < 2)
1283-
return wasm_i64x2_shuffle(y, x, I0, I1);
1284-
return shuffle(x, y, mask, generic {});
1258+
return wasm_i64x2_shuffle(x, y, I0, I1);
12851259
}
12861260

12871261
// set
@@ -1500,7 +1474,6 @@ namespace xsimd
15001474
}
15011475

15021476
// swizzle
1503-
15041477
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
15051478
inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
15061479
{
@@ -1516,7 +1489,7 @@ namespace xsimd
15161489
template <class A, uint64_t V0, uint64_t V1>
15171490
inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1>, requires_arch<wasm>) noexcept
15181491
{
1519-
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1);
1492+
return wasm_i64x2_shuffle(self, self, V0, V1);
15201493
}
15211494

15221495
template <class A, uint64_t V0, uint64_t V1>
@@ -1528,7 +1501,7 @@ namespace xsimd
15281501
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
15291502
inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
15301503
{
1531-
return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), V0, V1, V2, V3);
1504+
return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
15321505
}
15331506

15341507
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
@@ -1537,6 +1510,32 @@ namespace xsimd
15371510
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
15381511
}
15391512

1513+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1514+
inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
1515+
{
1516+
return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
1517+
}
1518+
1519+
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1520+
inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
1521+
{
1522+
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
1523+
}
1524+
1525+
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1526+
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1527+
inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
1528+
{
1529+
return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
1530+
}
1531+
1532+
template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1533+
uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1534+
inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
1535+
{
1536+
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
1537+
}
1538+
15401539
// trunc
15411540
template <class A>
15421541
inline batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1625,4 +1624,4 @@ namespace xsimd
16251624
}
16261625
}
16271626

1628-
#endif
1627+
#endif

include/xsimd/config/xsimd_arch.hpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,22 +57,22 @@ namespace xsimd
5757
{
5858
};
5959

60-
template <class... Archs>
60+
template <unsigned... Vals>
6161
struct is_sorted;
6262

6363
template <>
6464
struct is_sorted<> : std::true_type
6565
{
6666
};
6767

68-
template <class Arch>
69-
struct is_sorted<Arch> : std::true_type
68+
template <unsigned Val>
69+
struct is_sorted<Val> : std::true_type
7070
{
7171
};
7272

73-
template <class A0, class A1, class... Archs>
74-
struct is_sorted<A0, A1, Archs...>
75-
: std::conditional<(A0::version() >= A1::version()), is_sorted<Archs...>,
73+
template <unsigned V0, unsigned V1, unsigned... Vals>
74+
struct is_sorted<V0, V1, Vals...>
75+
: std::conditional<(V0 >= V1), is_sorted<V1, Vals...>,
7676
std::false_type>::type
7777
{
7878
};
@@ -111,7 +111,7 @@ namespace xsimd
111111
struct arch_list
112112
{
113113
#ifndef NDEBUG
114-
static_assert(detail::is_sorted<Archs...>::value,
114+
static_assert(detail::is_sorted<Archs::version()...>::value,
115115
"architecture list must be sorted by version");
116116
#endif
117117

@@ -190,13 +190,13 @@ namespace xsimd
190190
struct unsupported
191191
{
192192
};
193-
using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
193+
using all_x86_architectures = arch_list<avx512vnni, avx512vbmi, avx512ifma, avx512pf, avx512bw, avx512er, avx512dq, avx512cd, avx512f, avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
194194
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
195195
using all_rvv_architectures = arch_list<detail::rvv<512>, detail::rvv<256>, detail::rvv<128>>;
196196
using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
197197
using all_riscv_architectures = all_rvv_architectures;
198198
using all_wasm_architectures = arch_list<wasm>;
199-
using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures, all_riscv_architectures, all_wasm_architectures>::type;
199+
using all_architectures = typename detail::join<all_riscv_architectures, all_wasm_architectures, all_arm_architectures, all_x86_architectures>::type;
200200

201201
using supported_architectures = typename detail::supported<all_architectures>::type;
202202

include/xsimd/types/xsimd_avx512ifma_register.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#ifndef XSIMD_AVX512IFMA_REGISTER_HPP
1313
#define XSIMD_AVX512IFMA_REGISTER_HPP
1414

15-
#include "./xsimd_avx512dq_register.hpp"
15+
#include "./xsimd_avx512bw_register.hpp"
1616

1717
namespace xsimd
1818
{
@@ -22,11 +22,11 @@ namespace xsimd
2222
*
2323
* AVX512IFMA instructions
2424
*/
25-
struct avx512ifma : avx512dq
25+
struct avx512ifma : avx512bw
2626
{
2727
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512IFMA; }
2828
static constexpr bool available() noexcept { return true; }
29-
static constexpr unsigned version() noexcept { return generic::version(3, 4, 0); }
29+
static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
3030
static constexpr char const* name() noexcept { return "avx512ifma"; }
3131
};
3232

@@ -40,7 +40,7 @@ namespace xsimd
4040
using type = simd_avx512_bool_register<T>;
4141
};
4242

43-
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512dq);
43+
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512bw);
4444

4545
}
4646
#endif

include/xsimd/types/xsimd_avx512vbmi_register.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace xsimd
2626
{
2727
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI; }
2828
static constexpr bool available() noexcept { return true; }
29-
static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
29+
static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
3030
static constexpr char const* name() noexcept { return "avx512vbmi"; }
3131
};
3232

include/xsimd/types/xsimd_avx512vnni_register.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace xsimd
2626
{
2727
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI; }
2828
static constexpr bool available() noexcept { return true; }
29-
static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
29+
static constexpr unsigned version() noexcept { return generic::version(3, 7, 0); }
3030
static constexpr char const* name() noexcept { return "avx512vnni"; }
3131
};
3232

include/xsimd/types/xsimd_sve_register.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ namespace xsimd
3636
static constexpr bool available() noexcept { return true; }
3737
static constexpr bool requires_alignment() noexcept { return true; }
3838
static constexpr std::size_t alignment() noexcept { return 16; }
39-
static constexpr unsigned version() noexcept { return generic::version(9, 0, 0); }
39+
static constexpr unsigned version() noexcept { return generic::version(9, Width / 32, 0); }
4040
static constexpr char const* name() noexcept { return "arm64+sve"; }
4141
};
4242
}

0 commit comments

Comments
 (0)