Skip to content

Commit 795dee4

Browse files
committed
Add slide_left&slide_right for avx512vbmi
1 parent 140a3d7 commit 795dee4

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

include/xsimd/arch/xsimd_avx512vbmi.hpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,66 @@
1717

1818
#include "../types/xsimd_avx512vbmi_register.hpp"
1919

20+
namespace xsimd
21+
{
22+
23+
namespace kernel
24+
{
25+
using namespace types;
26+
27+
namespace detail
28+
{
29+
template <size_t N, size_t... Is>
30+
constexpr std::array<uint8_t, sizeof...(Is)> make_slide_left_bytes_pattern(::xsimd::detail::index_sequence<Is...>)
31+
{
32+
return { (Is >= N ? Is - N : 0)... };
33+
}
34+
35+
template <size_t N, size_t... Is>
36+
constexpr std::array<uint8_t, sizeof...(Is)> make_slide_right_bytes_pattern(::xsimd::detail::index_sequence<Is...>)
37+
{
38+
return { (Is < (64 - N) ? Is + N : 0)... };
39+
}
40+
}
41+
42+
// slide_left
43+
template <size_t N, class A, class T>
44+
XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<avx512vbmi>) noexcept
45+
{
46+
constexpr unsigned BitCount = N * 8;
47+
if (BitCount == 0)
48+
{
49+
return x;
50+
}
51+
if (BitCount >= 512)
52+
{
53+
return batch<T, A>(T(0));
54+
}
55+
56+
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull << N;
57+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_left_bytes_pattern<N>(::xsimd::detail::make_index_sequence<512 / 8>());
58+
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
59+
}
60+
61+
// slide_right
62+
template <size_t N, class A, class T>
63+
XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<avx512vbmi>) noexcept
64+
{
65+
constexpr unsigned BitCount = N * 8;
66+
if (BitCount == 0)
67+
{
68+
return x;
69+
}
70+
if (BitCount >= 512)
71+
{
72+
return batch<T, A>(T(0));
73+
}
74+
__mmask64 mask = 0xFFFFFFFFFFFFFFFFull >> N;
75+
alignas(A::alignment()) auto slide_pattern = detail::make_slide_right_bytes_pattern<N>(::xsimd::detail::make_index_sequence<512 / 8>());
76+
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
77+
}
78+
79+
}
80+
}
81+
2082
#endif

0 commit comments

Comments
 (0)