Skip to content

Commit 83d884f

Browse files
junparser authored and serge-sans-paille committed
add compress&expand in avx512vbmi2
1 parent bffe22b commit 83d884f

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

include/xsimd/arch/xsimd_avx512vbmi2.hpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,57 @@
1717

1818
#include "../types/xsimd_avx512vbmi2_register.hpp"
1919

20+
namespace xsimd
21+
{
22+
23+
namespace kernel
24+
{
25+
using namespace types;
26+
27+
// compress
28+
// compress overload for batch<int16_t, A>.
// Passes mask.mask() and self to _mm512_maskz_compress_epi16 and returns the
// intrinsic's result. The trailing unnamed requires_arch<avx512vbmi2> parameter
// is not used in the body (presumably an architecture-selection tag — confirm).
// NOTE(review): per Intel's intrinsics guide the maskz form is expected to pack
// the mask-selected 16-bit lanes toward the low end and zero the rest — verify.
template <class A>
29+
XSIMD_INLINE batch<int16_t, A> compress(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
30+
{
31+
return _mm512_maskz_compress_epi16(mask.mask(), self);
32+
}
33+
// compress overload for batch<uint16_t, A>.
// Passes mask.mask() and self to _mm512_maskz_compress_epi16 — the same
// intrinsic the int16_t overload calls — and returns the intrinsic's result.
// The trailing unnamed requires_arch<avx512vbmi2> parameter is not used in the
// body (presumably an architecture-selection tag — confirm).
template <class A>
34+
XSIMD_INLINE batch<uint16_t, A> compress(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
35+
{
36+
return _mm512_maskz_compress_epi16(mask.mask(), self);
37+
}
38+
// compress overload for batch<int8_t, A>.
// Passes mask.mask() and self to _mm512_maskz_compress_epi8 and returns the
// intrinsic's result. The trailing unnamed requires_arch<avx512vbmi2> parameter
// is not used in the body (presumably an architecture-selection tag — confirm).
// NOTE(review): per Intel's intrinsics guide the maskz form is expected to pack
// the mask-selected 8-bit lanes toward the low end and zero the rest — verify.
template <class A>
39+
XSIMD_INLINE batch<int8_t, A> compress(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
40+
{
41+
return _mm512_maskz_compress_epi8(mask.mask(), self);
42+
}
43+
// compress overload for batch<uint8_t, A>.
// Passes mask.mask() and self to _mm512_maskz_compress_epi8 — the same
// intrinsic the int8_t overload calls — and returns the intrinsic's result.
// The trailing unnamed requires_arch<avx512vbmi2> parameter is not used in the
// body (presumably an architecture-selection tag — confirm).
template <class A>
44+
XSIMD_INLINE batch<uint8_t, A> compress(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
45+
{
46+
return _mm512_maskz_compress_epi8(mask.mask(), self);
47+
}
48+
49+
// expand
50+
// expand overload for batch<int16_t, A>.
// Passes mask.mask() and self to _mm512_maskz_expand_epi16 and returns the
// intrinsic's result. The trailing unnamed requires_arch<avx512vbmi2> parameter
// is not used in the body (presumably an architecture-selection tag — confirm).
// NOTE(review): per Intel's intrinsics guide the maskz form is expected to
// scatter consecutive low 16-bit lanes of the source into the mask-selected
// positions and zero the others — verify.
template <class A>
51+
XSIMD_INLINE batch<int16_t, A> expand(batch<int16_t, A> const& self, batch_bool<int16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
52+
{
53+
return _mm512_maskz_expand_epi16(mask.mask(), self);
54+
}
55+
// expand overload for batch<uint16_t, A>.
// Passes mask.mask() and self to _mm512_maskz_expand_epi16 — the same
// intrinsic the int16_t overload calls — and returns the intrinsic's result.
// The trailing unnamed requires_arch<avx512vbmi2> parameter is not used in the
// body (presumably an architecture-selection tag — confirm).
template <class A>
56+
XSIMD_INLINE batch<uint16_t, A> expand(batch<uint16_t, A> const& self, batch_bool<uint16_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
57+
{
58+
return _mm512_maskz_expand_epi16(mask.mask(), self);
59+
}
60+
// expand overload for batch<int8_t, A>.
// Passes mask.mask() and self to _mm512_maskz_expand_epi8 and returns the
// intrinsic's result. The trailing unnamed requires_arch<avx512vbmi2> parameter
// is not used in the body (presumably an architecture-selection tag — confirm).
// NOTE(review): per Intel's intrinsics guide the maskz form is expected to
// scatter consecutive low 8-bit lanes of the source into the mask-selected
// positions and zero the others — verify.
template <class A>
61+
XSIMD_INLINE batch<int8_t, A> expand(batch<int8_t, A> const& self, batch_bool<int8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
62+
{
63+
return _mm512_maskz_expand_epi8(mask.mask(), self);
64+
}
65+
// expand overload for batch<uint8_t, A>.
// Passes mask.mask() and self to _mm512_maskz_expand_epi8 — the same
// intrinsic the int8_t overload calls — and returns the intrinsic's result.
// The trailing unnamed requires_arch<avx512vbmi2> parameter is not used in the
// body (presumably an architecture-selection tag — confirm).
template <class A>
66+
XSIMD_INLINE batch<uint8_t, A> expand(batch<uint8_t, A> const& self, batch_bool<uint8_t, A> const& mask, requires_arch<avx512vbmi2>) noexcept
67+
{
68+
return _mm512_maskz_expand_epi8(mask.mask(), self);
69+
}
70+
}
71+
}
72+
2073
#endif

test/test_shuffle.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ struct compress_test
347347
}
348348
};
349349

350-
TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
350+
TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint8_t>, xsimd::batch<int8_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
351351
{
352352
compress_test<B> Test;
353353
SUBCASE("empty")
@@ -443,7 +443,7 @@ struct expand_test
443443
}
444444
};
445445

446-
TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
446+
TEST_CASE_TEMPLATE("[expand]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint8_t>, xsimd::batch<int8_t>, xsimd::batch<uint16_t>, xsimd::batch<int16_t>, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
447447
{
448448
expand_test<B> Test;
449449
SUBCASE("empty")

0 commit comments

Comments
 (0)