
Commit d94d7dd

Generic, simple implementation for xsimd::compress (#981)

* Generic, simple implementation for xsimd::compress

  Related to #975

* fixup! Generic, simple implementation for xsimd::compress
* fixup! Generic, simple implementation for xsimd::compress

1 parent 997d9d9 commit d94d7dd

File tree: 5 files changed (+173 −0 lines)


docs/source/api/data_transfer.rst
Lines changed: 2 additions & 0 deletions

@@ -56,6 +56,8 @@ In place:
 +---------------------------------------+----------------------------------------------------+
 | :cpp:func:`insert`                    | modify a single batch slot                         |
 +---------------------------------------+----------------------------------------------------+
+| :cpp:func:`compress`                  | pack elements according to a mask                  |
++---------------------------------------+----------------------------------------------------+
 
 Between batches:

include/xsimd/arch/generic/xsimd_generic_memory.hpp
Lines changed: 30 additions & 0 deletions

@@ -32,6 +32,36 @@ namespace xsimd
 
         using namespace types;
 
+        // compress
+        namespace detail
+        {
+            template <class IT, class A, class I, size_t... Is>
+            inline batch<IT, A> create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
+            {
+                batch<IT, A> swizzle_mask(IT(0));
+                size_t i = 0;
+                alignas(A::alignment()) IT mask_buffer[batch<IT, A>::size] = { Is... };
+                size_t inserted = 0;
+                for (size_t i = 0; i < sizeof...(Is); ++i)
+                    if ((bitmask >> i) & 1u)
+                        std::swap(mask_buffer[inserted++], mask_buffer[i]);
+                return batch<IT, A>::load_aligned(&mask_buffer[0]);
+            }
+        }
+
+        template <typename A, typename T>
+        inline batch<T, A>
+        compress(batch<T, A> const& x, batch_bool<T, A> const& mask,
+                 kernel::requires_arch<generic>) noexcept
+        {
+            using IT = as_unsigned_integer_t<T>;
+            constexpr std::size_t size = batch_bool<T, A>::size;
+            auto bitmask = mask.mask();
+            auto z = select(mask, x, batch<T, A>((T)0));
+            auto compress_mask = detail::create_compress_swizzle_mask<IT, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
+            return swizzle(z, compress_mask);
+        }
+
         // extract_pair
         template <class A, class T>
         inline batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
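The generic kernel builds a fresh permutation for every mask: starting from the identity indices, each set mask bit pulls its lane index toward the front, so the selected lanes become contiguous and the zeroed lanes fill the tail. A minimal scalar sketch of that index computation, written here only to illustrate the idea and not taken from the xsimd sources:

#include <array>
#include <cstdint>
#include <iostream>
#include <utility>

// Scalar illustration of the swizzle-mask construction used by the generic
// kernel: start from the identity permutation and move every lane whose mask
// bit is set to the next free slot at the front.
template <std::size_t N>
std::array<std::uint32_t, N> compress_indices(std::uint64_t bitmask)
{
    std::array<std::uint32_t, N> idx {};
    for (std::size_t i = 0; i < N; ++i)
        idx[i] = static_cast<std::uint32_t>(i); // identity permutation
    std::size_t inserted = 0;
    for (std::size_t i = 0; i < N; ++i)
        if ((bitmask >> i) & 1u)
            std::swap(idx[inserted++], idx[i]); // selected lane goes to slot 'inserted'
    return idx;
}

int main()
{
    // Lanes 1, 2 and 5 selected (mask 0x26): prints 1 2 5 3 4 0 6 7.
    for (auto v : compress_indices<8>(0x26))
        std::cout << v << ' ';
    std::cout << '\n';
}

Swizzling the zero-masked batch with that permutation gathers the three kept lanes to the front and leaves zeros behind, which is exactly what the generic compress above returns.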

include/xsimd/arch/xsimd_avx512f.hpp
Lines changed: 32 additions & 0 deletions

@@ -661,6 +661,38 @@ namespace xsimd
             return _mm512_roundscale_pd(self, _MM_FROUND_TO_POS_INF);
         }
 
+        // compress
+        template <class A>
+        inline batch<float, A> compress(batch<float, A> const& self, batch_bool<float, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_ps(mask.mask(), self);
+        }
+        template <class A>
+        inline batch<double, A> compress(batch<double, A> const& self, batch_bool<double, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_pd(mask.mask(), self);
+        }
+        template <class A>
+        inline batch<int32_t, A> compress(batch<int32_t, A> const& self, batch_bool<int32_t, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_epi32(mask.mask(), self);
+        }
+        template <class A>
+        inline batch<uint32_t, A> compress(batch<uint32_t, A> const& self, batch_bool<uint32_t, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_epi32(mask.mask(), self);
+        }
+        template <class A>
+        inline batch<int64_t, A> compress(batch<int64_t, A> const& self, batch_bool<int64_t, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_epi64(mask.mask(), self);
+        }
+        template <class A>
+        inline batch<uint64_t, A> compress(batch<uint64_t, A> const& self, batch_bool<uint64_t, A> const& mask, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_maskz_compress_epi64(mask.mask(), self);
+        }
+
         // convert
         namespace detail
        {
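On AVX-512F each overload lowers to a single zero-masking compress instruction; batch_bool::mask() yields the __mmask value the intrinsic expects. A small intrinsics-level sketch of the same idea, assuming an AVX-512F (and POPCNT) target; the helper name is hypothetical and not part of xsimd:

#include <immintrin.h>

// Pack the selected float lanes to the front, store them, and report how
// many lanes were kept (useful when streaming compacted data).
inline int compress_store_count(float* dst, __m512 v, __mmask16 k)
{
    _mm512_storeu_ps(dst, _mm512_maskz_compress_ps(k, v)); // packed lanes, zeroed tail
    return _mm_popcnt_u32(static_cast<unsigned>(k));        // number of selected lanes
}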

include/xsimd/types/xsimd_api.hpp
Lines changed: 13 additions & 0 deletions

@@ -530,6 +530,19 @@ namespace xsimd
         return kernel::clip(x, lo, hi, A {});
     }
 
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Pick elements from \c x selected by \c mask, and append them to the
+     * resulting vector, zeroing the remaining slots
+     */
+    template <class T, class A>
+    inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::compress<A>(x, mask, A {});
+    }
+
     /**
      * @ingroup batch_complex
     *
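A brief usage sketch of the new entry point; the positive-filter scenario and the keep_positive name are illustrative, not taken from the xsimd documentation. A comparison produces the batch_bool mask, and compress packs the matching lanes to the front of the result, zero-filling the rest:

#include <cstddef>
#include "xsimd/xsimd.hpp"

// Pack the strictly positive values of each input batch to the front of the
// corresponding output batch; non-selected slots come back as zero.
void keep_positive(const float* src, float* dst, std::size_t n)
{
    using batch = xsimd::batch<float>;
    constexpr std::size_t simd_size = batch::size;
    for (std::size_t i = 0; i + simd_size <= n; i += simd_size)
    {
        batch v = batch::load_unaligned(src + i);
        auto packed = xsimd::compress(v, v > batch(0.f)); // positives first, zeros behind
        packed.store_unaligned(dst + i);
    }
    // Scalar handling of the trailing elements is omitted for brevity.
}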

test/test_shuffle.cpp
Lines changed: 96 additions & 0 deletions

@@ -272,6 +272,102 @@ TEST_CASE_TEMPLATE("[slide]", B, BATCH_INT_TYPES)
 
 #endif
 
+template <class B>
+struct compress_test
+{
+    using batch_type = B;
+    using value_type = typename B::value_type;
+    using mask_batch_type = typename B::batch_bool_type;
+
+    static constexpr size_t size = B::size;
+    std::array<value_type, size> input;
+    std::array<bool, size> mask;
+    std::array<value_type, size> expected;
+
+    compress_test()
+    {
+        for (size_t i = 0; i < size; ++i)
+        {
+            input[i] = i;
+        }
+    }
+
+    void full()
+    {
+        std::fill(mask.begin(), mask.end(), true);
+
+        for (size_t i = 0; i < size; ++i)
+            expected[i] = input[i];
+
+        auto b = xsimd::compress(
+            batch_type::load_unaligned(input.data()),
+            mask_batch_type::load_unaligned(mask.data()));
+        CHECK_BATCH_EQ(b, expected);
+    }
+
+    void empty()
+    {
+        std::fill(mask.begin(), mask.end(), false);
+
+        for (size_t i = 0; i < size; ++i)
+            expected[i] = 0;
+
+        auto b = xsimd::compress(
+            batch_type::load_unaligned(input.data()),
+            mask_batch_type::load_unaligned(mask.data()));
+        CHECK_BATCH_EQ(b, expected);
+    }
+
+    void interleave()
+    {
+        for (size_t i = 0; i < size; ++i)
+            mask[i] = i % 2 == 0;
+
+        for (size_t i = 0, j = 0; i < size; ++i)
+            expected[i] = i < size / 2 ? input[2 * i] : 0;
+
+        auto b = xsimd::compress(
+            batch_type::load_unaligned(input.data()),
+            mask_batch_type::load_unaligned(mask.data()));
+        CHECK_BATCH_EQ(b, expected);
+    }
+
+    void generic()
+    {
+        for (size_t i = 0; i < size; ++i)
+            mask[i] = i % 3 == 0;
+
+        for (size_t i = 0, j = 0; i < size; ++i)
+            expected[i] = i < size / 3 ? input[3 * i] : 0;
+
+        auto b = xsimd::compress(
+            batch_type::load_unaligned(input.data()),
+            mask_batch_type::load_unaligned(mask.data()));
+        CHECK_BATCH_EQ(b, expected);
+    }
+};
+
+TEST_CASE_TEMPLATE("[compress]", B, BATCH_FLOAT_TYPES, xsimd::batch<uint32_t>, xsimd::batch<int32_t>, xsimd::batch<uint64_t>, xsimd::batch<int64_t>)
+{
+    compress_test<B> Test;
+    SUBCASE("empty")
+    {
+        Test.empty();
+    }
+    SUBCASE("full")
+    {
+        Test.full();
+    }
+    // SUBCASE("interleave")
+    //{
+    //     Test.interleave();
+    // }
+    // SUBCASE("generic")
+    //{
+    //     Test.generic();
+    // }
+}
+
 template <class B>
 struct shuffle_test
 {

0 commit comments

Comments
 (0)