@@ -315,6 +315,18 @@ namespace xsimd
315315 return detail::compare_int_avx512bw<A, T, _MM_CMPINT_LT>(self, other);
316316 }
317317
318+ // load
319+ template <class A , class T >
320+ XSIMD_INLINE batch_bool<T, A> load (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
321+ {
322+ using register_type = typename batch_bool<T, A>::register_type;
323+ constexpr auto size = batch_bool<T, A>::size;
324+ __mmask64 mask = size >= 64 ? ~(__mmask64)0 : (1ULL << size) - 1 ;
325+ __m512i zeros = _mm512_setzero_si512 ();
326+ __m512i bool_val = _mm512_mask_loadu_epi8 (zeros, mask, (void *)mem);
327+ return (register_type)_mm512_cmpgt_epu8_mask (bool_val, zeros);
328+ }
329+
318330 // max
319331 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
320332 XSIMD_INLINE batch<T, A> max (batch<T, A> const & self, batch<T, A> const & other, requires_arch<avx512bw>) noexcept
@@ -628,6 +640,16 @@ namespace xsimd
628640 }
629641 }
630642
643+ // store
644+ template <class T , class A >
645+ XSIMD_INLINE void store (batch_bool<T, A> const & self, bool * mem, requires_arch<avx512bw>) noexcept
646+ {
647+ constexpr auto size = batch_bool<T, A>::size;
648+ __m512i bool_val = _mm512_maskz_set1_epi8 (self.data , 0x01 );
649+ __mmask64 mask = size >= 64 ? ~(__mmask64)0 : (1ULL << size) - 1 ;
650+ _mm512_mask_storeu_epi8 ((void *)mem, mask, bool_val);
651+ }
652+
631653 // sub
632654 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
633655 XSIMD_INLINE batch<T, A> sub (batch<T, A> const & self, batch<T, A> const & other, requires_arch<avx512bw>) noexcept
0 commit comments