@@ -259,6 +259,18 @@ namespace xsimd
259259 return detail::compare_int_avx512bw<A, T, _MM_CMPINT_LT>(self, other);
260260 }
261261
262+ // load
263+ template <class A , class T >
264+ XSIMD_INLINE batch_bool<T, A> load (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
265+ {
266+ using register_type = typename batch_bool<T, A>::register_type;
267+ constexpr auto size = batch_bool<T, A>::size;
268+ __mmask64 mask = size >= 64 ? ~(__mmask64)0 : (1ULL << size) - 1 ;
269+ __m512i zeros = _mm512_setzero_si512 ();
270+ __m512i bool_val = _mm512_mask_loadu_epi8 (zeros, mask, (void *)mem);
271+ return (register_type)_mm512_cmpgt_epu8_mask (bool_val, zeros);
272+ }
273+
262274 // max
263275 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
264276 XSIMD_INLINE batch<T, A> max (batch<T, A> const & self, batch<T, A> const & other, requires_arch<avx512bw>) noexcept
@@ -572,6 +584,16 @@ namespace xsimd
572584 }
573585 }
574586
587+ // store
588+ template <class T , class A >
589+ XSIMD_INLINE void store (batch_bool<T, A> const & self, bool * mem, requires_arch<avx512bw>) noexcept
590+ {
591+ constexpr auto size = batch_bool<T, A>::size;
592+ __m512i bool_val = _mm512_maskz_set1_epi8 (self.data , 0x01 );
593+ __mmask64 mask = size >= 64 ? ~(__mmask64)0 : (1ULL << size) - 1 ;
594+ _mm512_mask_storeu_epi8 ((void *)mem, mask, bool_val);
595+ }
596+
575597 // sub
576598 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
577599 XSIMD_INLINE batch<T, A> sub (batch<T, A> const & self, batch<T, A> const & other, requires_arch<avx512bw>) noexcept
0 commit comments