@@ -330,6 +330,41 @@ namespace xsimd
330330 return _mm512_cmpgt_epu8_mask (bool_val, _mm512_setzero_si512 ());
331331 }
332332
333+ template <class A , class T , class = typename std::enable_if<batch_bool<T, A>::size == 32 , void >::type>
334+ XSIMD_INLINE batch_bool<T, A> load_unaligned (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
335+ {
336+ __m256i bpack = _mm256_loadu_si256 ((__m256i const *)mem);
337+ return { _mm512_sub_epi16 (_mm512_set1_epi8 (0 ), _mm512_cvtepu8_epi16 (bpack)) };
338+ }
339+
340+ template <class A , class T , class = typename std::enable_if<batch_bool<T, A>::size == 32 , void >::type>
341+ XSIMD_INLINE batch_bool<T, A> load_aligned (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
342+ {
343+ __m256i bpack = _mm256_load_si256 ((__m256i const *)mem);
344+ return { _mm512_sub_epi16 (_mm512_set1_epi8 (0 ), _mm512_cvtepu8_epi16 (bpack)) };
345+ }
346+
347+ template <class A , class T , class = typename std::enable_if<batch_bool<T, A>::size == 16 , void >::type>
348+ XSIMD_INLINE batch_bool<T, A> load_unaligned (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
349+ {
350+ __m128i bpack = _mm_loadu_si128 ((__m128i const *)mem);
351+ return { _mm512_sub_epi16 (_mm512_set1_epi8 (0 ), _mm512_cvtepu8_epi32 (bpack)) };
352+ }
353+
354+ template <class A , class T , class = typename std::enable_if<batch_bool<T, A>::size == 16 , void >::type>
355+ XSIMD_INLINE batch_bool<T, A> load_aligned (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
356+ {
357+ __m128i bpack = _mm_load_si128 ((__m128i const *)mem);
358+ return { _mm512_sub_epi16 (_mm512_set1_epi8 (0 ), _mm512_cvtepu8_epi32 (bpack)) };
359+ }
360+
361+ template <class A , class T , class = typename std::enable_if<batch_bool<T, A>::size == 8 , void >::type>
362+ XSIMD_INLINE batch_bool<T, A> load_unaligned (bool const * mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
363+ {
364+ __m128i bpack = _mm_loadl_epi64 ((__m128i const *)mem);
365+ return { _mm512_sub_epi16 (_mm512_set1_epi8 (0 ), _mm512_cvtepu8_epi64 (bpack)) };
366+ }
367+
333368 // max
334369 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
335370 XSIMD_INLINE batch<T, A> max (batch<T, A> const & self, batch<T, A> const & other, requires_arch<avx512bw>) noexcept
0 commit comments