Skip to content

Commit c316b16

Browse files
Extend #1172 approach to avx512
1 parent 2096779 commit c316b16

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,41 @@ namespace xsimd
330330
return _mm512_cmpgt_epu8_mask(bool_val, _mm512_setzero_si512());
331331
}
332332

333+
// Load 32 `bool` values (one byte each) from possibly unaligned memory and
// convert them into a 32-lane boolean batch.
//
// mem: pointer to at least 32 readable bytes; no alignment requirement.
// The unnamed batch_bool / requires_arch parameters are only used for
// overload selection.
//
// NOTE(review): assumes batch_bool<T, A> is constructible from an AVX-512
// k-mask (__mmask32), like the neighbouring 64-lane loader that returns the
// result of _mm512_cmpgt_epu8_mask -- confirm against the batch_bool definition.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 32, void>::type>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // 32 bools occupy 32 bytes, i.e. exactly one 256-bit register.
    __m256i bpack = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(mem));
    // Zero-extend every byte to a 16-bit lane, then set the mask bit of each
    // lane holding a non-zero value.
    return _mm512_cmpgt_epu16_mask(_mm512_cvtepu8_epi16(bpack), _mm512_setzero_si512());
}
339+
340+
// Load 32 `bool` values (one byte each) from aligned memory and convert them
// into a 32-lane boolean batch.
//
// mem: pointer to at least 32 readable bytes; must be 32-byte aligned, as
// required by _mm256_load_si256.
// The unnamed batch_bool / requires_arch parameters are only used for
// overload selection.
//
// NOTE(review): assumes batch_bool<T, A> is constructible from an AVX-512
// k-mask (__mmask32), like the neighbouring 64-lane loader that returns the
// result of _mm512_cmpgt_epu8_mask -- confirm against the batch_bool definition.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 32, void>::type>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // 32 bools occupy 32 bytes, i.e. exactly one 256-bit register.
    __m256i bpack = _mm256_load_si256(reinterpret_cast<__m256i const*>(mem));
    // Zero-extend every byte to a 16-bit lane, then set the mask bit of each
    // lane holding a non-zero value.
    return _mm512_cmpgt_epu16_mask(_mm512_cvtepu8_epi16(bpack), _mm512_setzero_si512());
}
346+
347+
// Load 16 `bool` values (one byte each) from possibly unaligned memory and
// convert them into a 16-lane boolean batch.
//
// mem: pointer to at least 16 readable bytes; no alignment requirement.
// The unnamed batch_bool / requires_arch parameters are only used for
// overload selection.
//
// The previous implementation negated the widened values with a 16-bit
// subtraction although the lanes are 32 bits wide, which left the upper half
// of every "true" lane cleared. Comparing at the lane width avoids that.
//
// NOTE(review): assumes batch_bool<T, A> is constructible from an AVX-512
// k-mask (__mmask16), like the neighbouring 64-lane loader that returns the
// result of _mm512_cmpgt_epu8_mask -- confirm against the batch_bool definition.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 16, void>::type>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // 16 bools occupy 16 bytes, i.e. exactly one 128-bit register.
    __m128i bpack = _mm_loadu_si128(reinterpret_cast<__m128i const*>(mem));
    // Zero-extend every byte to a 32-bit lane, then set the mask bit of each
    // lane holding a non-zero value.
    return _mm512_cmpgt_epu32_mask(_mm512_cvtepu8_epi32(bpack), _mm512_setzero_si512());
}
353+
354+
// Load 16 `bool` values (one byte each) from aligned memory and convert them
// into a 16-lane boolean batch.
//
// mem: pointer to at least 16 readable bytes; must be 16-byte aligned, as
// required by _mm_load_si128.
// The unnamed batch_bool / requires_arch parameters are only used for
// overload selection.
//
// The previous implementation negated the widened values with a 16-bit
// subtraction although the lanes are 32 bits wide, which left the upper half
// of every "true" lane cleared. Comparing at the lane width avoids that.
//
// NOTE(review): assumes batch_bool<T, A> is constructible from an AVX-512
// k-mask (__mmask16), like the neighbouring 64-lane loader that returns the
// result of _mm512_cmpgt_epu8_mask -- confirm against the batch_bool definition.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 16, void>::type>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // 16 bools occupy 16 bytes, i.e. exactly one 128-bit register.
    __m128i bpack = _mm_load_si128(reinterpret_cast<__m128i const*>(mem));
    // Zero-extend every byte to a 32-bit lane, then set the mask bit of each
    // lane holding a non-zero value.
    return _mm512_cmpgt_epu32_mask(_mm512_cvtepu8_epi32(bpack), _mm512_setzero_si512());
}
360+
361+
// Load 8 `bool` values (one byte each) from memory and convert them into an
// 8-lane boolean batch.
//
// mem: pointer to at least 8 readable bytes; _mm_loadl_epi64 imposes no
// alignment requirement.
// The unnamed batch_bool / requires_arch parameters are only used for
// overload selection.
//
// The previous implementation negated the widened values with a 16-bit
// subtraction although the lanes are 64 bits wide, which left the upper 48
// bits of every "true" lane cleared. Comparing at the lane width avoids that.
//
// NOTE(review): assumes batch_bool<T, A> is constructible from an AVX-512
// k-mask (__mmask8), like the neighbouring 64-lane loader that returns the
// result of _mm512_cmpgt_epu8_mask -- confirm against the batch_bool definition.
template <class A, class T, class = typename std::enable_if<batch_bool<T, A>::size == 8, void>::type>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<avx512bw>) noexcept
{
    // 8 bools occupy 8 bytes: load only the low 64 bits of a 128-bit register.
    __m128i bpack = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(mem));
    // Zero-extend the low 8 bytes to 64-bit lanes, then set the mask bit of
    // each lane holding a non-zero value.
    return _mm512_cmpgt_epu64_mask(_mm512_cvtepu8_epi64(bpack), _mm512_setzero_si512());
}
367+
333368
// max
334369
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
335370
XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept

0 commit comments

Comments
 (0)