@@ -497,22 +497,16 @@ namespace xsimd
497497 {
498498 return { (Is >= N ? Is - N : 0 )... };
499499 }
500- template <size_t N, size_t ... Is>
501- constexpr std::array<uint16_t , sizeof ...(Is)> make_slide_left_mask (::xsimd::detail::index_sequence<Is...>)
502- {
503- return { (Is >= N ? 0xFFFF : 0x0000 )... };
504- }
505500 }
506501
507502 template <size_t N, class A , class T >
508503 XSIMD_INLINE batch<T, A> slide_left (batch<T, A> const & x, requires_arch<avx512bw>) noexcept
509504 {
510- constexpr unsigned BitCount = N * 8 ;
511- if (BitCount == 0 )
505+ if (N == 0 )
512506 {
513507 return x;
514508 }
515- if (BitCount >= 512 )
509+ if (N >= 64 )
516510 {
517511 return batch<T, A>(T (0 ));
518512 }
@@ -538,9 +532,9 @@ namespace xsimd
538532 {
539533 xx = x;
540534 }
535+ __mmask32 mask = 0xFFFFFFFFu << ((N / 2 ) & 31 );
541536 alignas (A::alignment ()) auto slide_pattern = detail::make_slide_left_pattern<N / 2 >(::xsimd::detail::make_index_sequence<512 / 16 >());
542- alignas (A::alignment ()) auto slide_mask = detail::make_slide_left_mask<N / 2 >(::xsimd::detail::make_index_sequence<512 / 16 >());
543- return _mm512_and_si512 (_mm512_permutexvar_epi16 (_mm512_load_epi32 (slide_pattern.data ()), xx), _mm512_load_epi32 (slide_mask.data ()));
537+ return _mm512_maskz_permutexvar_epi16 (mask, _mm512_load_epi32 (slide_pattern.data ()), xx);
544538 }
545539
546540 // slide_right
@@ -557,21 +551,15 @@ namespace xsimd
557551 {
558552 return { (Is < (32 - N) ? Is + N : 0 )... };
559553 }
560- template <size_t N, size_t ... Is>
561- constexpr std::array<uint16_t , sizeof ...(Is)> make_slide_right_mask (::xsimd::detail::index_sequence<Is...>)
562- {
563- return { (Is < 32 - N ? 0xFFFF : 0x0000 )... };
564- }
565554 }
566555 template <size_t N, class A , class T >
567556 XSIMD_INLINE batch<T, A> slide_right (batch<T, A> const & x, requires_arch<avx512bw>) noexcept
568557 {
569- constexpr unsigned BitCount = N * 8 ;
570- if (BitCount == 0 )
558+ if (N == 0 )
571559 {
572560 return x;
573561 }
574- if (BitCount >= 512 )
562+ if (N >= 64 )
575563 {
576564 return batch<T, A>(T (0 ));
577565 }
@@ -590,9 +578,9 @@ namespace xsimd
590578 {
591579 xx = x;
592580 }
581+ __mmask32 mask = 0xFFFFFFFFu >> ((N / 2 ) & 31 );
593582 alignas (A::alignment ()) auto slide_pattern = detail::make_slide_right_pattern<N / 2 >(::xsimd::detail::make_index_sequence<512 / 16 >());
594- alignas (A::alignment ()) auto slide_mask = detail::make_slide_right_mask<N / 2 >(::xsimd::detail::make_index_sequence<512 / 16 >());
595- return _mm512_and_si512 (_mm512_permutexvar_epi16 (_mm512_load_epi32 (slide_pattern.data ()), xx), _mm512_load_epi32 (slide_mask.data ()));
583+ return _mm512_maskz_permutexvar_epi16 (mask, _mm512_load_epi32 (slide_pattern.data ()), xx);
596584 }
597585
598586 // ssub
0 commit comments