@@ -486,17 +486,22 @@ namespace xsimd
486486 // slide_left
namespace detail
{
    /// Index generator for the 64-bit lane permutation used by slide_left
    /// when the byte shift is odd.
    ///
    /// Intended to be passed to make_batch_constant<uint64_t, ...>; the
    /// resulting indices feed _mm512_permutex2var_epi64, whose second data
    /// operand is a zero vector at the call site.
    struct make_slide_perm_hi
    {
        /// @param i  lane index being generated
        /// @return   the source lane for lane @p i: lane i takes lane i - 1,
        ///           and lane 0 takes index 8, i.e. the first element of the
        ///           second (all-zero) operand of the two-source permute.
        static constexpr uint64_t get(size_t i, size_t)
        {
            // Index 8 = element 0 of the second permutex2var operand.
            return i == 0 ? 8 : i - 1;
        }
    };

    /// Index generator for sliding 16-bit lanes towards higher indices by
    /// N lanes.
    ///
    /// @tparam N  slide distance expressed in 16-bit lanes
    template <size_t N>
    struct make_slide_left_pattern
    {
        /// @param i  lane index being generated
        /// @return   i - N when that source lane exists, 0 otherwise. Lanes
        ///           below N are placeholders only: the caller zeroes them
        ///           through the mask given to _mm512_maskz_permutexvar_epi16.
        static constexpr uint16_t get(size_t i, size_t)
        {
            return static_cast<uint16_t>(i >= N ? i - N : 0);
        }
    };
}
501506
502507 template <size_t N, class A , class T >
@@ -520,10 +525,10 @@ namespace xsimd
520525 buffer[0 ] = buffer[0 ] << 8 ;
521526 xx = _mm512_load_epi64 (&buffer[0 ]);
522527
523- alignas ( A::alignment ()) auto slide_perm = detail::make_slide_perm_hi (:: xsimd::detail::make_index_sequence< 512 / 64 >() );
528+ auto slide_perm = xsimd::make_batch_constant< uint64_t , detail::make_slide_perm_hi, A>( );
524529 __m512i xl = _mm512_slli_epi64 (x, 8 );
525530 __m512i xr = _mm512_srli_epi64 (x, 56 );
526- xr = _mm512_permutex2var_epi64 (xr, _mm512_load_epi64 ( slide_perm.data () ), _mm512_setzero_si512 ());
531+ xr = _mm512_permutex2var_epi64 (xr, slide_perm.as_batch ( ), _mm512_setzero_si512 ());
527532 xx = _mm512_or_si512 (xr, xl);
528533 if (N == 1 )
529534 return xx;
@@ -533,24 +538,29 @@ namespace xsimd
533538 xx = x;
534539 }
535540 __mmask32 mask = 0xFFFFFFFFu << ((N / 2 ) & 31 );
536- alignas ( A::alignment ()) auto slide_pattern = detail::make_slide_left_pattern<N / 2 >(::xsimd::detail::make_index_sequence< 512 / 16 >() );
537- return _mm512_maskz_permutexvar_epi16 (mask, _mm512_load_epi32 ( slide_pattern.data () ), xx);
541+ auto slide_pattern = xsimd::make_batch_constant< uint16_t , detail::make_slide_left_pattern<N / 2 >, A>( );
542+ return _mm512_maskz_permutexvar_epi16 (mask, slide_pattern.as_batch ( ), xx);
538543 }
539544
540545 // slide_right
namespace detail
{
    /// Index generator for the 64-bit lane permutation used by slide_right
    /// when the byte shift is odd.
    ///
    /// Intended to be passed to make_batch_constant<uint64_t, ...>; the
    /// resulting indices feed _mm512_permutex2var_epi64, whose second data
    /// operand is a zero vector at the call site.
    struct make_slide_perm_low
    {
        /// @param i  lane index being generated
        /// @return   i + 1, so each lane takes its upper neighbour. For the
        ///           last of 8 lanes this yields 8, which addresses the first
        ///           element of the second (all-zero) permute operand.
        static constexpr uint64_t get(size_t i, size_t)
        {
            return i + 1;
        }
    };

    /// Index generator for sliding 16-bit lanes towards lower indices by
    /// N lanes.
    ///
    /// @tparam N  slide distance expressed in 16-bit lanes
    template <size_t N>
    struct make_slide_right_pattern
    {
        /// @param i  lane index being generated
        /// @param n  total number of lanes in the generated constant
        /// @return   i + N when that source lane exists (i + N < n), 0
        ///           otherwise. Out-of-range lanes are placeholders only: the
        ///           caller zeroes them through the mask given to
        ///           _mm512_maskz_permutexvar_epi16.
        static constexpr uint16_t get(size_t i, size_t n)
        {
            // Written as `i + N < n` rather than `i < n - N`: the subtraction
            // wraps around in unsigned arithmetic whenever N > n and would
            // then let every lane through with an out-of-range index.
            return static_cast<uint16_t>(i + N < n ? i + N : 0);
        }
    };
}
555565 template <size_t N, class A , class T >
556566 XSIMD_INLINE batch<T, A> slide_right (batch<T, A> const & x, requires_arch<avx512bw>) noexcept
@@ -566,10 +576,10 @@ namespace xsimd
566576 batch<T, A> xx;
567577 if (N & 1 )
568578 {
569- alignas ( A::alignment ()) auto slide_perm = detail::make_slide_perm_low (:: xsimd::detail::make_index_sequence< 512 / 64 >() );
579+ auto slide_perm = xsimd::make_batch_constant< uint64_t , detail::make_slide_perm_low, A>( );
570580 __m512i xr = _mm512_srli_epi64 (x, 8 );
571581 __m512i xl = _mm512_slli_epi64 (x, 56 );
572- xl = _mm512_permutex2var_epi64 (xl, _mm512_load_epi64 ( slide_perm.data () ), _mm512_setzero_si512 ());
582+ xl = _mm512_permutex2var_epi64 (xl, slide_perm.as_batch ( ), _mm512_setzero_si512 ());
573583 xx = _mm512_or_si512 (xr, xl);
574584 if (N == 1 )
575585 return xx;
@@ -579,8 +589,8 @@ namespace xsimd
579589 xx = x;
580590 }
581591 __mmask32 mask = 0xFFFFFFFFu >> ((N / 2 ) & 31 );
582- alignas ( A::alignment ()) auto slide_pattern = detail::make_slide_right_pattern<N / 2 >(::xsimd::detail::make_index_sequence< 512 / 16 >() );
583- return _mm512_maskz_permutexvar_epi16 (mask, _mm512_load_epi32 ( slide_pattern.data () ), xx);
592+ auto slide_pattern = xsimd::make_batch_constant< uint16_t , detail::make_slide_right_pattern<N / 2 >, A>( );
593+ return _mm512_maskz_permutexvar_epi16 (mask, slide_pattern.as_batch ( ), xx);
584594 }
585595
586596 // ssub
0 commit comments