Skip to content

Commit 85414a7

Browse files
junparser authored and liangbin.mj committed
Move batch<int8_t, A> swizzle to avx512vbmi with _mm512_permutexvar_epi8
1 parent 8ede86e commit 85414a7

File tree

2 files changed

+25
-24
lines changed

2 files changed

+25
-24
lines changed

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -684,18 +684,6 @@ namespace xsimd
684684
return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, avx512bw {}));
685685
}
686686

687-
template <class A>
688-
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
689-
{
690-
return _mm512_shuffle_epi8(self, mask);
691-
}
692-
693-
template <class A>
694-
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
695-
{
696-
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, avx512bw {}));
697-
}
698-
699687
// swizzle (static version)
700688
template <class A, uint16_t... Vs>
701689
XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
@@ -709,18 +697,6 @@ namespace xsimd
709697
return swizzle(self, mask.as_batch(), avx512bw {});
710698
}
711699

712-
template <class A, uint8_t... Vs>
713-
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
714-
{
715-
return swizzle(self, mask.as_batch(), avx512bw {});
716-
}
717-
718-
template <class A, uint8_t... Vs>
719-
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
720-
{
721-
return swizzle(self, mask.as_batch(), avx512bw {});
722-
}
723-
724700
// zip_hi
725701
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
726702
XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept

include/xsimd/arch/xsimd_avx512vbmi.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,31 @@ namespace xsimd
7474
return _mm512_maskz_permutexvar_epi8(mask, _mm512_load_epi32(slide_pattern.data()), x);
7575
}
7676

77+
// swizzle (dynamic version)
78+
template <class A>
79+
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
80+
{
81+
return _mm512_permutexvar_epi8(mask, self);
82+
}
83+
84+
template <class A>
85+
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512vbmi>) noexcept
86+
{
87+
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, avx512vbmi {}));
88+
}
89+
90+
// swizzle (static version)
91+
template <class A, uint8_t... Vs>
92+
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
93+
{
94+
return swizzle(self, mask.as_batch(), avx512vbmi {});
95+
}
96+
97+
template <class A, uint8_t... Vs>
98+
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512vbmi>) noexcept
99+
{
100+
return swizzle(self, mask.as_batch(), avx512vbmi {});
101+
}
77102
}
78103
}
79104

0 commit comments

Comments
 (0)