Skip to content

Commit 5252a08

Browse files
committed
support insert in avx512
1 parent f10a943 commit 5252a08

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,24 @@ namespace xsimd
245245
return detail::compare_int_avx512bw<A, T, _MM_CMPINT_GT>(self, other);
246246
}
247247

248+
// insert
249+
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
250+
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx512bw>) noexcept
251+
{
252+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
253+
{
254+
return _mm512_mask_set1_epi8(self, __mmask64(1ULL << (I & 63)), val);
255+
}
256+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
257+
{
258+
return _mm512_mask_set1_epi16(self, __mmask32(1 << (I & 31)), val);
259+
}
260+
else
261+
{
262+
return insert(self, val, pos, avx512dq {});
263+
}
264+
}
265+
248266
// le
249267
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
250268
XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ namespace xsimd
2626
using namespace types;
2727

2828
// fwd
29+
// Forward declaration of the generic-arch insert kernel; the avx512f integral
// overload of insert dispatches to it for element sizes it does not handle.
template <class A, class T, size_t I>
30+
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
2931
// Forward declaration of the generic-arch transpose for batch<uint16_t, A>,
// taking a begin/end pointer pair; its definition is not part of this view.
template <class A>
3032
XSIMD_INLINE void transpose(batch<uint16_t, A>* matrix_begin, batch<uint16_t, A>* matrix_end, requires_arch<generic>) noexcept;
3133
template <class A>
@@ -1073,6 +1075,35 @@ namespace xsimd
10731075
return _mm512_add_pd(tmpx, tmpy);
10741076
}
10751077

1078+
// insert
1079+
template <class A, size_t I>
1080+
XSIMD_INLINE batch<float, A> insert(batch<float, A> const& self, float val, index<I>, requires_arch<avx512f>) noexcept
1081+
{
1082+
return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_castps_si512(self), __mmask16(1 << (I & 15)), *(int32_t*)&val));
1083+
}
1084+
1085+
template <class A, size_t I>
1086+
XSIMD_INLINE batch<double, A> insert(batch<double, A> const& self, double val, index<I>, requires_arch<avx512f>) noexcept
1087+
{
1088+
return _mm512_castsi512_pd(_mm512_mask_set1_epi64(_mm512_castpd_si512(self), __mmask8(1 << (I & 7)), *(int64_t*)&val));
1089+
}
1090+
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1091+
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx512f>) noexcept
1092+
{
1093+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1094+
{
1095+
return _mm512_mask_set1_epi32(self, __mmask16(1 << (I & 15)), val);
1096+
}
1097+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1098+
{
1099+
return _mm512_mask_set1_epi64(self, __mmask8(1 << (I & 7)), val);
1100+
}
1101+
else
1102+
{
1103+
return insert(self, val, pos, generic {});
1104+
}
1105+
}
1106+
10761107
// isnan
10771108
template <class A>
10781109
XSIMD_INLINE batch_bool<float, A> isnan(batch<float, A> const& self, requires_arch<avx512f>) noexcept

0 commit comments

Comments
 (0)