@@ -26,6 +26,8 @@ namespace xsimd
2626 using namespace types ;
2727
2828 // fwd
// Forward declaration of the generic-architecture insert overload, so that the
// avx512f integral insert in this file can fall back to it for element sizes
// other than 4 and 8 bytes.
29+ template <class A , class T , size_t I>
30+ XSIMD_INLINE batch<T, A> insert (batch<T, A> const & self, T val, index<I>, requires_arch<generic>) noexcept ;
// Forward declaration of the generic-architecture transpose overload for
// uint16_t batches; it takes a begin/end pointer pair over the batches to transpose.
2931 template <class A >
3032 XSIMD_INLINE void transpose (batch<uint16_t , A>* matrix_begin, batch<uint16_t , A>* matrix_end, requires_arch<generic>) noexcept ;
3133 template <class A >
@@ -1073,6 +1075,35 @@ namespace xsimd
10731075 return _mm512_add_pd (tmpx, tmpy);
10741076 }
10751077
1078+ // insert
1079+ template <class A , size_t I>
1080+ XSIMD_INLINE batch<float , A> insert (batch<float , A> const & self, float val, index<I>, requires_arch<avx512f>) noexcept
1081+ {
1082+ return _mm512_castsi512_ps (_mm512_mask_set1_epi32 (_mm512_castps_si512 (self), __mmask16 (1 << (I & 15 )), *(int32_t *)&val));
1083+ }
1084+
1085+ template <class A , size_t I>
1086+ XSIMD_INLINE batch<double , A> insert (batch<double , A> const & self, double val, index<I>, requires_arch<avx512f>) noexcept
1087+ {
1088+ return _mm512_castsi512_pd (_mm512_mask_set1_epi64 (_mm512_castpd_si512 (self), __mmask8 (1 << (I & 7 )), *(int64_t *)&val));
1089+ }
1090+ template <class A , class T , size_t I, class = typename std::enable_if<std::is_integral<T>::value, void >::type>
1091+ XSIMD_INLINE batch<T, A> insert (batch<T, A> const & self, T val, index<I> pos, requires_arch<avx512f>) noexcept
1092+ {
1093+ XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
1094+ {
1095+ return _mm512_mask_set1_epi32 (self, __mmask16 (1 << (I & 15 )), val);
1096+ }
1097+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
1098+ {
1099+ return _mm512_mask_set1_epi64 (self, __mmask8 (1 << (I & 7 )), val);
1100+ }
1101+ else
1102+ {
1103+ return insert (self, val, pos, generic {});
1104+ }
1105+ }
1106+
10761107 // isnan
10771108 template <class A >
10781109 XSIMD_INLINE batch_bool<float , A> isnan (batch<float , A> const & self, requires_arch<avx512f>) noexcept
0 commit comments