@@ -26,6 +26,10 @@ namespace xsimd
2626 using namespace types ;
2727
2828 // fwd
// Forward declarations of the `generic` architecture overloads.
// The avx512f decr_if / incr_if kernels defined later in this file call the
// generic decr_if / incr_if for element sizes other than 4 or 8 bytes, so
// those overloads must be declared before that point.
29+ template <class A , class T , class Mask >
30+ XSIMD_INLINE batch<T, A> decr_if (batch<T, A> const & self, Mask const & mask, requires_arch<generic>) noexcept ;
31+ template <class A , class T , class Mask >
32+ XSIMD_INLINE batch<T, A> incr_if (batch<T, A> const & self, Mask const & mask, requires_arch<generic>) noexcept ;
// Generic insert overload taking the target lane as a compile-time index<I>.
2933 template <class A , class T , size_t I>
3034 XSIMD_INLINE batch<T, A> insert (batch<T, A> const & self, T val, index<I>, requires_arch<generic>) noexcept ;
3135 template <class A >
@@ -759,6 +763,24 @@ namespace xsimd
759763 return _mm512_permutex2var_pd (self.real (), idx, self.imag ());
760764 }
761765 }
766+ // incr_if
767+ template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
768+ XSIMD_INLINE batch<T, A> decr_if (batch<T, A> const & self, batch_bool<T, A> const & mask, requires_arch<avx512f>) noexcept
769+ {
770+
771+ XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
772+ {
773+ return _mm512_mask_sub_epi32 (self, mask.data , self, _mm512_set1_epi32 (1 ));
774+ }
775+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
776+ {
777+ return _mm512_mask_sub_epi64 (self, mask.data , self, _mm512_set1_epi64 (1 ));
778+ }
779+ else
780+ {
781+ return decr_if (self, mask, generic {});
782+ }
783+ }
762784
763785 // div
764786 template <class A >
@@ -1075,10 +1097,30 @@ namespace xsimd
10751097 return _mm512_add_pd (tmpx, tmpy);
10761098 }
10771099
1100+ // incr_if
1101+ template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
1102+ XSIMD_INLINE batch<T, A> incr_if (batch<T, A> const & self, batch_bool<T, A> const & mask, requires_arch<avx512f>) noexcept
1103+ {
1104+
1105+ XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
1106+ {
1107+ return _mm512_mask_add_epi32 (self, mask.data , self, _mm512_set1_epi32 (1 ));
1108+ }
1109+ else XSIMD_IF_CONSTEXPR (sizeof (T) == 8 )
1110+ {
1111+ return _mm512_mask_add_epi64 (self, mask.data , self, _mm512_set1_epi64 (1 ));
1112+ }
1113+ else
1114+ {
1115+ return incr_if (self, mask, generic {});
1116+ }
1117+ }
1118+
10781119 // insert
10791120 template <class A , size_t I>
10801121 XSIMD_INLINE batch<float , A> insert (batch<float , A> const & self, float val, index<I>, requires_arch<avx512f>) noexcept
10811122 {
1123+
10821124 return _mm512_castsi512_ps (_mm512_mask_set1_epi32 (_mm512_castps_si512 (self), __mmask16 (1 << (I & 15 )), *(int32_t *)&val));
10831125 }
10841126
0 commit comments