Skip to content

Commit a2f0b04

Browse files
committed
support incr_if&decr_if in avx512
1 parent 231c868 commit a2f0b04

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,25 @@ namespace xsimd
224224
}
225225
}
226226

227+
// decr_if
// AVX512BW kernel: subtracts 1 from the lanes of `self` selected by `mask`.
// Only participates in overload resolution for integral T (see the enable_if).
// 8-bit and 16-bit lanes are handled here; every other lane width is
// forwarded to the overload selected by the avx512dq tag.
228+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
229+
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512bw>) noexcept
230+
{
231+
232+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
233+
{
// `self` is both the pass-through source and the minuend, so lanes whose
// mask bit is clear keep their value and the others receive self - 1.
234+
return _mm512_mask_sub_epi8(self, mask.data, self, _mm512_set1_epi8(1));
235+
}
236+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
237+
{
// Same masked subtract as above, on 16-bit lanes.
238+
return _mm512_mask_sub_epi16(self, mask.data, self, _mm512_set1_epi16(1));
239+
}
240+
else
241+
{
// Not an 8- or 16-bit lane: defer to the kernel reached through the avx512dq tag.
242+
return decr_if(self, mask, avx512dq {});
243+
}
244+
}
245+
227246
// eq
228247
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
229248
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
@@ -245,6 +264,25 @@ namespace xsimd
245264
return detail::compare_int_avx512bw<A, T, _MM_CMPINT_GT>(self, other);
246265
}
247266

267+
// incr_if
// AVX512BW kernel: adds 1 to the lanes of `self` selected by `mask`.
// Only participates in overload resolution for integral T (see the enable_if).
// 8-bit and 16-bit lanes are handled here; every other lane width is
// forwarded to the overload selected by the avx512dq tag.
268+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
269+
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512bw>) noexcept
270+
{
271+
272+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
273+
{
// `self` is both the pass-through source and the first addend, so lanes
// whose mask bit is clear keep their value and the others receive self + 1.
274+
return _mm512_mask_add_epi8(self, mask.data, self, _mm512_set1_epi8(1));
275+
}
276+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
277+
{
// Same masked add as above, on 16-bit lanes.
278+
return _mm512_mask_add_epi16(self, mask.data, self, _mm512_set1_epi16(1));
279+
}
280+
else
281+
{
// Not an 8- or 16-bit lane: defer to the kernel reached through the avx512dq tag.
282+
return incr_if(self, mask, avx512dq {});
283+
}
284+
}
285+
248286
// insert
249287
template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
250288
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx512bw>) noexcept

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ namespace xsimd
2626
using namespace types;
2727

2828
// fwd
29+
// Generic decr_if / incr_if kernels, declared ahead of use because the
// avx512f overloads in this file call them with a `generic {}` tag as their
// fallback for lane widths they do not handle.
template <class A, class T, class Mask>
30+
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept;
31+
template <class A, class T, class Mask>
32+
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept;
2933
template <class A, class T, size_t I>
3034
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
3135
template <class A>
@@ -759,6 +763,24 @@ namespace xsimd
759763
return _mm512_permutex2var_pd(self.real(), idx, self.imag());
760764
}
761765
}
766+
// decr_if
// AVX512F kernel: subtracts 1 from the lanes of `self` selected by `mask`.
// Only participates in overload resolution for integral T (see the enable_if).
// 32-bit and 64-bit lanes are handled here; every other lane width falls
// back to the generic implementation.
767+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
768+
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512f>) noexcept
769+
{
770+
771+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
772+
{
// `self` is both the pass-through source and the minuend, so lanes whose
// mask bit is clear keep their value and the others receive self - 1.
773+
return _mm512_mask_sub_epi32(self, mask.data, self, _mm512_set1_epi32(1));
774+
}
775+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
776+
{
// Same masked subtract as above, on 64-bit lanes.
777+
return _mm512_mask_sub_epi64(self, mask.data, self, _mm512_set1_epi64(1));
778+
}
779+
else
780+
{
// Not a 32- or 64-bit lane: use the generic kernel declared in the "fwd" section.
781+
return decr_if(self, mask, generic {});
782+
}
783+
}
762784

763785
// div
764786
template <class A>
@@ -1075,10 +1097,30 @@ namespace xsimd
10751097
return _mm512_add_pd(tmpx, tmpy);
10761098
}
10771099

1100+
// incr_if
// AVX512F kernel: adds 1 to the lanes of `self` selected by `mask`.
// Only participates in overload resolution for integral T (see the enable_if).
// 32-bit and 64-bit lanes are handled here; every other lane width falls
// back to the generic implementation.
1101+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1102+
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512f>) noexcept
1103+
{
1104+
1105+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1106+
{
// `self` is both the pass-through source and the first addend, so lanes
// whose mask bit is clear keep their value and the others receive self + 1.
1107+
return _mm512_mask_add_epi32(self, mask.data, self, _mm512_set1_epi32(1));
1108+
}
1109+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1110+
{
// Same masked add as above, on 64-bit lanes.
1111+
return _mm512_mask_add_epi64(self, mask.data, self, _mm512_set1_epi64(1));
1112+
}
1113+
else
1114+
{
// Not a 32- or 64-bit lane: use the generic kernel declared in the "fwd" section.
1115+
return incr_if(self, mask, generic {});
1116+
}
1117+
}
1118+
10781119
// insert
10791120
// Returns a copy of `self` in which the float lane at index (I & 15) is
// replaced by `val`; all other lanes are taken from `self`.
template <class A, size_t I>
10801121
XSIMD_INLINE batch<float, A> insert(batch<float, A> const& self, float val, index<I>, requires_arch<avx512f>) noexcept
10811122
{
// The batch is viewed as 32-bit integers, a masked set1 with a single-bit
// mask (bit I & 15) writes the bit pattern of `val` into that one lane, and
// the result is viewed as floats again.
// NOTE(review): `*(int32_t*)&val` reads a float object through an int32_t
// pointer; consider a memcpy-based bit copy to avoid relying on type punning.
1123+
10821124
return _mm512_castsi512_ps(_mm512_mask_set1_epi32(_mm512_castps_si512(self), __mmask16(1 << (I & 15)), *(int32_t*)&val));
10831125
}
10841126

0 commit comments

Comments
 (0)