Skip to content

Commit ce58d62

Browse files
Support avgr operation and document both avg and avgr
1 parent d7c3383 commit ce58d62

File tree

11 files changed

+198
-22
lines changed

11 files changed

+198
-22
lines changed

docs/source/api/arithmetic_index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ Fused operations:
8181
| :cpp:func:`fnms` | fused negate multiply sub |
8282
+---------------------------------------+----------------------------------------------------+
8383

84+
Average computation:
85+
86+
+---------------------------------------+----------------------------------------------------+
87+
| :cpp:func:`avg` | per slot average |
88+
+---------------------------------------+----------------------------------------------------+
89+
| :cpp:func:`avgr` | per slot rounded average |
90+
+---------------------------------------+----------------------------------------------------+
91+
8492
----
8593

8694
.. doxygengroup:: batch_arithmetic

include/xsimd/arch/generic/xsimd_generic_math.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,30 @@ namespace xsimd
7878
return detail::avg(x, y, typename std::is_integral<T>::type {}, typename std::is_signed<T>::type {});
7979
}
8080

81+
// avgr
82+
namespace detail
83+
{
84+
template <class A, class T>
85+
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::true_type) noexcept
86+
{
87+
constexpr unsigned shift = 8 * sizeof(T) - 1;
88+
auto adj = std::is_signed<T>::value ? ((x ^ y) & 0x1) : (((x ^ y) << shift) >> shift);
89+
return ::xsimd::kernel::avg(x, y, A {}) + adj;
90+
}
91+
92+
template <class A, class T>
93+
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::false_type) noexcept
94+
{
95+
return ::xsimd::kernel::avg(x, y, A {});
96+
}
97+
}
98+
99+
template <class A, class T>
100+
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
101+
{
102+
return detail::avgr(x, y, typename std::is_integral<T>::type {});
103+
}
104+
81105
// batch_cast
82106
template <class A, class T>
83107
inline batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,19 +76,37 @@ namespace xsimd
7676
}
7777
}
7878

79+
// avgr
80+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
81+
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
82+
{
83+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
84+
{
85+
return _mm256_avg_epu8(self, other);
86+
}
87+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
88+
{
89+
return _mm256_avg_epu16(self, other);
90+
}
91+
else
92+
{
93+
return avgr(self, other, generic {});
94+
}
95+
}
96+
7997
// avg
8098
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
8199
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
82100
{
83101
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
84102
{
85103
auto adj = ((self ^ other) << 7) >> 7;
86-
return batch<T, A>(_mm256_avg_epu8(self, other)) - adj;
104+
return avgr(self, other, A {}) - adj;
87105
}
88106
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
89107
{
90108
auto adj = ((self ^ other) << 15) >> 15;
91-
return batch<T, A>(_mm256_avg_epu16(self, other)) - adj;
109+
return avgr(self, other, A {}) - adj;
92110
}
93111
else
94112
{

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,19 +112,37 @@ namespace xsimd
112112
}
113113
}
114114

115+
// avgr
116+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
117+
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
118+
{
119+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
120+
{
121+
return _mm512_avg_epu8(self, other);
122+
}
123+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
124+
{
125+
return _mm512_avg_epu16(self, other);
126+
}
127+
else
128+
{
129+
return avgr(self, other, generic {});
130+
}
131+
}
132+
115133
// avg
116134
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
117135
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
118136
{
119137
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
120138
{
121139
auto adj = ((self ^ other) << 7) >> 7;
122-
return batch<T, A>(_mm512_avg_epu8(self, other)) - adj;
140+
return avgr(self, other, A {}) - adj;
123141
}
124142
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
125143
{
126144
auto adj = ((self ^ other) << 15) >> 15;
127-
return batch<T, A>(_mm512_avg_epu16(self, other)) - adj;
145+
return avgr(self, other, A {}) - adj;
128146
}
129147
else
130148
{

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,22 @@ namespace xsimd
770770
return dispatcher.apply(register_type(lhs), register_type(rhs));
771771
}
772772

773+
/********
774+
* avgr *
775+
********/
776+
777+
// Presumably defines the wrap::vrhaddq_u8/_u16/_u32 helpers referenced below
// (the macro itself is declared elsewhere -- confirm against its definition).
WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq, detail::identity_return_type)

// avgr for unsigned lanes of 8, 16 or 32 bits on NEON: the call is routed to
// the vrhaddq wrapper matching the register type. 64-bit lanes are excluded
// by the enable_if condition.
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
inline batch<T, A> avgr(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch<T, A>::register_type;
    using dispatcher_type = detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary;
    // One wrapper per supported register type, in the same order as the
    // dispatcher's template arguments.
    const dispatcher_type rounding_add = {
        std::make_tuple(wrap::vrhaddq_u8, wrap::vrhaddq_u16, wrap::vrhaddq_u32)
    };
    const register_type left(lhs);
    const register_type right(rhs);
    return rounding_add.apply(left, right);
}
788+
773789
/********
774790
* sadd *
775791
********/

include/xsimd/arch/xsimd_scalar.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,18 @@ namespace xsimd
163163
}
164164
}
165165

166+
// Rounded average of two scalars, returned in their common type.
//
// Floating-point operands defer entirely to avg(). Integral operands take
// avg() and add the parity of the sum, which is the lowest bit of x ^ y.
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avgr(T const& x, Tp const& y) noexcept
{
    using common_type = typename std::common_type<T, Tp>::type;
    // The `if` below is a plain runtime branch, so both arms are compiled for
    // every instantiation, and `^` is ill-formed on floating-point operands.
    // The xor therefore goes through an integral stand-in type: for integral
    // operands it is common_type itself (same value as `x ^ y`), for
    // floating-point operands it only has to compile, the arm is never taken.
    using bits_type = typename std::conditional<std::is_floating_point<common_type>::value, int, common_type>::type;
    if (std::is_floating_point<common_type>::value)
        return avg(x, y);
    else
    {
        return avg(x, y) + ((static_cast<bits_type>(x) ^ static_cast<bits_type>(y)) & 1);
    }
}
177+
166178
template <class T>
167179
inline T incr(T const& x) noexcept
168180
{

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ namespace xsimd
6262
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
6363
template <class A, class T>
6464
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
65+
template <class A, class T>
66+
inline batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
6567

6668
// abs
6769
template <class A>
@@ -150,19 +152,37 @@ namespace xsimd
150152
return _mm_movemask_epi8(self) != 0;
151153
}
152154

155+
// avgr
156+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
157+
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
158+
{
159+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
160+
{
161+
return _mm_avg_epu8(self, other);
162+
}
163+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
164+
{
165+
return _mm_avg_epu16(self, other);
166+
}
167+
else
168+
{
169+
return avgr(self, other, generic {});
170+
}
171+
}
172+
153173
// avg
154174
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
155175
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
156176
{
157177
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
158178
{
159179
auto adj = ((self ^ other) << 7) >> 7;
160-
return batch<T, A>(_mm_avg_epu8(self, other)) - adj;
180+
return avgr(self, other, A {}) - adj;
161181
}
162182
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
163183
{
164184
auto adj = ((self ^ other) << 15) >> 15;
165-
return batch<T, A>(_mm_avg_epu16(self, other)) - adj;
185+
return avgr(self, other, A {}) - adj;
166186
}
167187
else
168188
{

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,19 +118,37 @@ namespace xsimd
118118
return wasm_f64x2_add(self, other);
119119
}
120120

121+
// avgr
122+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
123+
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
124+
{
125+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
126+
{
127+
return wasm_u8x16_avgr(self, other);
128+
}
129+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
130+
{
131+
return wasm_u16x8_avgr(self, other);
132+
}
133+
else
134+
{
135+
return avgr(self, other, generic {});
136+
}
137+
}
138+
121139
// avg
122140
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
123141
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
124142
{
125143
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
126144
{
127145
auto adj = ((self ^ other) << 7) >> 7;
128-
return batch<T, A>(wasm_u8x16_avgr(self, other)) - adj;
146+
return avgr(self, other, A {}) - adj;
129147
}
130148
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
131149
{
132150
auto adj = ((self ^ other) << 15) >> 15;
133-
return batch<T, A>(wasm_u16x8_avgr(self, other)) - adj;
151+
return avgr(self, other, A {}) - adj;
134152
}
135153
else
136154
{

include/xsimd/types/xsimd_api.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,21 @@ namespace xsimd
217217
return kernel::avg<A>(x, y, A {});
218218
}
219219

220+
/**
221+
* @ingroup batch_math
222+
*
223+
* Computes the rounded average of batches \c x and \c y
224+
* @param x batch of T
225+
* @param y batch of T
226+
* @return the rounded average of elements between \c x and \c y.
227+
*/
228+
template <class T, class A>
229+
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y) noexcept
230+
{
231+
detail::static_check_supported_config<T, A>();
232+
return kernel::avgr<A>(x, y, A {});
233+
}
234+
220235
/**
221236
* @ingroup batch_conversion
222237
*

test/test_batch.cpp

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -739,22 +739,42 @@ struct batch_test
739739

740740
void test_avg() const
741741
{
742-
array_type expected;
743-
std::transform(lhs.cbegin(), lhs.cend(), rhs.cbegin(), expected.begin(),
744-
[](const value_type& l, const value_type& r) -> value_type
745-
{
746-
if (std::is_integral<value_type>::value)
742+
{
743+
array_type expected;
744+
std::transform(lhs.cbegin(), lhs.cend(), rhs.cbegin(), expected.begin(),
745+
[](const value_type& l, const value_type& r) -> value_type
747746
{
748-
return ((long long)l + r) / 2;
749-
}
750-
else
747+
if (std::is_integral<value_type>::value)
748+
{
749+
return ((long long)l + r) / 2;
750+
}
751+
else
752+
{
753+
return (l + r) / 2;
754+
}
755+
});
756+
batch_type res = avg(batch_lhs(), batch_rhs());
757+
INFO("avg");
758+
CHECK_BATCH_EQ(res, expected);
759+
}
760+
{
761+
array_type expected;
762+
std::transform(lhs.cbegin(), lhs.cend(), rhs.cbegin(), expected.begin(),
763+
[](const value_type& l, const value_type& r) -> value_type
751764
{
752-
return (l + r) / 2;
753-
}
754-
});
755-
batch_type res = avg(batch_lhs(), batch_rhs());
756-
INFO("avg");
757-
CHECK_BATCH_EQ(res, expected);
765+
if (std::is_integral<value_type>::value)
766+
{
767+
return ((long long)l + r) / 2 + ((long long)(l + r) & 1);
768+
}
769+
else
770+
{
771+
return (l + r) / 2;
772+
}
773+
});
774+
batch_type res = avgr(batch_lhs(), batch_rhs());
775+
INFO("avgr");
776+
CHECK_BATCH_EQ(res, expected);
777+
}
758778
}
759779

760780
void test_horizontal_operations() const

0 commit comments

Comments
 (0)