Skip to content

Commit d7c3383

Browse files
Provide generic, intel, wasm and arm implementation for avg
That's the non-rounding implementation.
1 parent d2ad03a commit d7c3383

File tree

10 files changed

+225
-14
lines changed

10 files changed

+225
-14
lines changed

include/xsimd/arch/generic/xsimd_generic_math.hpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ namespace xsimd
2626

2727
using namespace types;
2828
// abs
29-
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
29+
template <class A, class T, class>
3030
inline batch<T, A> abs(batch<T, A> const& self, requires_arch<generic>) noexcept
3131
{
3232
if (std::is_unsigned<T>::value)
@@ -45,6 +45,39 @@ namespace xsimd
4545
return hypot(z.real(), z.imag());
4646
}
4747

48+
// avg
49+
namespace detail
50+
{
51+
template <class A, class T>
52+
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::false_type) noexcept
53+
{
54+
return (x & y) + ((x ^ y) >> 1);
55+
}
56+
57+
template <class A, class T>
58+
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::true_type) noexcept
59+
{
60+
// Inspired by
61+
// https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
62+
auto t = (x & y) + ((x ^ y) >> 1);
63+
auto t_u = bitwise_cast<typename std::make_unsigned<T>::type>(t);
64+
auto avg = t + (bitwise_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
65+
return avg;
66+
}
67+
68+
template <class A, class T>
69+
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::false_type, std::true_type) noexcept
70+
{
71+
return (x + y) / 2;
72+
}
73+
}
74+
75+
template <class A, class T>
76+
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
77+
{
78+
return detail::avg(x, y, typename std::is_integral<T>::type {}, typename std::is_signed<T>::type {});
79+
}
80+
4881
// batch_cast
4982
template <class A, class T>
5083
inline batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,26 @@ namespace xsimd
7676
}
7777
}
7878

79+
// avg
80+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
81+
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
82+
{
83+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
84+
{
85+
auto adj = ((self ^ other) << 7) >> 7;
86+
return batch<T, A>(_mm256_avg_epu8(self, other)) - adj;
87+
}
88+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
89+
{
90+
auto adj = ((self ^ other) << 15) >> 15;
91+
return batch<T, A>(_mm256_avg_epu16(self, other)) - adj;
92+
}
93+
else
94+
{
95+
return avg(self, other, generic {});
96+
}
97+
}
98+
7999
// bitwise_and
80100
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
81101
inline batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,26 @@ namespace xsimd
112112
}
113113
}
114114

115+
// avg
116+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
117+
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
118+
{
119+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
120+
{
121+
auto adj = ((self ^ other) << 7) >> 7;
122+
return batch<T, A>(_mm512_avg_epu8(self, other)) - adj;
123+
}
124+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
125+
{
126+
auto adj = ((self ^ other) << 15) >> 15;
127+
return batch<T, A>(_mm512_avg_epu16(self, other)) - adj;
128+
}
129+
else
130+
{
131+
return avg(self, other, generic {});
132+
}
133+
}
134+
115135
// bitwise_lshift
116136
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
117137
inline batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx512bw>) noexcept

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,33 +23,39 @@
2323
// Wrap intrinsics so we can pass them as function pointers
2424
// - OP: intrinsics name prefix, e.g., vorrq
2525
// - RT: type traits to deduce intrinsics return types
26-
#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
26+
#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
2727
namespace wrap \
2828
{ \
2929
inline RT<uint8x16_t> OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \
3030
{ \
3131
return ::OP##_u8(a, b); \
3232
} \
33-
inline RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
34-
{ \
35-
return ::OP##_s8(a, b); \
36-
} \
3733
inline RT<uint16x8_t> OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \
3834
{ \
3935
return ::OP##_u16(a, b); \
4036
} \
41-
inline RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
42-
{ \
43-
return ::OP##_s16(a, b); \
44-
} \
4537
inline RT<uint32x4_t> OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \
4638
{ \
4739
return ::OP##_u32(a, b); \
4840
} \
49-
inline RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
50-
{ \
51-
return ::OP##_s32(a, b); \
52-
} \
41+
}
42+
43+
// Wraps the 8/16/32-bit integer variants of intrinsic OP, signed and unsigned.
// Expands WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) first, then adds
// wrap::OP##_s8, wrap::OP##_s16 and wrap::OP##_s32, each forwarding its two
// arguments to the global intrinsic of the same name.
// - OP: intrinsics name prefix, e.g., vorrq
// - RT: type traits to deduce intrinsics return types
#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
44+
WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
45+
namespace wrap \
46+
{ \
47+
inline RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
48+
{ \
49+
return ::OP##_s8(a, b); \
50+
} \
51+
inline RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
52+
{ \
53+
return ::OP##_s16(a, b); \
54+
} \
55+
inline RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
56+
{ \
57+
return ::OP##_s32(a, b); \
58+
} \
5359
}
5460

5561
#define WRAP_BINARY_INT(OP, RT) \
@@ -204,6 +210,10 @@ namespace xsimd
204210
uint32x4_t, int32x4_t,
205211
float32x4_t>;
206212

213+
// Dispatcher over the 8-, 16- and 32-bit signed and unsigned integer
// register types only (no 64-bit integer and no float32x4_t entry).
// NOTE(review): no use of this alias is visible in this diff - confirm it
// is referenced elsewhere.
using excluding_int64f32_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
214+
uint16x8_t, int16x8_t,
215+
uint32x4_t, int32x4_t>;
216+
207217
/**************************
208218
* comparison dispatchers *
209219
**************************/
@@ -744,6 +754,22 @@ namespace xsimd
744754
return dispatcher.apply(register_type(lhs), register_type(rhs));
745755
}
746756

757+
/*******
758+
* avg *
759+
*******/
760+
761+
WRAP_BINARY_UINT_EXCLUDING_64(vhaddq, detail::identity_return_type)
762+
763+
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
764+
inline batch<T, A> avg(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
765+
{
766+
using register_type = typename batch<T, A>::register_type;
767+
const detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary dispatcher = {
768+
std::make_tuple(wrap::vhaddq_u8, wrap::vhaddq_u16, wrap::vhaddq_u32)
769+
};
770+
return dispatcher.apply(register_type(lhs), register_type(rhs));
771+
}
772+
747773
/********
748774
* sadd *
749775
********/

include/xsimd/arch/xsimd_scalar.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,27 @@ namespace xsimd
142142
return x + y;
143143
}
144144

145+
namespace detail
{
    // Non-integral (floating point) operands: plain arithmetic mean.
    template <class C, class IsSigned>
    inline C scalar_avg_impl(C x, C y, std::false_type, IsSigned) noexcept
    {
        return (x + y) / 2;
    }

    // Unsigned integers: bits shared by both operands contribute fully, bits
    // that differ contribute half. Unlike (x + y) / 2, this cannot overflow.
    template <class C>
    inline C scalar_avg_impl(C x, C y, std::true_type, std::false_type) noexcept
    {
        return static_cast<C>((x & y) + ((x ^ y) >> 1));
    }

    // Signed integers.
    // Inspired by
    // https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
    // The unsigned formula yields the average rounded toward negative infinity
    // (this relies on >> being an arithmetic shift for negative values, which
    // is implementation-defined before C++20). One is added back when that
    // intermediate is negative and the operands differ in their lowest bit,
    // so the final result is truncated toward zero.
    template <class C>
    inline C scalar_avg_impl(C x, C y, std::true_type, std::true_type) noexcept
    {
        using unsigned_type = typename std::make_unsigned<C>::type;
        const C floored = static_cast<C>((x & y) + ((x ^ y) >> 1));
        // The sign bit must be taken at the width of the type the arithmetic
        // was carried out in, hence sizeof(C) rather than the width of either
        // original argument type.
        const C sign_bit = static_cast<C>(static_cast<unsigned_type>(floored) >> (8 * sizeof(C) - 1));
        return static_cast<C>(floored + (sign_bit & (x ^ y)));
    }
}

/**
 * Computes the non-rounding average of two scalars.
 *
 * Both arguments are converted to their common type. Integer averages are
 * computed without intermediate overflow and truncated toward zero; floating
 * point averages are (x + y) / 2.
 *
 * The implementation is selected at compile time through tag dispatch, so the
 * bitwise integer code is never instantiated for floating point arguments.
 *
 * @param x first value
 * @param y second value
 * @return the average of \c x and \c y, expressed in their common type.
 */
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avg(T const& x, Tp const& y) noexcept
{
    using common_type = typename std::common_type<T, Tp>::type;
    return detail::scalar_avg_impl(static_cast<common_type>(x),
                                   static_cast<common_type>(y),
                                   typename std::is_integral<common_type>::type {},
                                   typename std::is_signed<common_type>::type {});
}
165+
145166
template <class T>
146167
inline T incr(T const& x) noexcept
147168
{

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ namespace xsimd
6060
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
6161
template <class A, typename T, typename ITy, ITy... Indices>
6262
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
63+
template <class A, class T>
64+
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
6365

6466
// abs
6567
template <class A>
@@ -148,6 +150,26 @@ namespace xsimd
148150
return _mm_movemask_epi8(self) != 0;
149151
}
150152

153+
// avg
154+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
155+
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
156+
{
157+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
158+
{
159+
auto adj = ((self ^ other) << 7) >> 7;
160+
return batch<T, A>(_mm_avg_epu8(self, other)) - adj;
161+
}
162+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
163+
{
164+
auto adj = ((self ^ other) << 15) >> 15;
165+
return batch<T, A>(_mm_avg_epu16(self, other)) - adj;
166+
}
167+
else
168+
{
169+
return avg(self, other, generic {});
170+
}
171+
}
172+
151173
// batch_bool_cast
152174
template <class A, class T_out, class T_in>
153175
inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<sse2>) noexcept

include/xsimd/arch/xsimd_wasm.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ namespace xsimd
3737
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
3838
template <class A, typename T, typename ITy, ITy... Indices>
3939
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
40+
template <class A, class T>
41+
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
4042

4143
// abs
4244
template <class A, class T, typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, void>::type>
@@ -116,6 +118,26 @@ namespace xsimd
116118
return wasm_f64x2_add(self, other);
117119
}
118120

121+
// avg
122+
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
123+
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
124+
{
125+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
126+
{
127+
auto adj = ((self ^ other) << 7) >> 7;
128+
return batch<T, A>(wasm_u8x16_avgr(self, other)) - adj;
129+
}
130+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
131+
{
132+
auto adj = ((self ^ other) << 15) >> 15;
133+
return batch<T, A>(wasm_u16x8_avgr(self, other)) - adj;
134+
}
135+
else
136+
{
137+
return avg(self, other, generic {});
138+
}
139+
}
140+
119141
// all
120142
template <class A>
121143
inline bool all(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept

include/xsimd/types/xsimd_api.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,21 @@ namespace xsimd
202202
return kernel::atanh<A>(x, A {});
203203
}
204204

205+
/**
206+
* @ingroup batch_math
207+
*
208+
* Computes the average of batches \c x and \c y
209+
* @param x batch of T
210+
* @param y batch of T
211+
* @return the average of elements between \c x and \c y.
212+
*/
213+
template <class T, class A>
214+
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y) noexcept
215+
{
216+
detail::static_check_supported_config<T, A>();
217+
return kernel::avg<A>(x, y, A {});
218+
}
219+
205220
/**
206221
* @ingroup batch_conversion
207222
*

test/test_batch.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,26 @@ struct batch_test
737737
}
738738
}
739739

740+
void test_avg() const
741+
{
742+
array_type expected;
743+
std::transform(lhs.cbegin(), lhs.cend(), rhs.cbegin(), expected.begin(),
744+
[](const value_type& l, const value_type& r) -> value_type
745+
{
746+
if (std::is_integral<value_type>::value)
747+
{
748+
return ((long long)l + r) / 2;
749+
}
750+
else
751+
{
752+
return (l + r) / 2;
753+
}
754+
});
755+
batch_type res = avg(batch_lhs(), batch_rhs());
756+
INFO("avg");
757+
CHECK_BATCH_EQ(res, expected);
758+
}
759+
740760
void test_horizontal_operations() const
741761
{
742762
// reduce_add
@@ -938,6 +958,11 @@ TEST_CASE_TEMPLATE("[batch]", B, BATCH_TYPES)
938958
Test.test_abs();
939959
}
940960

961+
SUBCASE("avg")
962+
{
963+
Test.test_avg();
964+
}
965+
941966
SUBCASE("horizontal_operations")
942967
{
943968
Test.test_horizontal_operations();

test/test_xsimd_api.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,13 @@ struct xsimd_api_all_types_functions
11561156
CHECK_EQ(extract(xsimd::add(T(val0), T(val1))), val0 + val1);
11571157
}
11581158

1159+
void test_avg()
1160+
{
1161+
value_type val0(1);
1162+
value_type val1(3);
1163+
CHECK_EQ(extract(xsimd::avg(T(val0), T(val1))), (val0 + val1) / value_type(2));
1164+
}
1165+
11591166
void test_decr()
11601167
{
11611168
value_type val0(1);

0 commit comments

Comments
 (0)