Commit c586776 (parent ddfad22): WIP

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 41 additions & 170 deletions
@@ -65,59 +65,23 @@ namespace xsimd
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
         template <class A, class T>
         XSIMD_INLINE batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
+#endif
 
         // abs
         template <class A>
-        XSIMD_INLINE batch<double, A> abs(batch<double, A> const& self, requires_arch<sse2>) noexcept
-        {
-            __m128d sign_mask = _mm_set1_pd(-0.f); // -0.f = 1 << 31
-            return _mm_andnot_pd(sign_mask, self);
-        }
-        template <class A>
-        XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<sse2>) noexcept
+        XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<altivec>) noexcept
         {
-            __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31
-            return _mm_andnot_ps(sign_mask, self);
+            return vec_abs(self);
         }
 
         // add
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-            {
-                return _mm_add_epi8(self, other);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-            {
-                return _mm_add_epi16(self, other);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
-            {
-                return _mm_add_epi32(self, other);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
-            {
-                return _mm_add_epi64(self, other);
-            }
-            else
-            {
-                assert(false && "unsupported arch/op combination");
-                return {};
-            }
+            return vec_add(self, other);
         }
 
-        template <class A>
-        XSIMD_INLINE batch<float, A> add(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
-        {
-            return _mm_add_ps(self, other);
-        }
-
-        template <class A>
-        XSIMD_INLINE batch<double, A> add(batch<double, A> const& self, batch<double, A> const& other, requires_arch<sse2>) noexcept
-        {
-            return _mm_add_pd(self, other);
-        }
+#if 0
 
         // all
         template <class A>
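
Review note: a single vec_add call can replace the whole sizeof-based dispatch above because the AltiVec intrinsics are overloaded on the vector type. A minimal standalone sketch of that property (plain altivec.h, illustrative function names, not part of this patch):

#include <altivec.h>

// vec_add resolves per vector type, so one generic xsimd template can
// forward to it for every scalar element width.
__vector signed char add_i8(__vector signed char a, __vector signed char b)
{
    return vec_add(a, b); // wrapping add on sixteen 8-bit lanes
}

__vector float add_f32(__vector float a, __vector float b)
{
    return vec_add(a, b); // same intrinsic name, four float lanes
}
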
@@ -923,40 +887,22 @@ namespace xsimd
             return _mm_cmpunord_pd(self, self);
         }
 
+#endif
         // load_aligned
-        template <class A>
-        XSIMD_INLINE batch<float, A> load_aligned(float const* mem, convert<float>, requires_arch<sse2>) noexcept
-        {
-            return _mm_load_ps(mem);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
         {
-            return _mm_load_si128((__m128i const*)mem);
-        }
-        template <class A>
-        XSIMD_INLINE batch<double, A> load_aligned(double const* mem, convert<double>, requires_arch<sse2>) noexcept
-        {
-            return _mm_load_pd(mem);
+            return vec_ld(0, mem);
         }
 
         // load_unaligned
-        template <class A>
-        XSIMD_INLINE batch<float, A> load_unaligned(float const* mem, convert<float>, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
         {
-            return _mm_loadu_ps(mem);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<sse2>) noexcept
-        {
-            return _mm_loadu_si128((__m128i const*)mem);
-        }
-        template <class A>
-        XSIMD_INLINE batch<double, A> load_unaligned(double const* mem, convert<double>, requires_arch<sse2>) noexcept
-        {
-            return _mm_loadu_pd(mem);
+            return *(typename batch<T, A>::register_type const*)mem;
         }
 
+#if 0
         // load_complex
         namespace detail
         {
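
Review note: vec_ld always loads from a 16-byte boundary (the low four address bits are ignored), which is exactly right for load_aligned but silently rounds the address down if handed a misaligned pointer. The unaligned path instead dereferences the raw vector register type and relies on the compiler; on pre-VSX AltiVec the conventional portable sequence is vec_lvsl plus vec_perm. A hedged standalone sketch of that classic idiom (illustrative name, assuming a pre-VSX target):

#include <altivec.h>

// Classic AltiVec unaligned load: two aligned loads and a permute.
// On POWER7+ VSX the compiler can emit a genuine unaligned load instead.
__vector unsigned char load_unaligned_u8(unsigned char const* p)
{
    __vector unsigned char lo = vec_ld(0, p);    // 16-byte block containing p
    __vector unsigned char hi = vec_ld(15, p);   // block containing p + 15
    __vector unsigned char sel = vec_lvsl(0, p); // permute pattern from p's low bits
    return vec_perm(lo, hi, sel);                // splice out the 16 wanted bytes
}
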
@@ -972,6 +918,8 @@ namespace xsimd
                 return { _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(1, 1)) };
             }
         }
+#endif
+#if 0
 
         // le
         template <class A>
@@ -1435,42 +1383,14 @@ namespace xsimd
            return _mm_srli_si128(x, N);
        }
 
+#endif
        // sadd
-
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
        {
-            if (std::is_signed<T>::value)
-            {
-                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-                {
-                    return _mm_adds_epi8(self, other);
-                }
-                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-                {
-                    return _mm_adds_epi16(self, other);
-                }
-                else
-                {
-                    return sadd(self, other, common {});
-                }
-            }
-            else
-            {
-                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-                {
-                    return _mm_adds_epu8(self, other);
-                }
-                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-                {
-                    return _mm_adds_epu16(self, other);
-                }
-                else
-                {
-                    return sadd(self, other, common {});
-                }
-            }
+            return vec_adds(self, other);
        }
+#if 0
 
        // set
        template <class A, class... Values>
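
Review note: vec_adds saturates for signed and unsigned 8-, 16- and 32-bit lanes, so no signedness branch is needed here, unlike the deleted SSE2 version. The is_scalar gate does, however, also admit float and 64-bit integers, which vec_adds does not cover; those presumably still need the common fallback, as ssub below uses for widths other than one byte. A small standalone sketch of the saturating behaviour (illustrative name):

#include <altivec.h>

// Saturating add on signed bytes: 127 + 1 stays at 127 instead of
// wrapping to -128 as plain vec_add would.
__vector signed char sadd_i8(__vector signed char a, __vector signed char b)
{
    return vec_adds(a, b);
}
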
@@ -1527,88 +1447,39 @@ namespace xsimd
            static_assert(sizeof...(Values) == batch_bool<double, A>::size, "consistent init");
            return _mm_castsi128_pd(set(batch<int64_t, A>(), A {}, static_cast<int64_t>(values ? -1LL : 0LL)...).data);
        }
+#endif
 
        // ssub
 
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
        {
-            if (std::is_signed<T>::value)
-            {
-                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-                {
-                    return _mm_subs_epi8(self, other);
-                }
-                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-                {
-                    return _mm_subs_epi16(self, other);
-                }
-                else
-                {
-                    return ssub(self, other, common {});
-                }
-            }
-            else
-            {
-                XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-                {
-                    return _mm_subs_epu8(self, other);
-                }
-                else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-                {
-                    return _mm_subs_epu16(self, other);
-                }
-                else
-                {
-                    return ssub(self, other, common {});
-                }
-            }
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return vec_subs(self, other);
+            }
+            else
+            {
+                return ssub(self, other, common {});
+            }
        }
 
+
        // store_aligned
-        template <class A>
-        XSIMD_INLINE void store_aligned(float* mem, batch<float, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_store_ps(mem, self);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
        {
-            return _mm_store_si128((__m128i*)mem, self);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE void store_aligned(T* mem, batch_bool<T, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_store_si128((__m128i*)mem, self);
-        }
-        template <class A>
-        XSIMD_INLINE void store_aligned(double* mem, batch<double, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_store_pd(mem, self);
+            return vec_st(self, 0, mem);
        }
 
        // store_unaligned
-        template <class A>
-        XSIMD_INLINE void store_unaligned(float* mem, batch<float, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_storeu_ps(mem, self);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<sse2>) noexcept
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
        {
-            return _mm_storeu_si128((__m128i*)mem, self);
-        }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        XSIMD_INLINE void store_unaligned(T* mem, batch_bool<T, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_storeu_si128((__m128i*)mem, self);
-        }
-        template <class A>
-        XSIMD_INLINE void store_unaligned(double* mem, batch<double, A> const& self, requires_arch<sse2>) noexcept
-        {
-            return _mm_storeu_pd(mem, self);
+            *(typename batch<T, A>::register_type*)mem = self;
        }
 
+#if 0
        // sub
        template <class A>
        XSIMD_INLINE batch<float, A> sub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
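
Review note: the store side mirrors the loads. vec_st writes to the enclosing 16-byte boundary (the low address bits are dropped), and store_unaligned again goes through a raw vector pointer that pre-VSX compilers assume to be aligned. A minimal standalone sketch of the aligned path (illustrative name):

#include <altivec.h>

// Aligned store: writes all 16 bytes at the 16-byte boundary containing mem,
// so mem must genuinely be 16-byte aligned.
void store_f32(float* mem, __vector float v)
{
    vec_st(v, 0, mem);
}
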
