@@ -65,59 +65,23 @@ namespace xsimd
6565 XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
6666 template <class A, class T>
6767 XSIMD_INLINE batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
68+ #endif
6869
6970 // abs
7071 template <class A >
71- XSIMD_INLINE batch<double, A> abs(batch<double, A> const& self, requires_arch<sse2>) noexcept
72- {
73- __m128d sign_mask = _mm_set1_pd(-0.f); // -0.f = 1 << 31
74- return _mm_andnot_pd(sign_mask, self);
75- }
76- template <class A>
77- XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<sse2>) noexcept
72+ XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<altivec>) noexcept
7873 {
79- __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31
80- return _mm_andnot_ps(sign_mask, self);
74+     return vec_abs(self);
8175 }
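
vec_abs is defined in altivec.h for float and signed integer vectors alike, so this float overload could later be joined by integer ones; a minimal standalone sketch of the call outside xsimd (names are illustrative):

    #include <altivec.h>

    // vec_abs clears the sign per lane, e.g. {-1.5f, 2.f, ...} -> {1.5f, 2.f, ...}
    __vector float abs_f32(__vector float v)
    {
        return vec_abs(v);
    }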
8276
8377 // add
84- template <class A, class T, class = typename std::enable_if<std::is_integral <T>::value, void>::type>
85- XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2 >) noexcept
78+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
79+ XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
8680 {
87- XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
88- {
89- return _mm_add_epi8(self, other);
90- }
91- else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
92- {
93- return _mm_add_epi16(self, other);
94- }
95- else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
96- {
97- return _mm_add_epi32(self, other);
98- }
99- else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
100- {
101- return _mm_add_epi64(self, other);
102- }
103- else
104- {
105- assert(false && "unsupported arch/op combination");
106- return {};
107- }
81+     return vec_add(self, other);
10882 }
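
Because vec_add is overloaded on its operand vector type, one is_scalar template replaces the per-width _mm_add_epi8/16/32/64 dispatch it removes; a sketch of that type-driven resolution using raw altivec.h types (no xsimd):

    #include <altivec.h>

    // The same intrinsic name lowers to vaddubm/vadduhm/vadduwm/vaddfp
    // depending on the element type of its arguments.
    __vector signed char  add8 (__vector signed char a,  __vector signed char b)  { return vec_add(a, b); }
    __vector signed short add16(__vector signed short a, __vector signed short b) { return vec_add(a, b); }
    __vector float        addf (__vector float a,        __vector float b)        { return vec_add(a, b); }

Note that 64-bit integer and double lanes need the VSX/POWER8 overloads of vec_add, which a plain AltiVec target may not provide.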
10983
110- template <class A>
111- XSIMD_INLINE batch<float, A> add(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
112- {
113- return _mm_add_ps(self, other);
114- }
115-
116- template <class A>
117- XSIMD_INLINE batch<double, A> add(batch<double, A> const& self, batch<double, A> const& other, requires_arch<sse2>) noexcept
118- {
119- return _mm_add_pd(self, other);
120- }
84+ #if 0
12185
12286 // all
12387 template <class A>
@@ -923,40 +887,22 @@ namespace xsimd
923887 return _mm_cmpunord_pd(self, self);
924888 }
925889
890+ #endif
926891 // load_aligned
927- template <class A>
928- XSIMD_INLINE batch<float, A> load_aligned(float const* mem, convert<float>, requires_arch<sse2>) noexcept
929- {
930- return _mm_load_ps(mem);
931- }
932- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
933- XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<sse2>) noexcept
892+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
893+ XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
934894 {
935- return _mm_load_si128((__m128i const*)mem);
936- }
937- template <class A>
938- XSIMD_INLINE batch<double, A> load_aligned(double const* mem, convert<double>, requires_arch<sse2>) noexcept
939- {
940- return _mm_load_pd(mem);
895+     return vec_ld(0, mem);
941896 }
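
vec_ld does not fault on a misaligned pointer: it masks the low four bits of the effective address and loads from the preceding 16-byte boundary, so the alignment precondition is worth asserting in debug builds; a sketch with a hypothetical checked helper:

    #include <altivec.h>
    #include <cassert>
    #include <cstdint>

    // load_checked is illustrative only: it turns the 16-byte precondition
    // that vec_ld silently assumes into a debug-time assertion.
    __vector float load_checked(float const* mem)
    {
        assert(reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 && "vec_ld needs 16-byte alignment");
        return vec_ld(0, mem);
    }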
942897
943898 // load_unaligned
944- template <class A>
945- XSIMD_INLINE batch<float , A> load_unaligned(float const* mem, convert<float >, requires_arch<sse2 >) noexcept
899+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
900+ XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
946901 {
947- return _mm_loadu_ps(mem);
948- }
949- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
950- XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<sse2>) noexcept
951- {
952- return _mm_loadu_si128((__m128i const*)mem);
953- }
954- template <class A>
955- XSIMD_INLINE batch<double, A> load_unaligned(double const* mem, convert<double>, requires_arch<sse2>) noexcept
956- {
957- return _mm_loadu_pd(mem);
902+     return *reinterpret_cast<typename batch<T, A>::register_type const*>(mem); // compiler emits an unaligned-capable access
958903 }
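
Dereferencing a cast vector pointer leaves the unaligned access to the compiler (VSX targets lower it to a single lxvd2x/lxv); a memcpy-based sketch is the strictly portable alternative under the same layout assumptions:

    #include <altivec.h>
    #include <cstring>

    // memcpy expresses an unaligned load without aliasing or alignment UB;
    // optimizing compilers fold it to one vector load on VSX.
    __vector float load_unaligned_portable(float const* mem)
    {
        __vector float r;
        std::memcpy(&r, mem, sizeof(r));
        return r;
    }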
959904
905+ #if 0
960906 // load_complex
961907 namespace detail
962908 {
@@ -972,6 +918,8 @@ namespace xsimd
972918 return { _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(0, 0)), _mm_shuffle_pd(hi, lo, _MM_SHUFFLE2(1, 1)) };
973919 }
974920 }
921+ #endif
922+ #if 0
975923
976924 // le
977925 template <class A>
@@ -1435,42 +1383,14 @@ namespace xsimd
14351383 return _mm_srli_si128(x, N);
14361384 }
14371385
1386+ #endif
14381387 // sadd
1439-
1440- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1441- XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
1388+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
1389+ XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
14421390 {
1443- if (std::is_signed<T>::value)
1444- {
1445- XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1446- {
1447- return _mm_adds_epi8(self, other);
1448- }
1449- else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1450- {
1451- return _mm_adds_epi16(self, other);
1452- }
1453- else
1454- {
1455- return sadd(self, other, common {});
1456- }
1457- }
1458- else
1459- {
1460- XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1461- {
1462- return _mm_adds_epu8(self, other);
1463- }
1464- else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1465- {
1466- return _mm_adds_epu16(self, other);
1467- }
1468- else
1469- {
1470- return sadd(self, other, common {});
1471- }
1472- }
1391+     XSIMD_IF_CONSTEXPR(std::is_integral<T>::value && sizeof(T) <= 4)
1392+     {
1393+         return vec_adds(self, other);
1394+     }
1395+     else
1396+     {
1397+         // no vec_adds overload for 64-bit or floating-point lanes
1398+         return sadd(self, other, common {});
1399+     }
14731392 }
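
A sketch of the saturating semantics vec_adds provides on the integer lanes (vec_splats and the values are illustrative):

    #include <altivec.h>

    // Signed 8-bit lanes clamp at SCHAR_MAX instead of wrapping:
    // 120 + 100 -> 127 in every lane, where vec_add would give -36.
    __vector signed char sadd_demo()
    {
        __vector signed char a = vec_splats((signed char)120);
        __vector signed char b = vec_splats((signed char)100);
        return vec_adds(a, b);
    }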
1393+ #if 0
14741394
14751395 // set
14761396 template <class A, class... Values>
@@ -1527,88 +1447,39 @@ namespace xsimd
15271447 static_assert(sizeof...(Values) == batch_bool<double, A>::size, "consistent init");
15281448 return _mm_castsi128_pd(set(batch<int64_t, A>(), A {}, static_cast<int64_t>(values ? -1LL : 0LL)...).data);
15291449 }
1450+ #endif
15301451
15311452 // ssub
15321453
1533- template <class A, class T, class = typename std::enable_if<std::is_integral <T>::value, void>::type>
1534- XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2 >) noexcept
1454+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
1455+ XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
15351456 {
1536- if (std::is_signed<T>::value)
1537- {
1538- XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1539- {
1540- return _mm_subs_epi8(self, other);
1541- }
1542- else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1543- {
1544- return _mm_subs_epi16(self, other);
1545- }
1546- else
1547- {
1548- return ssub(self, other, common {});
1549- }
1550- }
1551- else
1552- {
1553- XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1554- {
1555- return _mm_subs_epu8(self, other);
1556- }
1557- else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1558- {
1559- return _mm_subs_epu16(self, other);
1560- }
1561- else
1562- {
1563- return ssub(self, other, common {});
1564- }
1565- }
1457+     XSIMD_IF_CONSTEXPR(std::is_integral<T>::value && sizeof(T) <= 4)
1458+     {
1459+         // vec_subs covers 8/16/32-bit integer lanes, mirroring sadd above
1460+         return vec_subs(self, other);
1461+     }
1462+     else
1463+     {
1464+         return ssub(self, other, common {});
1465+     }
15661465 }
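
The unsigned flavour of the same intrinsic clamps at zero; a matching sketch:

    #include <altivec.h>

    // Unsigned saturating subtract: 10 - 200 -> 0 in every lane,
    // where the wrapping vec_sub would give 66.
    __vector unsigned char ssub_demo()
    {
        __vector unsigned char a = vec_splats((unsigned char)10);
        __vector unsigned char b = vec_splats((unsigned char)200);
        return vec_subs(a, b);
    }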
15671466
15681468 // store_aligned
1569- template <class A>
1570- XSIMD_INLINE void store_aligned(float* mem, batch<float, A> const& self, requires_arch<sse2>) noexcept
1571- {
1572- return _mm_store_ps(mem, self);
1573- }
1574- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1575- XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<sse2>) noexcept
1469+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
1470+ XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
15761471 {
1577- return _mm_store_si128((__m128i*)mem, self);
1578- }
1579- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1580- XSIMD_INLINE void store_aligned(T* mem, batch_bool<T, A> const& self, requires_arch<sse2>) noexcept
1581- {
1582- return _mm_store_si128((__m128i*)mem, self);
1583- }
1584- template <class A>
1585- XSIMD_INLINE void store_aligned(double* mem, batch<double, A> const& self, requires_arch<sse2>) noexcept
1586- {
1587- return _mm_store_pd(mem, self);
1472+     return vec_st(self, 0, mem);
15881473 }
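
vec_st truncates the effective address exactly as vec_ld does, so the destination must already be 16-byte aligned; a round-trip sketch under that assumption:

    #include <altivec.h>

    // alignas(16) satisfies the precondition both intrinsics assume;
    // the reload returns the stored value unchanged.
    void store_roundtrip(__vector float v)
    {
        alignas(16) float buf[4];
        vec_st(v, 0, buf);
        __vector float r = vec_ld(0, buf);
        (void)r; // r == v lane-wise
    }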
15891474
15901475 // store_unaligned
1591- template <class A>
1592- XSIMD_INLINE void store_unaligned(float* mem, batch<float, A> const& self, requires_arch<sse2>) noexcept
1593- {
1594- return _mm_storeu_ps(mem, self);
1595- }
1596- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1597- XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<sse2>) noexcept
1476+ template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
1477+ XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
15981478 {
1599- return _mm_storeu_si128((__m128i*)mem, self);
1600- }
1601- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1602- XSIMD_INLINE void store_unaligned(T* mem, batch_bool<T, A> const& self, requires_arch<sse2>) noexcept
1603- {
1604- return _mm_storeu_si128((__m128i*)mem, self);
1605- }
1606- template <class A>
1607- XSIMD_INLINE void store_unaligned(double* mem, batch<double, A> const& self, requires_arch<sse2>) noexcept
1608- {
1609- return _mm_storeu_pd(mem, self);
1479+     *reinterpret_cast<typename batch<T, A>::register_type*>(mem) = self; // compiler emits an unaligned-capable store
16101480 }
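
The memcpy mirror of the unaligned load sketch above works for stores too, for any destination alignment:

    #include <altivec.h>
    #include <cstring>

    // Folded to a single stxvd2x/stxv on VSX targets.
    void store_unaligned_portable(float* mem, __vector float v)
    {
        std::memcpy(mem, &v, sizeof(v));
    }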
16111481
1482+ #if 0
16121483 // sub
16131484 template <class A>
16141485 XSIMD_INLINE batch<float, A> sub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept