Skip to content

Commit d66759f

Browse files
++
1 parent 2de3a6b commit d66759f

File tree

1 file changed

+13
-41
lines changed

1 file changed

+13
-41
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,12 @@ namespace xsimd
122122
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
123123
XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
124124
{
125-
return vec_and(self, other);
125+
return vec_and(self.data, other.data);
126126
}
127127
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
128128
XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<altivec>) noexcept
129129
{
130-
return vec_and(self, other);
130+
return vec_and(self.data, other.data);
131131
}
132132

133133
// bitwise_andnot
@@ -239,6 +239,8 @@ namespace xsimd
239239
}
240240
}
241241

242+
#endif
243+
242244
// decr_if
243245
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
244246
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
@@ -247,17 +249,14 @@ namespace xsimd
247249
}
248250

249251
// div
250-
template <class A>
251-
XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
252-
{
253-
return _mm_div_ps(self, other);
254-
}
255-
template <class A>
256-
XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
252+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
253+
XSIMD_INLINE batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
257254
{
258-
return _mm_div_pd(self, other);
255+
return vec_div(self.data, other.data);
259256
}
260257

258+
#if 0
259+
261260
// fast_cast
262261
namespace detail
263262
{
@@ -267,33 +266,6 @@ namespace xsimd
267266
return _mm_cvtepi32_ps(self);
268267
}
269268

270-
template <class A>
271-
XSIMD_INLINE batch<double, A> fast_cast(batch<uint64_t, A> const& x, batch<double, A> const&, requires_arch<altivec>) noexcept
272-
{
273-
// from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
274-
// adapted to altivec
275-
__m128i xH = _mm_srli_epi64(x, 32);
276-
xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.))); // 2^84
277-
__m128i mask = _mm_setr_epi16(0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000);
278-
__m128i xL = _mm_or_si128(_mm_and_si128(mask, x), _mm_andnot_si128(mask, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)))); // 2^52
279-
__m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
280-
return _mm_add_pd(f, _mm_castsi128_pd(xL));
281-
}
282-
283-
template <class A>
284-
XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<altivec>) noexcept
285-
{
286-
// from https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
287-
// adapted to altivec
288-
__m128i xH = _mm_srai_epi32(x, 16);
289-
xH = _mm_and_si128(xH, _mm_setr_epi16(0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF));
290-
xH = _mm_add_epi64(xH, _mm_castpd_si128(_mm_set1_pd(442721857769029238784.))); // 3*2^67
291-
__m128i mask = _mm_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000);
292-
__m128i xL = _mm_or_si128(_mm_and_si128(mask, x), _mm_andnot_si128(mask, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)))); // 2^52
293-
__m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52
294-
return _mm_add_pd(f, _mm_castsi128_pd(xL));
295-
}
296-
297269
template <class A>
298270
XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<altivec>) noexcept
299271
{
@@ -306,12 +278,12 @@ namespace xsimd
306278
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
307279
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
308280
{
309-
return vec_cmpeq(self, other);
281+
return vec_cmpeq(self.data, other.data);
310282
}
311283
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
312284
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<altivec>) noexcept
313285
{
314-
return vec_cmpeq(self, other);
286+
return vec_cmpeq(self.data, other.data);
315287
}
316288

317289
// first
@@ -881,8 +853,8 @@ namespace xsimd
881853
{
882854
// From: https://stackoverflow.com/questions/35317341/how-to-store-a-vector-to-an-unaligned-location-in-memory-with-altivec
883855
// Load the surrounding area
884-
auto low = vec_ld(0, dst);
885-
auto high = vec_ld(16, dst);
856+
auto low = vec_ld(0, mem);
857+
auto high = vec_ld(16, mem);
886858
// Prepare the constants that we need
887859
auto permuteVector = vec_lvsr(0, (int*)mem);
888860
auto oxFF = vec_splat_s8(-1);

0 commit comments

Comments
 (0)