Skip to content

Commit 540e0d2

Browse files
WIP
1 parent d66759f commit 540e0d2

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -414,23 +414,23 @@ namespace xsimd
414414
return vec_cmpgt(self, other);
415415
}
416416

417-
#if 0
418-
419417
// haddp: sums each of the four input rows horizontally; per the lane comments below, lane i of the result is row[i][0] + row[i][1] + row[i][2] + row[i][3]
420418
template <class A>
421419
XSIMD_INLINE batch<float, A> haddp(batch<float, A> const* row, requires_arch<altivec>) noexcept
422420
{
423-
__m128 tmp0 = _mm_unpacklo_ps(row[0], row[1]);
424-
__m128 tmp1 = _mm_unpackhi_ps(row[0], row[1]);
425-
__m128 tmp2 = _mm_unpackhi_ps(row[2], row[3]);
426-
tmp0 = _mm_add_ps(tmp0, tmp1);
427-
tmp1 = _mm_unpacklo_ps(row[2], row[3]);
428-
tmp1 = _mm_add_ps(tmp1, tmp2);
429-
tmp2 = _mm_movehl_ps(tmp1, tmp0);
430-
tmp0 = _mm_movelh_ps(tmp0, tmp1);
431-
return _mm_add_ps(tmp0, tmp2);
421+
auto tmp0 = vec_mergee(row[0], row[1]); // v00 v10 v02 v12
422+
auto tmp1 = vec_mergeo(row[0], row[1]); // v01 v11 v03 v13
423+
auto tmp4 = vec_add(tmp0, tmp1); // (v00 + v01, v10 + v11, v02 + v03, v12 + v13)
424+
425+
auto tmp2 = vec_mergee(row[2], row[3]); // v20 v30 v22 v32
426+
auto tmp3 = vec_mergeo(row[2], row[3]); // v21 v31 v23 v33
427+
auto tmp5 = vec_add(tmp2, tmp3); // (v20 + v21, v30 + v31, v22 + v23, v32 + v33)
428+
429+
auto tmp6 = vec_permi(tmp4, tmp5, 0x0); // (v00 + v01, v10 + v11, v20 + v21, v30 + v31)
430+
auto tmp7 = vec_permi(tmp4, tmp5, 0x3); // (v02 + v03, v12 + v13, v22 + v23, v32 + v33)
431+
432+
return vec_add(tmp6, tmp7);
432433
}
433-
#endif
434434

435435
// incr_if
436436
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>

0 commit comments

Comments
 (0)