Skip to content

Commit 3ee1682

Browse files
WIP
1 parent 540e0d2 commit 3ee1682

File tree

1 file changed

+21
-25
lines changed

1 file changed

+21
-25
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,14 @@ namespace xsimd
278278
// eq (batch operands, altivec): lane-wise equality test of `self` and `other`.
// Participates in overload resolution only when std::is_scalar<T> holds.
// In this diff view the line after the `-` marker is the previous body and the
// lines after the `+` markers are the new body.
// New body: the vec_cmpeq result is kept in a local and its storage is then
// reinterpreted as batch_bool<T, A>::register_type; no value conversion happens.
// NOTE(review): this presumes the vec_cmpeq result and the batch_bool register
// type have identical size and layout for every supported T -- confirm.
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
279279
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
280280
{
281-
return vec_cmpeq(self.data, other.data);
281+
auto res = vec_cmpeq(self.data, other.data);
282+
return *reinterpret_cast<typename batch_bool<T, A>::register_type*>(&res); // reinterpret storage, do not convert values
282283
}
283284
// eq (batch_bool operands, altivec): lane-wise equality test of two masks.
// Participates in overload resolution only when std::is_scalar<T> holds.
// In this diff view the line after the `-` marker is the previous body and the
// lines after the `+` markers are the new body.
// New body: the vec_cmpeq result is kept in a local and its storage is then
// reinterpreted as batch_bool<T, A>::register_type; no value conversion happens.
// NOTE(review): this presumes the vec_cmpeq result and the batch_bool register
// type have identical size and layout for every supported T -- confirm.
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
284285
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<altivec>) noexcept
285286
{
286-
return vec_cmpeq(self.data, other.data);
287+
auto res = vec_cmpeq(self.data, other.data);
288+
return *reinterpret_cast<typename batch_bool<T, A>::register_type*>(&res); // reinterpret storage, do not convert values
287289
}
288290

289291
// first
@@ -793,23 +795,21 @@ namespace xsimd
793795
{
794796
return vec_sqrt(val);
795797
}
796-
#if 0
797798

798799
// slide_left
799800
// slide_left<N> (altivec): shift the whole 128-bit register left by N bytes,
// shifting zeros in; N >= 16 yields an all-zero register.
// The previous `+` line used vec_sll, which shifts by a *bit* count of 0-7 and
// therefore cannot express a byte slide. vec_slo shifts by whole bytes and
// reads its count, expressed in bits (8 * N), from the shift operand; the count
// is splatted into every byte so its position in the operand does not matter.
// NOTE(review): "left" is in register (most-significant-first) order; confirm
// this matches the lane direction expected on the targeted endianness.
template <size_t N, class A, class T>
800801
XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept
801802
{
802-
return _mm_slli_si128(x, N);
803+
using register_type = typename batch<T, A>::register_type;
// vec_slo only encodes counts of 0-15 bytes, so a full-width slide is handled here.
if (N >= 16)
{
return (register_type)vec_splat_u8(0);
}
const __vector unsigned char shift = vec_splats((unsigned char)(8 * N));
// Shift on the raw bytes, then view the result as the batch's register type again.
return (register_type)vec_slo((__vector unsigned char)x.data, shift);
803804
}
804805

805806
// slide_right
806807
// slide_right<N> (altivec): shift the whole 128-bit register right by N bytes,
// shifting zeros in; N >= 16 yields an all-zero register.
// The previous `+` line used vec_srl, which shifts by a *bit* count of 0-7 and
// therefore cannot express a byte slide. vec_sro shifts by whole bytes and
// reads its count, expressed in bits (8 * N), from the shift operand; the count
// is splatted into every byte so its position in the operand does not matter.
// NOTE(review): "right" is in register (most-significant-first) order; confirm
// this matches the lane direction expected on the targeted endianness.
template <size_t N, class A, class T>
807808
XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept
808809
{
809-
return _mm_srli_si128(x, N);
810+
using register_type = typename batch<T, A>::register_type;
// vec_sro only encodes counts of 0-15 bytes, so a full-width slide is handled here.
if (N >= 16)
{
return (register_type)vec_splat_u8(0);
}
const __vector unsigned char shift = vec_splats((unsigned char)(8 * N));
// Shift on the raw bytes, then view the result as the batch's register type again.
return (register_type)vec_sro((__vector unsigned char)x.data, shift);
810811
}
811812

812-
#endif
813813
// sadd
814814
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
815815
XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
@@ -819,12 +819,18 @@ namespace xsimd
819819

820820
// set
821821
// set (batch, altivec): build a batch<T, A> whose lanes are `values...`, in
// argument order. The first parameter is unnamed and only selects the overload.
// In this diff view the signature after the `-` marker is the previous one
// (return type hard-coded to batch<float, A>) and the one after the `+` marker
// is the new one (return type batch<T, A>).
template <class A, class T, class... Values>
822-
XSIMD_INLINE batch<float, A> set(batch<T, A> const&, requires_arch<altivec>, Values... values) noexcept
822+
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<altivec>, Values... values) noexcept
823823
{
824824
static_assert(sizeof...(Values) == batch<T, A>::size, "consistent init"); // exactly one value per lane
825825
return typename batch<T, A>::register_type { values... }; // brace-initialise the register from the pack
826826
}
827827

828+
// set (batch_bool, altivec): build a mask whose lane i is "true" when the i-th
// argument converts to true and "false" otherwise. The first parameter is
// unnamed and only selects the overload. Enabled only when std::is_scalar<T> holds.
//
// The mask is produced by a lane-wise comparison rather than by writing
// static_cast<T>(-1LL) into a batch<T, A>: for floating-point T that cast gives
// the value -1.0, not an all-ones bit pattern, and it also handed a batch
// register to a batch_bool. Going through eq() yields whatever representation
// the architecture's comparisons produce, for every scalar T.
template <class A, class T, class... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
829+
XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<altivec>, Values... values) noexcept
830+
{
831+
// Lane i holds 1 where values[i] is truthy, 0 elsewhere.
const batch<T, A> flags = set(batch<T, A>(), A {}, static_cast<T>(values ? 1 : 0)...);
// Same number of lanes, all equal to 1 (the pack is expanded only for its length).
const batch<T, A> ones = set(batch<T, A>(), A {}, static_cast<T>((static_cast<void>(values), 1))...);
return eq(flags, ones, A {});
832+
}
833+
828834
// ssub
829835

830836
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
@@ -851,24 +857,14 @@ namespace xsimd
851857
// store_unaligned (altivec): write the 16 bytes of `self` to `mem`, which does
// not have to be 16-byte aligned. Enabled only when std::is_scalar<T> holds.
//
// In this diff view the lines after `-` markers are the previous (removed)
// read-modify-write implementation; the lines after `+` markers are the new one.
// New approach: rotate the register by the misalignment of `mem` (vec_lvsr +
// vec_perm), then emit element stores with vec_ste. Each vec_ste writes only the
// single element addressed by mem + offset, so nothing outside
// [mem, mem + 16) is touched. The eight stores (1, 2, 4, 4, 4, 4, 2, 1 bytes at
// offsets 0, 1, 3, 4, 8, 12, 14, 15) together cover all 16 bytes for any
// misalignment; the trailing byte store at offset 15 was missing before.
// Also fixed: the register was reinterpreted through
// reinterpret_cast<__vector unsigned char>(&self.data), i.e. a pointer cast to a
// vector type; a plain vector cast of self.data is used instead.
// NOTE(review): vec_lvsr-based rotation is the classic big-endian idiom --
// confirm behaviour on little-endian targets.
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
852858
XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
853859
{
854-
// From: https://stackoverflow.com/questions/35317341/how-to-store-a-vector-to-an-unaligned-location-in-memory-with-altivec
855-
// Load the surrounding area
856-
auto low = vec_ld(0, mem);
857-
auto high = vec_ld(16, mem);
858-
// Prepare the constants that we need
859-
auto permuteVector = vec_lvsr(0, (int*)mem);
860-
auto oxFF = vec_splat_s8(-1);
861-
auto ox00 = vec_splat_s8(0);
862-
// Make a mask for which parts of the vectors to swap out
863-
auto mask = vec_perm(ox00, oxFF, permuteVector);
864-
// Right rotate our input data
865-
v = vec_perm(self, self, permuteVector);
866-
// Insert our data into the low and high vectors
867-
low = vec_sel(self, low, mask);
868-
high = vec_sel(high, self, mask);
869-
// Store the two aligned result vectors
870-
vec_st(low, 0, mem);
871-
vec_st(high, 16, mem);
860+
const __vector unsigned char bytes = (__vector unsigned char)self.data;
auto tmp = vec_perm(bytes, bytes, vec_lvsr(0, (unsigned char*)mem));
861+
vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
862+
vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
863+
vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
864+
vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
865+
vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
866+
vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
867+
vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
// Final byte: without it the last byte stays unwritten for some misalignments.
vec_ste((__vector unsigned char)tmp, 15, (unsigned char*)mem);
872868
}
873869

874870
// sub

0 commit comments

Comments
 (0)