Skip to content

Commit eabed46

Browse files
WIP
1 parent ced83cf commit eabed46

File tree

3 files changed

+24
-30
lines changed

3 files changed

+24
-30
lines changed

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -391,28 +391,6 @@ namespace xsimd
391391
}
392392

393393
// Scatter with runtime indexes.
394-
namespace detail
395-
{
396-
template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N == 0, int>::type = 0>
397-
XSIMD_INLINE void scatter(batch<T, A> const& src, U* dst,
398-
batch<V, A> const& index,
399-
::xsimd::index<N> I) noexcept
400-
{
401-
dst[index.get(I)] = static_cast<U>(src.get(I));
402-
}
403-
404-
template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N != 0, int>::type = 0>
405-
XSIMD_INLINE void
406-
scatter(batch<T, A> const& src, U* dst, batch<V, A> const& index,
407-
::xsimd::index<N> I) noexcept
408-
{
409-
static_assert(N <= batch<V, A>::size, "Incorrect value in recursion!");
410-
411-
kernel::detail::scatter<N - 1, T, A, U, V>(
412-
src, dst, index, {});
413-
dst[index.get(I)] = static_cast<U>(src.get(I));
414-
}
415-
} // namespace detail
416394

417395
template <typename A, typename T, typename V>
418396
XSIMD_INLINE void
@@ -422,8 +400,13 @@ namespace xsimd
422400
{
423401
static_assert(batch<T, A>::size == batch<V, A>::size,
424402
"Source and index sizes must match");
425-
kernel::detail::scatter<batch<V, A>::size - 1, T, A, T, V>(
426-
src, dst, index, {});
403+
alignas(A::alignment()) T src_buffer[batch<T, A>::size];
404+
kernel::store_aligned<A>(&src_buffer[0], batch<T, A>(src), A {});
405+
406+
alignas(A::alignment()) V index_buffer[batch<T, A>::size];
407+
kernel::store_aligned<A>(&index_buffer[0], batch<V, A>(index), A {});
408+
for (size_t i = 0; i < batch<T, A>::size; ++i)
409+
dst[index_buffer[i]] = src_buffer[i];
427410
}
428411

429412
template <typename A, typename T, typename U, typename V>
@@ -434,8 +417,13 @@ namespace xsimd
434417
{
435418
static_assert(batch<T, A>::size == batch<V, A>::size,
436419
"Source and index sizes must match");
437-
kernel::detail::scatter<batch<V, A>::size - 1, T, A, U, V>(
438-
src, dst, index, {});
420+
alignas(A::alignment()) T src_buffer[batch<T, A>::size];
421+
kernel::store_aligned<A>(&src_buffer[0], batch<T, A>(src), A {});
422+
423+
alignas(A::alignment()) V index_buffer[batch<T, A>::size];
424+
kernel::store_aligned<A>(&index_buffer[0], batch<V, A>(index), A {});
425+
for (size_t i = 0; i < batch<T, A>::size; ++i)
426+
// Source lanes are T while the destination is U*: convert explicitly, as the
// previous recursive implementation did, instead of relying on an implicit conversion.
dst[index_buffer[i]] = static_cast<U>(src_buffer[i]);
439427
}
440428

441429
template <typename A, typename T, typename U, typename V>
@@ -447,7 +435,13 @@ namespace xsimd
447435
static_assert(batch<T, A>::size == batch<V, A>::size,
448436
"Source and index sizes must match");
449437
const auto tmp = batch_cast<U>(src);
450-
kernel::scatter<A>(tmp, dst, index, A {});
438+
alignas(A::alignment()) U src_buffer[batch<T, A>::size];
439+
kernel::store_aligned<A>(&src_buffer[0], batch<U, A>(tmp), A {});
440+
441+
alignas(A::alignment()) V index_buffer[batch<T, A>::size];
442+
kernel::store_aligned<A>(&index_buffer[0], batch<V, A>(index), A {});
443+
for (size_t i = 0; i < batch<T, A>::size; ++i)
444+
dst[index_buffer[i]] = src_buffer[i];
451445
}
452446

453447
// shuffle

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ namespace xsimd
518518
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
519519
XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
520520
{
521-
return *(typename batch<T, A>::register_type const*)mem;
521+
// Named cast (matching store_unaligned) rather than a C-style cast; constness of mem is preserved.
return vec_xl(0, reinterpret_cast<typename batch<T, A>::register_type const*>(mem));
522522
}
523523

524524
// load_complex
@@ -925,7 +925,7 @@ namespace xsimd
925925
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
926926
XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
927927
{
928-
*(typename batch<T, A>::register_type*)mem = self.data;
928+
return vec_xst(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
929929
}
930930

931931
// sub

test/test_memory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ TEST_CASE("[is_aligned]")
4646
float f[100];
4747
void* unaligned_f = static_cast<void*>(&f[0]);
4848
constexpr std::size_t alignment = xsimd::default_arch::alignment();
49-
std::size_t aligned_f_size;
49+
std::size_t aligned_f_size = sizeof(f);
5050
// Ask for less than the whole buffer: std::align returns nullptr when the requested
// size no longer fits in the remaining space after the alignment adjustment, which
// would happen for a full sizeof(f) request whenever f is not already aligned.
void* aligned_f = std::align(alignment, sizeof(f) - alignment, unaligned_f, aligned_f_size);
5151
CHECK_UNARY(xsimd::is_aligned(aligned_f));
5252

0 commit comments

Comments
 (0)