Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
- { compiler: 'gcc', version: '13', flags: 'enable_xtl_complex' }
- { compiler: 'gcc', version: '14', flags: 'avx' }
- { compiler: 'gcc', version: '13', flags: 'avx512' }
- { compiler: 'gcc', version: '10', flags: 'avx512' }
- { compiler: 'gcc', version: '12', flags: 'i386' }
- { compiler: 'gcc', version: '13', flags: 'avx512pf' }
- { compiler: 'gcc', version: '13', flags: 'avx512vbmi' }
Expand Down
49 changes: 48 additions & 1 deletion include/xsimd/arch/common/xsimd_common_swizzle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,49 @@ namespace xsimd
return cross_impl<0, sizeof...(Vs), sizeof...(Vs) / 2, Vs...>::value;
}

// Compile-time scan of a swizzle index pack for 128-bit lane crossings.
//
//   I         - destination slot currently being examined
//   N         - slot count at which the recursion stops
//   LaneElems - number of elements making up one 128-bit lane
//   U, Vs...  - index type and the index pack itself
//
// `value` is true as soon as one slot in [I, N) picks a source index whose
// lane number (index / LaneElems) differs from the slot's own lane number.
template <std::size_t I, std::size_t N, std::size_t LaneElems, typename U, U... Vs>
struct cross_impl128
{
    // Source index attached to slot I (looked up through get_at).
    static constexpr std::size_t Vi = static_cast<std::size_t>(get_at<U, I, Vs...>::value);
    // Does slot I itself read from another lane?
    static constexpr bool curr = (Vi / LaneElems) != (I / LaneElems);
    // Outcome for the remaining slots [I + 1, N).
    static constexpr bool next = cross_impl128<I + 1, N, LaneElems, U, Vs...>::value;
    static constexpr bool value = next || curr;
};

// End of recursion (I == N): nothing left to examine, so no crossing.
template <std::size_t N, std::size_t LaneElems, typename U, U... Vs>
struct cross_impl128<N, N, LaneElems, U, Vs...>
{
    static constexpr bool value = false;
};

// Reports, at compile time, whether the index pack Vs... makes any slot read
// from a 128-bit lane other than the one the slot itself belongs to.
//
//   ElemT - element type; only sizeof(ElemT) is used, to derive how many
//           elements fit in one 128-bit (16-byte) lane
//   U     - type of the indices
//   Vs... - the indices, one per destination slot (at least one required)
//
// Returns cross_impl128<0, N, lane_elems, U, Vs...>::value.
template <typename ElemT, typename U, U... Vs>
XSIMD_INLINE constexpr bool is_cross_lane_128() noexcept
{
    static_assert(sizeof...(Vs) >= 1, "Need at least one lane");
    // An element wider than 16 bytes would make lane_elems zero and turn the
    // lane computation into a division by zero; reject it with a clear message.
    static_assert(sizeof(ElemT) <= 16, "element type must fit in a 128-bit lane");
    constexpr std::size_t N = sizeof...(Vs);
    constexpr std::size_t lane_elems = 16 / sizeof(ElemT);
    return cross_impl128<0, N, lane_elems, U, Vs...>::value;
}

// Element-type-first overload: ElemT fixes the 128-bit lane size, U is the
// (integral) type of the mask indices and Vs... are the indices themselves.
// The actual check is delegated to is_cross_lane_128.
template <typename ElemT, typename U, U... Vs>
XSIMD_INLINE constexpr bool is_cross_lane() noexcept
{
    static_assert(std::is_integral<U>::value, "swizzle mask values must be integral");
    constexpr bool crosses = is_cross_lane_128<ElemT, U, Vs...>();
    return crosses;
}

// Shorthand overload: the element type comes first, followed directly by the
// indices as std::size_t values, so callers need not spell the index type.
template <typename ElemT, std::size_t... Vs>
XSIMD_INLINE constexpr bool is_cross_lane() noexcept
{
    using index_type = std::size_t;
    return is_cross_lane_128<ElemT, index_type, Vs...>();
}

// Forwards the index pack Vs... (of type T) to detail::identity_impl,
// starting the check at position 0.
template <typename T, T... Vs>
XSIMD_INLINE constexpr bool is_identity() noexcept
{
    return detail::identity_impl<0, T, Vs...>();
}
template <typename T, T... Vs>
Expand All @@ -184,7 +227,11 @@ namespace xsimd
// batch_constant flavour of is_only_from_hi: the argument is used only to
// deduce T and Vs...; the answer comes from detail::is_only_from_hi<T, Vs...>.
template <typename T, class A, T... Vs>
XSIMD_INLINE constexpr bool is_only_from_hi(batch_constant<T, A, Vs...>) noexcept
{
    return detail::is_only_from_hi<T, Vs...>();
}
// batch_constant flavour of is_cross_lane: the argument is used only to
// deduce the mask value type T and the indices Vs...
// NOTE(review): this span appears to come from a diff view, so the former
// one-line definition (forwarding to detail::is_cross_lane<Vs...>) is shown
// right before its multi-line replacement — confirm against the real header.
template <typename T, class A, T... Vs>
XSIMD_INLINE constexpr bool is_cross_lane(batch_constant<T, A, Vs...>) noexcept { return detail::is_cross_lane<Vs...>(); }
XSIMD_INLINE constexpr bool is_cross_lane(batch_constant<T, A, Vs...>) noexcept
{
// Only integral mask value types are accepted.
static_assert(std::is_integral<T>::value, "swizzle mask values must be integral");
// T is passed twice: once as the element type that sizes the 128-bit lane,
// once as the type of the indices.
return is_cross_lane_128<T, T, Vs...>();
}

} // namespace detail
} // namespace kernel
Expand Down
6 changes: 3 additions & 3 deletions include/xsimd/arch/xsimd_avx512f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2737,15 +2737,15 @@ namespace xsimd
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFF);
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFF);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return static_cast<T>(_mm512_cvtsi512_si32(self) & 0xFFFF);
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)) & 0xFFFF);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
return static_cast<T>(_mm512_cvtsi512_si32(self));
return static_cast<T>(_mm_cvtsi128_si32(_mm512_castsi512_si128(self)));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
Expand Down
23 changes: 18 additions & 5 deletions test/test_batch_manip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,24 @@ namespace xsimd
static_assert(is_dup_hi<std::uint32_t, 2, 3, 2, 3>(), "4-lane dup_hi failed");
static_assert(!is_dup_lo<std::uint32_t, 2, 3, 2, 3>(), "4-lane dup_lo on dup_hi");

static_assert(is_cross_lane<0, 1, 0, 1>(), "dup-lo only → crossing");
static_assert(is_cross_lane<2, 3, 2, 3>(), "dup-hi only → crossing");
static_assert(is_cross_lane<0, 3, 3, 3>(), "one low + rest high → crossing");
static_assert(!is_cross_lane<1, 0, 2, 3>(), "mixed low/high → no crossing");
static_assert(!is_cross_lane<0, 1, 2, 3>(), "mixed low/high → no crossing");
static_assert(is_cross_lane<double, 0, 1, 0, 1>(), "dup-lo only → crossing");
static_assert(is_cross_lane<double, 2, 3, 2, 3>(), "dup-hi only → crossing");
static_assert(is_cross_lane<double, 0, 3, 3, 3>(), "one low + rest high → crossing");
static_assert(!is_cross_lane<double, 1, 0, 2, 3>(), "mixed low/high → no crossing");
static_assert(!is_cross_lane<double, 0, 1, 2, 3>(), "mixed low/high → no crossing");
// 8-lane 128-bit lane checks (use double/int64 for 2-elements-per-128-bit lanes)
static_assert(is_cross_lane<double, 3, 2, 1, 0, 7, 6, 5, 4>(), "8-lane 128-bit swap → crossing");
static_assert(!is_cross_lane<double, 0, 1, 2, 3, 4, 5, 6, 7>(), "identity 8-lane → no crossing");
static_assert(is_cross_lane<std::uint64_t, 3, 2, 1, 0, 7, 6, 5, 4>(), "8-lane uint64_t swap → crossing");
static_assert(is_cross_lane<std::int32_t, 4, 5, 6, 7, 0, 1, 2, 3>(), "8-lane int32_t swap → crossing");

// Additional compile-time checks for 16-element batches (e.g. float/int32)
static_assert(is_cross_lane<float, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7>(),
"16-lane 128-bit swap → crossing");
static_assert(!is_cross_lane<float, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>(),
"identity 16-lane → no crossing");
static_assert(is_cross_lane<std::uint32_t, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7>(),
"16-lane uint32_t swap → crossing");
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions test/test_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,10 +672,15 @@ struct shuffle_test
}
};

#if defined(__GNUC__) && (__GNUC__ == 10) && XSIMD_WITH_AVX512F
// Use zip_lo as a stable reference for the expected interleave.
B b_ref_lo = xsimd::zip_lo(b_lhs, b_rhs);
#else
std::array<value_type, size> ref_lo;
for (size_t i = 0; i < size; ++i)
ref_lo[i] = (i & 1) ? rhs[i / 2] : lhs[i / 2];
B b_ref_lo = B::load_unaligned(ref_lo.data());
#endif

INFO("zip_lo");
B b_res_lo = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, zip_lo_generator, arch_type>());
Expand All @@ -689,12 +694,17 @@ struct shuffle_test
}
};

#if defined(__GNUC__) && (__GNUC__ == 10) && XSIMD_WITH_AVX512F
// Use zip_hi as a stable reference for the expected interleave.
B b_ref_hi = xsimd::zip_hi(b_lhs, b_rhs);
#else
std::array<value_type, size> ref_hi;
for (size_t i = 0; i < size; ++i)
{
ref_hi[i] = (i & 1) ? rhs[size / 2 + i / 2] : lhs[size / 2 + i / 2];
}
B b_ref_hi = B::load_unaligned(ref_hi.data());
#endif

INFO("zip_hi");
B b_res_hi = xsimd::shuffle(b_lhs, b_rhs, xsimd::make_batch_constant<mask_type, zip_hi_generator, arch_type>());
Expand Down