Skip to content

Commit fc8a187

Browse files
committed
removed templated shuffle
1 parent f7116e7 commit fc8a187

File tree

6 files changed

+13
-52
lines changed

6 files changed

+13
-52
lines changed

.github/workflows/emulated.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
- { compiler: 'clang', version: '16'}
1818
steps:
1919
- name: Setup compiler
20-
if: ${{ matrix.sys.compiler == 'gcc' }}
20+
if: ${{ matrix.sys.compiler == 'gcc' }}
2121
run: |
2222
GCC_VERSION=${{ matrix.sys.version }}
2323
sudo apt-get update
@@ -31,7 +31,7 @@ jobs:
3131
- name: Setup compiler
3232
if: ${{ matrix.sys.compiler == 'clang' }}
3333
run: |
34-
LLVM_VERSION=${{ matrix.sys.version }}
34+
LLVM_VERSION=${{ matrix.sys.version }}
3535
sudo apt-get update || exit 1
3636
sudo apt-get --no-install-suggests --no-install-recommends install clang-$LLVM_VERSION || exit 1
3737
sudo apt-get --no-install-suggests --no-install-recommends install g++ g++-multilib || exit 1
@@ -49,7 +49,7 @@ jobs:
4949
- name: Configure build
5050
env:
5151
CC: ${{ env.CC }}
52-
CXX: ${{ env.CXX }}
52+
CXX: ${{ env.CXX }}
5353
run: |
5454
5555
mkdir _build

.github/workflows/linux.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- { compiler: 'clang', version: '18', flags: 'avx512' }
3030
steps:
3131
- name: Setup compiler
32-
if: ${{ matrix.sys.compiler == 'gcc' }}
32+
if: ${{ matrix.sys.compiler == 'gcc' }}
3333
run: |
3434
GCC_VERSION=${{ matrix.sys.version }}
3535
sudo apt-get update
@@ -45,7 +45,7 @@ jobs:
4545
- name: Setup compiler
4646
if: ${{ matrix.sys.compiler == 'clang' }}
4747
run: |
48-
LLVM_VERSION=${{ matrix.sys.version }}
48+
LLVM_VERSION=${{ matrix.sys.version }}
4949
sudo apt-get update || exit 1
5050
sudo apt-get --no-install-suggests --no-install-recommends install clang-$LLVM_VERSION || exit 1
5151
sudo apt-get --no-install-suggests --no-install-recommends install g++ g++-multilib || exit 1
@@ -66,7 +66,7 @@ jobs:
6666
- name: Configure build
6767
env:
6868
CC: ${{ env.CC }}
69-
CXX: ${{ env.CXX }}
69+
CXX: ${{ env.CXX }}
7070
run: |
7171
if [[ '${{ matrix.sys.flags }}' == 'enable_xtl_complex' ]]; then
7272
CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DENABLE_XTL_COMPLEX=ON"

include/xsimd/arch/common/xsimd_common_swizzle.hpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -204,41 +204,6 @@ namespace xsimd
204204
static_assert(no_duplicates_v<0, 1, 2, 3, 4, 5, 6, 7>(), "N=8: [0..7] → distinct");
205205
static_assert(!no_duplicates_v<0, 1, 2, 3, 4, 5, 6, 0>(), "N=8: last repeats 0");
206206

207-
// ────────────────────────────────────────────────────────────────────────
208-
// ────── log2 for powers of 2 ──────
209-
template <std::size_t N>
210-
struct log2_c
211-
{
212-
static_assert(N > 0 && (N & (N - 1)) == 0, "N must be power of 2");
213-
static constexpr std::size_t value = 1 + log2_c<N / 2>::value;
214-
};
215-
template <>
216-
struct log2_c<1>
217-
{
218-
static constexpr std::size_t value = 0;
219-
};
220-
221-
// ────── Recursive encoder ──────
222-
template <std::size_t I, std::size_t N, std::size_t SHIFT, uint32_t... Values>
223-
struct shuffle_impl
224-
{
225-
static constexpr uint32_t value = (get_nth_value<I, Values...>::value << (I * SHIFT)) | shuffle_impl<I + 1, N, SHIFT, Values...>::value;
226-
};
227-
template <std::size_t N, std::size_t SHIFT, uint32_t... Values>
228-
struct shuffle_impl<N, N, SHIFT, Values...>
229-
{
230-
static constexpr uint32_t value = 0;
231-
};
232-
template <uint32_t... Values>
233-
XSIMD_INLINE constexpr std::uint32_t shuffle() noexcept
234-
{
235-
return shuffle_impl<0, sizeof...(Values), log2_c<sizeof...(Values)>::value, Values...>::value;
236-
}
237-
template <uint32_t... Values>
238-
XSIMD_INLINE constexpr std::uint32_t mod_shuffle() noexcept
239-
{
240-
return shuffle<(Values % sizeof...(Values))...>();
241-
}
242207
} // namespace detail
243208
} // namespace kernel
244209
} // namespace xsimd

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1058,7 +1058,7 @@ namespace xsimd
10581058
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
10591059
XSIMD_INLINE T reduce_max(batch<T, A> const& self, requires_arch<avx>) noexcept
10601060
{
1061-
constexpr auto mask = detail::shuffle<1, 0>();
1061+
constexpr auto mask = detail::shuffle(1, 0);
10621062
batch<T, A> step = _mm256_permute2f128_si256(self, self, mask);
10631063
batch<T, A> acc = max(self, step);
10641064
__m128i low = _mm256_castsi256_si128(acc);
@@ -1069,7 +1069,7 @@ namespace xsimd
10691069
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
10701070
XSIMD_INLINE T reduce_min(batch<T, A> const& self, requires_arch<avx>) noexcept
10711071
{
1072-
constexpr auto mask = detail::shuffle<1, 0>();
1072+
constexpr auto mask = detail::shuffle(1, 0);
10731073
batch<T, A> step = _mm256_permute2f128_si256(self, self, mask);
10741074
batch<T, A> acc = min(self, step);
10751075
__m128i low = _mm256_castsi256_si128(acc);
@@ -1214,7 +1214,7 @@ namespace xsimd
12141214
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3, ITy I4, ITy I5, ITy I6, ITy I7>
12151215
XSIMD_INLINE batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3, I4, I5, I6, I7> mask, requires_arch<avx>) noexcept
12161216
{
1217-
constexpr uint32_t smask = detail::mod_shuffle<I0, I1, I2, I3>();
1217+
constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3);
12181218
// shuffle within lane
12191219
if (I4 == (I0 + 4) && I5 == (I1 + 4) && I6 == (I2 + 4) && I7 == (I3 + 4) && I0 < 4 && I1 < 4 && I2 >= 8 && I2 < 12 && I3 >= 8 && I3 < 12)
12201220
return _mm256_shuffle_ps(x, y, smask);
@@ -1488,7 +1488,7 @@ namespace xsimd
14881488
auto split = _mm256_permute2f128_ps(self, self, control);
14891489
if (!is_dup_identity)
14901490
{
1491-
constexpr auto shuffle_mask = is_dup_low ? detail::mod_shuffle<V0, V1, V2, V3>() : detail::mod_shuffle<V4 - 4, V5 - 4, V6 - 4, V7 - 4>();
1491+
constexpr auto shuffle_mask = is_dup_low ? detail::mod_shuffle(V0, V1, V2, V3) : detail::mod_shuffle(V4 - 4, V5 - 4, V6 - 4, V7 - 4);
14921492
split = _mm256_permute_ps(split, shuffle_mask);
14931493
}
14941494
return split;
@@ -1910,4 +1910,4 @@ namespace xsimd
19101910
}
19111911
}
19121912

1913-
#endif
1913+
#endif

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -964,15 +964,15 @@ namespace xsimd
964964
constexpr auto imm = ((V0 & 1) << 0) | ((V1 & 1) << 1) | ((V2 & 1) << 2) | ((V3 & 1) << 3);
965965
return _mm256_permute_pd(self, imm);
966966
}
967-
constexpr auto imm = detail::mod_shuffle<V0, V1, V2, V3>();
967+
constexpr auto imm = detail::mod_shuffle(V0, V1, V2, V3);
968968
// fallback to full 4-element permute
969969
return _mm256_permute4x64_pd(self, imm);
970970
}
971971

972972
template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
973973
XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
974974
{
975-
constexpr auto mask = detail::mod_shuffle<V0, V1, V2, V3>();
975+
constexpr auto mask = detail::mod_shuffle(V0, V1, V2, V3);
976976
return _mm256_permute4x64_epi64(self, mask);
977977
}
978978
template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>

include/xsimd/arch/xsimd_common_fwd.hpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,10 +50,6 @@ namespace xsimd
5050
XSIMD_INLINE constexpr bool is_dup_lo(batch_constant<T, A, Vs...>) noexcept;
5151
template <typename T, class A, T... Vs>
5252
XSIMD_INLINE constexpr bool is_dup_hi(batch_constant<T, A, Vs...>) noexcept;
53-
template <uint32_t... Values>
54-
XSIMD_INLINE constexpr std::uint32_t shuffle() noexcept;
55-
template <uint32_t... Values>
56-
XSIMD_INLINE constexpr std::uint32_t mod_shuffle() noexcept;
5753
template <typename T, class A, T... Vs>
5854
XSIMD_INLINE constexpr bool is_cross_lane(batch_constant<T, A, Vs...>) noexcept;
5955
template <typename T, class A, T... Vs>

0 commit comments

Comments
 (0)