
Commit c63af92

Harmonize constant batch type signature with non constant batch
batch<T, A> <> batch_constant<T, A, Csts...>
batch_bool<T, A> <> batch_bool_constant<T, A, Csts...>

This is a strong API (and ABI) change, but it makes the type system more harmonious.
1 parent f372568 commit c63af92

22 files changed (+255, -253 lines)
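For context, a minimal sketch of what the new signatures look like in user code, assuming an AVX2 target; the arch alias and the pick function are illustrative, not part of this commit:

#include <xsimd/xsimd.hpp>

using arch = xsimd::avx2;

// Before this commit, constant batches were parametrized by the full batch type:
//   xsimd::batch_bool_constant<xsimd::batch<float, arch>, true, false, true, false,
//                              true, false, true, false>
// After it, they mirror batch<T, A> / batch_bool<T, A>:
constexpr xsimd::batch_bool_constant<float, arch, true, false, true, false,
                                     true, false, true, false> even_lanes {};

xsimd::batch<float, arch> pick(xsimd::batch<float, arch> const& a,
                               xsimd::batch<float, arch> const& b)
{
    // Lanes where the constant mask is true come from a, the rest from b;
    // with a compile-time mask this can lower to a single immediate blend.
    return xsimd::select(even_lanes, a, b);
}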

docs/source/api/batch_manip.rst

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ Conditional expression
 .. doxygenfunction:: select(batch_bool<T, A> const &cond, batch<T, A> const &true_br, batch<T, A> const &false_br) noexcept
    :project: xsimd

-.. doxygenfunction:: select(batch_bool_constant<batch<T, A>, Values...> const &cond, batch<T, A> const &true_br, batch<T, A> const &false_br) noexcept
+.. doxygenfunction:: select(batch_bool_constant<T, A, Values...> const &cond, batch<T, A> const &true_br, batch<T, A> const &false_br) noexcept
    :project: xsimd


include/xsimd/arch/generic/xsimd_generic_math.hpp

Lines changed: 1 addition & 1 deletion
@@ -2064,7 +2064,7 @@ namespace xsimd
             inline T reduce(Op op, batch<T, A> const& self, std::integral_constant<unsigned, Lvl>) noexcept
             {
                 using index_type = as_unsigned_integer_t<T>;
-                batch<T, A> split = swizzle(self, make_batch_constant<batch<index_type, A>, split_high<index_type, Lvl / 2>>());
+                batch<T, A> split = swizzle(self, make_batch_constant<index_type, A, split_high<index_type, Lvl / 2>>());
                 return reduce(op, op(split, self), std::integral_constant<unsigned, Lvl / 2>());
             }
         }

include/xsimd/arch/generic/xsimd_generic_memory.hpp

Lines changed: 13 additions & 13 deletions
@@ -21,10 +21,10 @@

 namespace xsimd
 {
-    template <class batch_type, typename batch_type::value_type... Values>
+    template <typename T, class A, T... Values>
     struct batch_constant;

-    template <class batch_type, bool... Values>
+    template <typename T, class A, bool... Values>
     struct batch_bool_constant;

     namespace kernel
@@ -180,7 +180,7 @@ namespace xsimd
                 }
             };
             batch<T, A> tmp(val);
-            return select(make_batch_bool_constant<batch<T, A>, index_mask>(), self, tmp);
+            return select(make_batch_bool_constant<T, A, index_mask>(), self, tmp);
         }

         // get
@@ -295,7 +295,7 @@ namespace xsimd
                 }
             };

-            return swizzle(self, make_batch_constant<batch<as_unsigned_integer_t<T>, A>, rotate_generator>(), A {});
+            return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, A, rotate_generator>(), A {});
         }

         template <size_t N, class A, class T>
@@ -316,7 +316,7 @@ namespace xsimd
                 }
             };

-            return swizzle(self, make_batch_constant<batch<as_unsigned_integer_t<T>, A>, rotate_generator>(), A {});
+            return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, A, rotate_generator>(), A {});
         }

         template <size_t N, class A, class T>
@@ -455,19 +455,19 @@ namespace xsimd
         }

         template <class A, typename T, typename ITy, ITy... Indices>
-        inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept
+        inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<generic>) noexcept
         {
             constexpr size_t bsize = sizeof...(Indices);

             // Detect common patterns
             XSIMD_IF_CONSTEXPR(detail::is_swizzle_fst(bsize, Indices...))
             {
-                return swizzle(x, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? 0 /* never happens */ : Indices)...>());
+                return swizzle(x, batch_constant<ITy, A, ((Indices >= bsize) ? 0 /* never happens */ : Indices)...>());
             }

             XSIMD_IF_CONSTEXPR(detail::is_swizzle_snd(bsize, Indices...))
             {
-                return swizzle(y, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? (Indices - bsize) : 0 /* never happens */)...>());
+                return swizzle(y, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : 0 /* never happens */)...>());
             }

             XSIMD_IF_CONSTEXPR(detail::is_zip_lo(bsize, Indices...))
@@ -482,7 +482,7 @@ namespace xsimd

             XSIMD_IF_CONSTEXPR(detail::is_select(bsize, Indices...))
             {
-                return select(batch_bool_constant<batch<T, A>, (Indices < bsize)...>(), x, y);
+                return select(batch_bool_constant<T, A, (Indices < bsize)...>(), x, y);
             }

 #if defined(__has_builtin)
@@ -503,9 +503,9 @@ namespace xsimd
 #else
             // Use a generic_pattern. It is suboptimal but clang optimizes this
             // pretty well.
-            batch<T, A> x_lane = swizzle(x, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
-            batch<T, A> y_lane = swizzle(y, batch_constant<batch<ITy, A>, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
-            batch_bool_constant<batch<T, A>, (Indices < bsize)...> select_x_lane;
+            batch<T, A> x_lane = swizzle(x, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
+            batch<T, A> y_lane = swizzle(y, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
+            batch_bool_constant<T, A, (Indices < bsize)...> select_x_lane;
             return select(select_x_lane, x_lane, y_lane);
 #endif
         }
@@ -542,7 +542,7 @@ namespace xsimd

         // swizzle
         template <class A, class T, class ITy, ITy... Vs>
-        inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch_constant<batch<ITy, A>, Vs...> mask, requires_arch<generic>) noexcept
+        inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch_constant<ITy, A, Vs...> mask, requires_arch<generic>) noexcept
         {
             return { swizzle(self.real(), mask), swizzle(self.imag(), mask) };
         }
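The make_batch_constant changes above follow the same scheme; a minimal sketch of the generator-based spelling under the new signature (the reverse_generator struct and the reverse function are illustrations, not taken from this commit):

#include <cstddef>
#include <cstdint>
#include <xsimd/xsimd.hpp>

using arch = xsimd::avx2;

// A generator exposes get(index, size) and is expanded at compile time
// into one constant per lane; this one reverses the lane order.
struct reverse_generator
{
    static constexpr uint32_t get(size_t index, size_t size)
    {
        return static_cast<uint32_t>(size - 1 - index);
    }
};

xsimd::batch<float, arch> reverse(xsimd::batch<float, arch> const& x)
{
    // Old spelling: make_batch_constant<batch<uint32_t, arch>, reverse_generator>()
    // New spelling: element type and architecture are separate parameters.
    return xsimd::swizzle(x, xsimd::make_batch_constant<uint32_t, arch, reverse_generator>());
}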

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 15 additions & 15 deletions
@@ -1161,22 +1161,22 @@ namespace xsimd
             return detail::merge_sse(res_low, res_hi);
         }
         template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        inline batch<T, A> select(batch_bool_constant<batch<T, A>, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx>) noexcept
+        inline batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx>) noexcept
         {
             return select(batch_bool<T, A> { Values... }, true_br, false_br, avx2 {});
         }

         template <class A, bool... Values>
-        inline batch<float, A> select(batch_bool_constant<batch<float, A>, Values...> const&, batch<float, A> const& true_br, batch<float, A> const& false_br, requires_arch<avx>) noexcept
+        inline batch<float, A> select(batch_bool_constant<float, A, Values...> const&, batch<float, A> const& true_br, batch<float, A> const& false_br, requires_arch<avx>) noexcept
         {
-            constexpr auto mask = batch_bool_constant<batch<float, A>, Values...>::mask();
+            constexpr auto mask = batch_bool_constant<float, A, Values...>::mask();
             return _mm256_blend_ps(false_br, true_br, mask);
         }

         template <class A, bool... Values>
-        inline batch<double, A> select(batch_bool_constant<batch<double, A>, Values...> const&, batch<double, A> const& true_br, batch<double, A> const& false_br, requires_arch<avx>) noexcept
+        inline batch<double, A> select(batch_bool_constant<double, A, Values...> const&, batch<double, A> const& true_br, batch<double, A> const& false_br, requires_arch<avx>) noexcept
         {
-            constexpr auto mask = batch_bool_constant<batch<double, A>, Values...>::mask();
+            constexpr auto mask = batch_bool_constant<double, A, Values...>::mask();
             return _mm256_blend_pd(false_br, true_br, mask);
         }

@@ -1238,7 +1238,7 @@ namespace xsimd

         // shuffle
         template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3, ITy I4, ITy I5, ITy I6, ITy I7>
-        inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3, I4, I5, I6, I7> mask, requires_arch<avx>) noexcept
+        inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3, I4, I5, I6, I7> mask, requires_arch<avx>) noexcept
         {
             constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3);
             // shuffle within lane
@@ -1253,7 +1253,7 @@ namespace xsimd
         }

         template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
-        inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<avx>) noexcept
+        inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3> mask, requires_arch<avx>) noexcept
         {
             constexpr uint32_t smask = (I0 & 0x1) | ((I1 & 0x1) << 1) | ((I2 & 0x1) << 2) | ((I3 & 0x1) << 3);
             // shuffle within lane
@@ -1504,7 +1504,7 @@ namespace xsimd

         // swizzle (constant mask)
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
-        inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<avx>) noexcept
+        inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<avx>) noexcept
         {
             // duplicate low and high part of input
             __m256 hi = _mm256_castps128_ps256(_mm256_extractf128_ps(self, 1));
@@ -1514,22 +1514,22 @@ namespace xsimd
             __m256 low_low = _mm256_insertf128_ps(self, _mm256_castps256_ps128(low), 1);

             // normalize mask
-            batch_constant<batch<uint32_t, A>, (V0 % 4), (V1 % 4), (V2 % 4), (V3 % 4), (V4 % 4), (V5 % 4), (V6 % 4), (V7 % 4)> half_mask;
+            batch_constant<uint32_t, A, (V0 % 4), (V1 % 4), (V2 % 4), (V3 % 4), (V4 % 4), (V5 % 4), (V6 % 4), (V7 % 4)> half_mask;

             // permute within each lane
             __m256 r0 = _mm256_permutevar_ps(low_low, (batch<uint32_t, A>)half_mask);
             __m256 r1 = _mm256_permutevar_ps(hi_hi, (batch<uint32_t, A>)half_mask);

             // mask to choose the right lane
-            batch_bool_constant<batch<uint32_t, A>, (V0 >= 4), (V1 >= 4), (V2 >= 4), (V3 >= 4), (V4 >= 4), (V5 >= 4), (V6 >= 4), (V7 >= 4)> blend_mask;
+            batch_bool_constant<uint32_t, A, (V0 >= 4), (V1 >= 4), (V2 >= 4), (V3 >= 4), (V4 >= 4), (V5 >= 4), (V6 >= 4), (V7 >= 4)> blend_mask;

             // blend the two permutes
             constexpr auto mask = blend_mask.mask();
             return _mm256_blend_ps(r0, r1, mask);
         }

         template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
-        inline batch<double, A> swizzle(batch<double, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1, V2, V3>, requires_arch<avx>) noexcept
+        inline batch<double, A> swizzle(batch<double, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx>) noexcept
         {
             // duplicate low and high part of input
             __m256d hi = _mm256_castpd128_pd256(_mm256_extractf128_pd(self, 1));
@@ -1539,14 +1539,14 @@ namespace xsimd
             __m256d low_low = _mm256_insertf128_pd(self, _mm256_castpd256_pd128(low), 1);

             // normalize mask
-            batch_constant<batch<uint64_t, A>, (V0 % 2) * -1, (V1 % 2) * -1, (V2 % 2) * -1, (V3 % 2) * -1> half_mask;
+            batch_constant<uint64_t, A, (V0 % 2) * -1, (V1 % 2) * -1, (V2 % 2) * -1, (V3 % 2) * -1> half_mask;

             // permute within each lane
             __m256d r0 = _mm256_permutevar_pd(low_low, (batch<uint64_t, A>)half_mask);
             __m256d r1 = _mm256_permutevar_pd(hi_hi, (batch<uint64_t, A>)half_mask);

             // mask to choose the right lane
-            batch_bool_constant<batch<uint64_t, A>, (V0 >= 2), (V1 >= 2), (V2 >= 2), (V3 >= 2)> blend_mask;
+            batch_bool_constant<uint64_t, A, (V0 >= 2), (V1 >= 2), (V2 >= 2), (V3 >= 2)> blend_mask;

             // blend the two permutes
             constexpr auto mask = blend_mask.mask();
@@ -1564,7 +1564,7 @@ namespace xsimd
                   uint32_t V7,
                   detail::enable_sized_integral_t<T, 4> = 0>
         inline batch<T, A> swizzle(batch<T, A> const& self,
-                                   batch_constant<batch<uint32_t, A>,
+                                   batch_constant<uint32_t, A,
                                                   V0,
                                                   V1,
                                                   V2,
@@ -1588,7 +1588,7 @@ namespace xsimd
                  detail::enable_sized_integral_t<T, 8> = 0>
         inline batch<T, A>
         swizzle(batch<T, A> const& self,
-                batch_constant<batch<uint64_t, A>, V0, V1, V2, V3> const& mask,
+                batch_constant<uint64_t, A, V0, V1, V2, V3> const& mask,
                 requires_arch<avx>) noexcept
         {
             return bitwise_cast<T>(
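The mask() calls above are what turn the boolean pack into a blend immediate; a minimal illustration under the new signature (the expected value 0b01010101 assumes bit i of the mask tracks lane i, which is what the AVX blend kernels above rely on):

#include <xsimd/xsimd.hpp>

// batch_bool_constant<float, avx, ...> holds one boolean per float lane (8 on AVX);
// mask() packs them into an integer with bit i set iff lane i is true.
constexpr auto blend_imm = xsimd::batch_bool_constant<float, xsimd::avx,
                                                      true, false, true, false,
                                                      true, false, true, false>::mask();
static_assert(blend_imm == 0b01010101, "lane i of true_br is chosen when bit i is set");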

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 8 additions & 8 deletions
@@ -729,9 +729,9 @@ namespace xsimd
             }
         }
         template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
-        inline batch<T, A> select(batch_bool_constant<batch<T, A>, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx2>) noexcept
+        inline batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx2>) noexcept
         {
-            constexpr int mask = batch_bool_constant<batch<T, A>, Values...>::mask();
+            constexpr int mask = batch_bool_constant<T, A, Values...>::mask();
             // FIXME: for some reason mask here is not considered as an immediate,
             // but it's okay for _mm256_blend_epi32
             // case 2: return _mm256_blend_epi16(false_br, true_br, mask);
@@ -912,36 +912,36 @@ namespace xsimd

         // swizzle (constant mask)
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
-        inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
         {
             return _mm256_permutevar8x32_ps(self, (batch<uint32_t, A>)mask);
         }

         template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
-        inline batch<double, A> swizzle(batch<double, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
+        inline batch<double, A> swizzle(batch<double, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
         {
             constexpr auto mask = detail::shuffle(V0, V1, V2, V3);
             return _mm256_permute4x64_pd(self, mask);
         }

         template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
-        inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
+        inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
         {
             constexpr auto mask = detail::shuffle(V0, V1, V2, V3);
             return _mm256_permute4x64_epi64(self, mask);
         }
         template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
-        inline batch<int64_t, A> swizzle(batch<int64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1, V2, V3> mask, requires_arch<avx2>) noexcept
+        inline batch<int64_t, A> swizzle(batch<int64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3> mask, requires_arch<avx2>) noexcept
         {
             return bitwise_cast<int64_t>(swizzle(bitwise_cast<uint64_t>(self), mask, avx2 {}));
         }
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
-        inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
         {
             return _mm256_permutevar8x32_epi32(self, (batch<uint32_t, A>)mask);
         }
         template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3, uint32_t V4, uint32_t V5, uint32_t V6, uint32_t V7>
-        inline batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
+        inline batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<avx2>) noexcept
         {
             return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, avx2 {}));
         }

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 4 additions & 4 deletions
@@ -617,25 +617,25 @@ namespace xsimd

         // swizzle (static version)
         template <class A, uint16_t... Vs>
-        inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, Vs...> mask, requires_arch<avx512bw>) noexcept
+        inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
         {
             return swizzle(self, (batch<uint16_t, A>)mask, avx512bw {});
         }

         template <class A, uint16_t... Vs>
-        inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, Vs...> mask, requires_arch<avx512bw>) noexcept
+        inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
         {
             return swizzle(self, (batch<uint16_t, A>)mask, avx512bw {});
         }

         template <class A, uint8_t... Vs>
-        inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, Vs...> mask, requires_arch<avx512bw>) noexcept
+        inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
         {
             return swizzle(self, (batch<uint8_t, A>)mask, avx512bw {});
         }

         template <class A, uint8_t... Vs>
-        inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, Vs...> mask, requires_arch<avx512bw>) noexcept
+        inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
         {
             return swizzle(self, (batch<uint8_t, A>)mask, avx512bw {});
         }
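The (batch<uint16_t, A>)mask casts above rely on a constant batch converting to a regular runtime batch, so these kernels can reuse the dynamic-mask swizzle; a minimal sketch (the iota function and its lane values are illustrative):

#include <xsimd/xsimd.hpp>

using arch = xsimd::avx512bw;

// batch<uint16_t, avx512bw> has 32 lanes; a batch_constant carries one
// compile-time value per lane and materializes into a batch on conversion.
xsimd::batch<uint16_t, arch> iota()
{
    constexpr xsimd::batch_constant<uint16_t, arch,
                                    0, 1, 2, 3, 4, 5, 6, 7,
                                    8, 9, 10, 11, 12, 13, 14, 15,
                                    16, 17, 18, 19, 20, 21, 22, 23,
                                    24, 25, 26, 27, 28, 29, 30, 31> idx;
    return static_cast<xsimd::batch<uint16_t, arch>>(idx);
}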
