16
16
#include " hdr/stdint_proxy.h"
17
17
#include " src/__support/CPP/algorithm.h"
18
18
#include " src/__support/CPP/limits.h"
19
+ #include " src/__support/CPP/tuple.h"
19
20
#include " src/__support/CPP/type_traits.h"
21
+ #include " src/__support/CPP/utility/integer_sequence.h"
20
22
#include " src/__support/macros/attributes.h"
21
23
#include " src/__support/macros/config.h"
22
24
@@ -32,9 +34,6 @@ namespace cpp {
32
34
33
35
namespace internal {
34
36
35
- template <typename T>
36
- using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
37
-
38
37
#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
39
38
// Number of elements of type T that fit in 64 bytes. This definition is the
// one guarded by LIBC_TARGET_CPU_HAS_AVX512F.
template <typename T>
40
39
LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof (T);
@@ -48,9 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
48
47
template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1 ;
49
48
#endif
50
49
51
- template <typename T> LIBC_INLINE constexpr T poison () {
52
- return __builtin_nondeterministic_value (T ());
53
- }
54
50
} // namespace internal
55
51
56
52
// Type aliases.
@@ -61,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]];
61
57
template <typename T>
62
58
using simd_mask = simd<bool , internal::native_vector_size<T>>;
63
59
60
+ namespace internal {
61
+
62
// Unsigned _BitInt type whose width in bits is sizeof(T) * CHAR_BIT, i.e. the
// same number of bits that an object of type T occupies.
+ template <typename T>
63
+ using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
64
+
65
// Returns the result of __builtin_nondeterministic_value applied to a
// value-initialized T.
// NOTE(review): per the Clang documentation this builtin yields a valid but
// unspecified value of the argument's type - confirm against the compiler docs.
+ template <typename T> LIBC_INLINE constexpr T poison () {
66
+ return __builtin_nondeterministic_value (T ());
67
+ }
68
+
69
+ template <typename T, size_t N, size_t OriginalSize, size_t ... Indices>
70
+ LIBC_INLINE constexpr static cpp::simd<T, sizeof ...(Indices)>
71
+ extend (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
72
+ return __builtin_shufflevector (
73
+ x, x, (Indices < OriginalSize ? static_cast <int >(Indices) : -1 )...);
74
+ }
75
+
76
+ template <typename T, size_t N, size_t TargetSize, size_t OriginalSize>
77
+ LIBC_INLINE constexpr static auto extend (cpp::simd<T, N> x) {
78
+ // Recursively resize an input vector to the target size, increasing its size
79
+ // by at most double the input size each step due to shufflevector limitation.
80
+ if constexpr (N == TargetSize)
81
+ return x;
82
+ else if constexpr (TargetSize <= 2 * N)
83
+ return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{});
84
+ else
85
+ return extend<T, 2 * N, TargetSize, OriginalSize>(
86
+ extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
87
+ }
88
+
89
+ template <typename T, size_t N, size_t M, size_t ... Indices>
90
+ LIBC_INLINE constexpr static cpp::simd<T, N + M>
91
+ concat (cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) {
92
+ constexpr size_t Size = cpp::max (N, M);
93
+ auto remap = [](size_t idx) -> int {
94
+ if (idx < N)
95
+ return static_cast <int >(idx);
96
+ if (idx < N + M)
97
+ return static_cast <int >((idx - N) + Size);
98
+ return -1 ;
99
+ };
100
+
101
+ // Extend the input vectors until they are the same size, then use the indices
102
+ // to shuffle in only the indices that correspond to the original values.
103
+ auto x_ext = extend<T, N, Size, N>(x);
104
+ auto y_ext = extend<T, M, Size, M>(y);
105
+ return __builtin_shufflevector (x_ext, y_ext, remap (Indices)...);
106
+ }
107
+
108
+ template <typename T, size_t N, size_t Count, size_t Offset, size_t ... Indices>
109
+ LIBC_INLINE constexpr static cpp::simd<T, Count>
110
+ slice (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
111
+ return __builtin_shufflevector (x, x, (Offset + Indices)...);
112
+ }
113
+
114
+ template <typename T, size_t N, size_t Offset, size_t Head, size_t ... Tail>
115
+ LIBC_INLINE constexpr static auto split (cpp::simd<T, N> x) {
116
+ // Recursively splits the input vector by walking the variadic template list,
117
+ // increasing our current head each call.
118
+ auto result = cpp::make_tuple (
119
+ slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
120
+ if constexpr (sizeof ...(Tail) > 0 )
121
+ return cpp::tuple_cat (result, split<T, N, Offset + Head, Tail...>(x));
122
+ else
123
+ return result;
124
+ }
125
+
126
+ } // namespace internal
127
+
64
128
// Type trait helpers.
65
129
template <typename T>
66
130
struct simd_size : cpp::integral_constant<size_t , __builtin_vectorelements(T)> {
@@ -273,6 +337,25 @@ LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
273
337
return m ? x : y;
274
338
}
275
339
340
+ // Shuffling helpers.
341
+ template <typename T, size_t N, size_t M>
342
+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y) {
343
+ return internal::concat (x, y, make_index_sequence<N + M>{});
344
+ }
345
+ template <typename T, size_t N, size_t M, typename ... Rest>
346
+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y,
347
+ Rest... rest) {
348
+ auto xy = concat (x, y);
349
+ if constexpr (sizeof ...(Rest))
350
+ return concat (xy, rest...);
351
+ else
352
+ return xy;
353
+ }
354
+ template <size_t ... Sizes, typename T, size_t N> auto split (cpp::simd<T, N> x) {
355
+ static_assert ((... + Sizes) == N, " split sizes must sum to vector size" );
356
+ return internal::split<T, N, 0 , Sizes...>(x);
357
+ }
358
+
276
359
// TODO: where expressions, scalar overloads, ABI types.
277
360
278
361
} // namespace cpp
0 commit comments