@@ -34,9 +34,6 @@ namespace cpp {
3434
3535namespace internal {
3636
// Unsigned bit-precise integer type with exactly sizeof(T) * CHAR_BIT bits,
// i.e. as many bits as an object of type T occupies.
37- template <typename T>
38- using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
39-
4037#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
// Lane count selected when LIBC_TARGET_CPU_HAS_AVX512F is defined: the number
// of T elements that fit in 64 bytes.
4138template <typename T>
4239LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof (T);
@@ -50,10 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
// Definition in the last preprocessor branch before #endif: one T element per
// vector.
// NOTE(review): this branch is spelled with LIBC_INLINE while the other
// visible native_vector_size definitions use LIBC_INLINE_VAR -- confirm
// whether the difference is intentional.
5047template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1 ;
5148#endif
5249
// Returns __builtin_nondeterministic_value applied to a value-initialized T.
// NOTE(review): presumably a "don't care" placeholder whose contents callers
// must not rely on -- confirm against the call sites.
53- template <typename T> LIBC_INLINE constexpr T poison () {
54- return __builtin_nondeterministic_value (T ());
55- }
56-
5750} // namespace internal
5851
5952// Type aliases.
@@ -64,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]];
// Mask type associated with T: a simd of bool with
// internal::native_vector_size<T> lanes.
6457template <typename T>
6558using simd_mask = simd<bool , internal::native_vector_size<T>>;
6659
// Helper aliases and vector-shuffle primitives grouped in the `internal`
// namespace.
60+ namespace internal {
61+
// Unsigned bit-precise integer type with exactly sizeof(T) * CHAR_BIT bits,
// i.e. as many bits as an object of type T occupies.
62+ template <typename T>
63+ using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
64+
// Returns __builtin_nondeterministic_value applied to a value-initialized T.
// NOTE(review): presumably a "don't care" placeholder whose contents callers
// must not rely on -- confirm against the call sites.
65+ template <typename T> LIBC_INLINE constexpr T poison () {
66+ return __builtin_nondeterministic_value (T ());
67+ }
68+
// Single-shuffle resize of `x` to sizeof...(Indices) lanes. Both shuffle
// operands are `x`; each output lane uses its entry of Indices as the shuffle
// index when that entry is below OriginalSize, and index -1 otherwise.
// NOTE(review): -1 is understood to be __builtin_shufflevector's "don't care"
// lane index -- confirm against the Clang documentation.
69+ template <typename T, size_t N, size_t OriginalSize, size_t ... Indices>
70+ LIBC_INLINE constexpr static cpp::simd<T, sizeof ...(Indices)>
71+ extend (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
72+ return __builtin_shufflevector (
73+ x, x, (Indices < OriginalSize ? static_cast <int >(Indices) : -1 )...);
74+ }
75+
// Resizes `x` from N lanes to TargetSize lanes:
//   - N == TargetSize: returns `x` unchanged;
//   - TargetSize <= 2 * N: one shuffle straight to TargetSize lanes;
//   - otherwise: one shuffle to 2 * N lanes, then recursion on that result.
// OriginalSize is not inspected here; it is only forwarded to the recursive
// call.
76+ template <typename T, size_t N, size_t TargetSize, size_t OriginalSize>
77+ LIBC_INLINE constexpr static auto extend (cpp::simd<T, N> x) {
78+ // Recursively resize an input vector to the target size, increasing its size
79+ // by at most double the input size each step.
80+ if constexpr (N == TargetSize)
81+ return x;
82+ else if constexpr (TargetSize <= 2 * N)
// NOTE(review): the third explicit argument lands on the shuffle overload's
// OriginalSize parameter. Here and in the 2 * N call below it equals the
// requested lane count, so (assuming cpp::make_index_sequence<K> yields
// 0..K-1) every index compares below it and the -1 branch is never selected
// from this function -- confirm that this is intended.
83+ return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{});
84+ else
85+ return extend<T, 2 * N, TargetSize, OriginalSize>(
86+ extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
87+ }
88+
// Shuffles `x` (N lanes) and `y` (M lanes) into one vector of N + M lanes.
// For each entry idx of Indices the result takes lane idx of `x` when
// idx < N, lane (idx - N) of `y` when idx < N + M, and shuffle index -1
// otherwise.
// NOTE(review): presumably invoked with Indices = 0..N+M-1 so that the result
// is `x` followed by `y` -- confirm at the call site.
89+ template <typename T, size_t N, size_t M, size_t ... Indices>
90+ LIBC_INLINE constexpr static cpp::simd<T, N + M>
91+ concat (cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) {
// Common lane count both inputs are brought to before the final shuffle.
92+ constexpr size_t Length = (N > M ? N : M);
93+ auto remap = [](size_t idx) -> int {
94+ if (idx < N)
95+ return static_cast <int >(idx);
96+ if (idx < N + M)
// Lanes of the second shuffle operand are numbered after the Length lanes of
// the first operand, hence the + Length offset.
97+ return static_cast <int >((idx - N) + Length);
98+ return -1 ;
99+ };
100+
101+ // Extend the input vectors until they are the same size, then use the indices
102+ // to shuffle in only the indices that correspond to the original values.
103+ auto x_ext = extend<T, N, Length, N>(x);
104+ auto y_ext = extend<T, M, Length, M>(y);
105+ return __builtin_shufflevector (x_ext, y_ext, remap (Indices)...);
106+ }
107+
// Extracts lanes of `x` into a Count-lane vector: each output lane reads lane
// Offset + (its entry of Indices) of `x`. Both shuffle operands are `x`.
// The visible caller (split) passes cpp::make_index_sequence<Head> with
// Count == Head, so the number of indices matches the declared return width.
108+ template <typename T, size_t N, size_t Count, size_t Offset, size_t ... Indices>
109+ LIBC_INLINE constexpr static cpp::simd<T, Count>
110+ slice (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
111+ return __builtin_shufflevector (x, x, (Offset + Indices)...);
112+ }
113+
// Splits `x` into consecutive pieces of Head, Tail... lanes, beginning at lane
// Offset. Each piece is produced by slice; the pieces are returned in order in
// the tuple built by cpp::make_tuple / cpp::tuple_cat.
114+ template <typename T, size_t N, size_t Offset, size_t Head, size_t ... Tail>
115+ LIBC_INLINE constexpr static auto split (cpp::simd<T, N> x) {
116+ // Recursively splits the input vector by walking the variadic template list,
117+ // increasing our current head each call.
118+ auto first = cpp::make_tuple (
119+ slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
120+ if constexpr (sizeof ...(Tail) > 0 )
// More sizes remain: the next piece starts right after the Head lanes just
// taken.
121+ return cpp::tuple_cat (first, split<T, N, Offset + Head, Tail...>(x));
122+ else
123+ return first;
124+ }
125+
126+ } // namespace internal
127+
67128// Type trait helpers.
68129template <typename T>
69130struct simd_size : cpp::integral_constant<size_t , __builtin_vectorelements(T)> {
@@ -276,58 +337,6 @@ LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
276337 return m ? x : y;
277338}
278339
// Lines carrying the '-' marker: the copy of the shuffle helpers that this
// patch removes from this position. Template parameter letters used below:
// O = original lane count, I = index pack, L = max(N, M); M is the target lane
// count in extend and the lane count of `y` in concat.
279- namespace internal {
// Single-shuffle resize of `x` to sizeof...(I) lanes; each output lane uses
// its entry of I as the shuffle index when that entry is below O, and index
// -1 otherwise. Both shuffle operands are `x`.
280- template <typename T, size_t N, size_t O, size_t ... I>
281- LIBC_INLINE constexpr static cpp::simd<T, sizeof ...(I)>
282- extend (cpp::simd<T, N> x, cpp::index_sequence<I...>) {
283- return __builtin_shufflevector (x, x, (I < O ? static_cast <int >(I) : -1 )...);
284- }
// Resizes `x` from N lanes to M lanes: returns `x` when N == M, uses one
// shuffle when M <= 2 * N, otherwise shuffles to 2 * N lanes and recurses.
// O is only forwarded to the recursive call.
285- template <typename T, size_t N, size_t M, size_t O>
286- LIBC_INLINE constexpr static auto extend (cpp::simd<T, N> x) {
287- if constexpr (N == M)
288- return x;
289- else if constexpr (M <= 2 * N)
290- return extend<T, N, M>(x, cpp::make_index_sequence<M>{});
291- else
292- return extend<T, 2 * N, M, O>(
293- extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
294- }
// Shuffles `x` (N lanes) and `y` (M lanes) into one vector of N + M lanes.
// For each entry idx of I the result takes lane idx of `x` when idx < N, lane
// (idx - N) of `y` when idx < N + M, and shuffle index -1 otherwise.
295- template <typename T, size_t N, size_t M, size_t ... I>
296- LIBC_INLINE constexpr static cpp::simd<T, N + M>
297- concat (cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<I...>) {
// Common lane count both inputs are brought to before the final shuffle.
298- constexpr size_t L = (N > M ? N : M);
299-
300- auto x_ext = extend<T, N, L, N>(x);
301- auto y_ext = extend<T, M, L, M>(y);
302-
303- auto remap = [](size_t idx) -> int {
304- if (idx < N)
305- return static_cast <int >(idx);
306- if (idx < N + M)
// Lanes of the second shuffle operand are numbered after the L lanes of the
// first operand, hence the + L offset.
307- return static_cast <int >((idx - N) + L);
308- return -1 ;
309- };
310-
311- return __builtin_shufflevector (x_ext, y_ext, remap (I)...);
312- }
313-
// Extracts lanes of `x` into a Count-lane vector: each output lane reads lane
// Offset + (its entry of I) of `x`. Both shuffle operands are `x`.
314- template <typename T, size_t N, size_t Count, size_t Offset, size_t ... I>
315- LIBC_INLINE constexpr static cpp::simd<T, Count>
316- slice (cpp::simd<T, N> x, cpp::index_sequence<I...>) {
317- return __builtin_shufflevector (x, x, (Offset + I)...);
318- }
// Splits `x` into consecutive pieces of Head, Tail... lanes, beginning at lane
// Offset; the pieces are returned in order in the tuple built by
// cpp::make_tuple / cpp::tuple_cat.
319- template <typename T, size_t N, size_t Offset, size_t Head, size_t ... Tail>
320- LIBC_INLINE constexpr static auto split (cpp::simd<T, N> x) {
321- auto first = cpp::make_tuple (
322- slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
323- if constexpr (sizeof ...(Tail) > 0 )
324- return cpp::tuple_cat (first, split<T, N, Offset + Head, Tail...>(x));
325- else
326- return first;
327- }
328-
329- } // namespace internal
330-
331340// Shuffling helpers.
332341template <typename T, size_t N, size_t M>
333342LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y) {
0 commit comments