Skip to content

Commit ed7dac9

Browse files
committed
[libc] Implement generic SIMD helper 'simd.h' and implement strlen
Summary: This PR introduces a new 'simd.h' header that implements an interface similar to the proposed `stdx::simd` in C++. However, we instead wrap around the LLVM internal type. This makes heavy use of the clang vector extensions and boolean vectors, using primitive vector types instead of a class (which has many benefits). I use this interface to implement a generic strlen implementation, but propose we use this for math. Right now this requires a feature only introduced in clang-22.
1 parent ca14a8a commit ed7dac9

File tree

8 files changed

+313
-2
lines changed

8 files changed

+313
-2
lines changed

libc/src/__support/CPP/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,9 @@ add_object_library(
210210
libc.src.__support.common
211211
libc.src.__support.macros.properties.os
212212
)
213+
214+
# Header-only portable SIMD helpers (simd.h). The DEPENDS list mirrors the
# headers that simd.h includes, so that targets using this library also pick up
# those header libraries.
add_header_library(
  simd
  HDRS
    simd.h
  DEPENDS
    libc.hdr.stdint_proxy
    libc.src.__support.CPP.algorithm
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.type_traits
    libc.src.__support.macros.attributes
    libc.src.__support.macros.config
)

libc/src/__support/CPP/algorithm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
namespace LIBC_NAMESPACE_DECL {
1919
namespace cpp {
2020

21+
// Empty tag types named after the standard arithmetic and bitwise function
// objects. They declare no members; the template parameter defaults to void.
template <typename T = void> struct plus {};
template <typename T = void> struct multiplies {};
template <typename T = void> struct bit_and {};
template <typename T = void> struct bit_or {};
template <typename T = void> struct bit_xor {};
26+
2127
// Returns a reference to the greater of `a` and `b`, compared with operator<.
// When `a` is not less than `b` (including when they are equivalent), `a` is
// returned.
template <class T> LIBC_INLINE constexpr const T &max(const T &a, const T &b) {
  if (a < b)
    return b;
  return a;
}

libc/src/__support/CPP/simd.h

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
//===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file provides a generic interface into fixed-size SIMD instructions
10+
// using the clang vector type. The API shares some similarities with the
11+
// stdx::simd proposal, but instead chooses to use vectors as primitive types
12+
// with several extra helper functions.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "hdr/stdint_proxy.h"
17+
#include "src/__support/CPP/algorithm.h"
18+
#include "src/__support/CPP/bit.h"
19+
#include "src/__support/CPP/type_traits/integral_constant.h"
20+
#include "src/__support/macros/attributes.h"
21+
#include "src/__support/macros/config.h"
22+
23+
#include <stddef.h>
24+
25+
#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
26+
#define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
27+
28+
namespace LIBC_NAMESPACE_DECL {
29+
namespace cpp {
30+
31+
static_assert(LIBC_HAS_VECTOR_TYPE, "compiler does not support vector types");
32+
33+
namespace internal {
34+
35+
// Maps a size in bytes to the unsigned integer type of exactly that size.
// Only sizes 1, 2, 4 and 8 are specialized; the primary template is left
// undefined, so any other size fails to compile.
template <size_t Size> struct get_as_integer_type;

template <> struct get_as_integer_type<1> { using type = uint8_t; };
template <> struct get_as_integer_type<2> { using type = uint16_t; };
template <> struct get_as_integer_type<4> { using type = uint32_t; };
template <> struct get_as_integer_type<8> { using type = uint64_t; };

// Unsigned integer type occupying the same number of bytes as T.
template <typename T>
using get_as_integer_type_t = typename get_as_integer_type<sizeof(T)>::type;
52+
53+
// Default element count for a vector of T, chosen from the enabled CPU
// feature macros: 64 bytes' worth of T with AVX512F, 32 bytes' worth with
// AVX2, 16 bytes' worth with SSE2 or ARM NEON. When none of these macros is
// defined the count is a single element.
#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
template <class T> inline constexpr size_t native_vector_size = 64 / sizeof(T);
#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
template <class T> inline constexpr size_t native_vector_size = 32 / sizeof(T);
#elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
template <class T> inline constexpr size_t native_vector_size = 16 / sizeof(T);
#else
template <class T> inline constexpr size_t native_vector_size = 1;
#endif
65+
} // namespace internal
66+
67+
// Type aliases.
68+
template <typename T, size_t N>
69+
using fixed_size_simd = T [[clang::ext_vector_type(N)]];
70+
template <typename T, size_t N = internal::native_vector_size<T>>
71+
using simd = T [[clang::ext_vector_type(N)]];
72+
template <typename T>
73+
using simd_mask = simd<bool, internal::native_vector_size<T>>;
74+
75+
// Type trait helpers.
76+
template <typename T> struct simd_size : cpp::integral_constant<size_t, 1> {};
77+
template <typename T, unsigned N>
78+
struct simd_size<T [[clang::ext_vector_type(N)]]>
79+
: cpp::integral_constant<size_t, N> {};
80+
template <class T> constexpr size_t simd_size_v = simd_size<T>::value;
81+
82+
template <typename T> struct is_simd : cpp::integral_constant<bool, false> {};
83+
template <typename T, unsigned N>
84+
struct is_simd<T [[clang::ext_vector_type(N)]]>
85+
: cpp::integral_constant<bool, true> {};
86+
template <class T> constexpr bool is_simd_v = is_simd<T>::value;
87+
88+
template <typename T>
89+
struct is_simd_mask : cpp::integral_constant<bool, false> {};
90+
template <unsigned N>
91+
struct is_simd_mask<bool [[clang::ext_vector_type(N)]]>
92+
: cpp::integral_constant<bool, true> {};
93+
template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
94+
95+
template <typename T>
96+
using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>;
97+
98+
// Casting.
99+
template <typename To, typename From, size_t N>
100+
LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) {
101+
return __builtin_convertvector(v, simd<To, N>);
102+
}
103+
104+
// SIMD mask operations.
105+
// True when every element of the mask is set.
template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) {
  return __builtin_reduce_and(m);
}

// True when at least one element of the mask is set.
template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) {
  return __builtin_reduce_or(m);
}

// True when no element of the mask is set.
template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) {
  return !__builtin_reduce_or(m);
}

// True when the mask has at least one set element but not all of them.
template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) {
  return __builtin_reduce_or(m) && !__builtin_reduce_and(m);
}
117+
// Number of set elements in the mask, as computed by __builtin_popcountg.
template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) {
  return __builtin_popcountg(m);
}

// Count of trailing unset elements of the mask, as computed by
// __builtin_ctzg, i.e. the zero-based index of the lowest set element.
// NOTE(review): the result for an all-false mask follows __builtin_ctzg's
// zero-input rules - confirm callers never pass one.
template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) {
  return __builtin_ctzg(m);
}
123+
// Returns the zero-based index of the highest set element of the mask, using
// the same zero-based convention as find_first_set.
// NOTE(review): the result for an all-false mask follows __builtin_clzg's
// zero-input rules - confirm callers never pass one.
template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) {
  constexpr int size = static_cast<int>(simd_size_v<simd<bool, N>>);
  // __builtin_clzg counts the unset elements above the highest set one. The
  // highest set element therefore sits at index size - 1 - count; omitting the
  // "- 1" would yield a one-based position instead of an index.
  return size - 1 - __builtin_clzg(m);
}
127+
128+
// Elementwise operations.
129+
template <typename T, size_t N>
130+
LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) {
131+
return __builtin_elementwise_min(x, y);
132+
}
133+
template <typename T, size_t N>
134+
LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) {
135+
return __builtin_elementwise_max(x, y);
136+
}
137+
138+
// Reduction operations.
139+
template <typename T, size_t N, typename Op = cpp::plus<>>
140+
LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) {
141+
return reduce(v, op);
142+
}
143+
template <typename T, size_t N>
144+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) {
145+
return __builtin_reduce_add(v);
146+
}
147+
template <typename T, size_t N>
148+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) {
149+
return __builtin_reduce_mul(v);
150+
}
151+
template <typename T, size_t N>
152+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) {
153+
return __builtin_reduce_and(v);
154+
}
155+
template <typename T, size_t N>
156+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) {
157+
return __builtin_reduce_or(v);
158+
}
159+
template <typename T, size_t N>
160+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_xor<>) {
161+
return __builtin_reduce_xor(v);
162+
}
163+
// Horizontal minimum: the smallest element of `v`.
template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) {
  return __builtin_reduce_min(v);
}

// Horizontal maximum: the largest element of `v`.
template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) {
  return __builtin_reduce_max(v);
}
169+
170+
// Accessor helpers.
171+
template <typename T>
172+
LIBC_INLINE enable_if_simd_t<T> load_unaligned(const void *ptr) {
173+
T tmp;
174+
__builtin_memcpy(&tmp, ptr, sizeof(T));
175+
return tmp;
176+
}
177+
template <typename T>
178+
LIBC_INLINE enable_if_simd_t<T> load_aligned(const void *ptr) {
179+
return *reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T)));
180+
}
181+
template <typename T>
182+
LIBC_INLINE enable_if_simd_t<T> store_unaligned(T v, void *ptr) {
183+
__builtin_memcpy(ptr, &v, sizeof(T));
184+
}
185+
template <typename T>
186+
LIBC_INLINE enable_if_simd_t<T> store_aligned(T v, void *ptr) {
187+
*reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))) = v;
188+
}
189+
template <typename T>
190+
LIBC_INLINE enable_if_simd_t<T> masked_load(simd<bool, simd_size_v<T>> m,
191+
void *ptr) {
192+
return __builtin_masked_load(
193+
m, reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
194+
}
195+
template <typename T>
196+
LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v,
197+
void *ptr) {
198+
__builtin_masked_store(
199+
m, v, reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
200+
}
201+
202+
// Construction helpers.
203+
// Builds a vector of N elements from the scalar `v` by converting the scalar
// to the vector type.
template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) {
  using vector_type = simd<T, N>;
  return vector_type(v);
}

// Same as above, using the element count of the default simd<T>.
template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) {
  constexpr size_t count = simd_size_v<simd<T>>;
  return splat<T, count>(v);
}
209+
template <typename T, unsigned N>
210+
LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) {
211+
simd<T, N> v{};
212+
for (unsigned i = 0; i < N; ++i)
213+
v[i] = base + T(i) * step;
214+
return v;
215+
}
216+
template <typename T>
217+
LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) {
218+
return iota<T, simd_size_v<simd<T>>>(base, step);
219+
}
220+
221+
// Conditional helpers.
222+
template <typename T, size_t N>
223+
LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x,
224+
simd<T, N> y) {
225+
return m ? x : y;
226+
}
227+
228+
// TODO: where expressions, scalar overloads, ABI types.
229+
230+
} // namespace cpp
231+
} // namespace LIBC_NAMESPACE_DECL
232+
233+
#endif

libc/src/__support/macros/attributes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,10 @@ LIBC_THREAD_MODE_EXTERNAL.
7373
#define LIBC_PREFERED_TYPE(TYPE)
7474
#endif
7575

76+
// LIBC_HAS_VECTOR_TYPE is 1 when the compiler provides both the
// ext_vector_type attribute and the ext_vector_type_boolean feature, and 0
// otherwise.
// The feature-test operators are only evaluated after checking that they
// exist: a compiler without __has_feature rejects any #if expression that
// mentions it, even when the other operand is already false.
#if defined(__has_attribute) && defined(__has_feature)
#if __has_attribute(ext_vector_type) && __has_feature(ext_vector_type_boolean)
#define LIBC_HAS_VECTOR_TYPE 1
#endif
#endif
#ifndef LIBC_HAS_VECTOR_TYPE
#define LIBC_HAS_VECTOR_TYPE 0
#endif
81+
7682
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@
5959
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
6060
#endif // __ARM_FP
6161

62+
// Defined (with no value) whenever the compiler predefines __ARM_NEON.
#ifdef __ARM_NEON
#define LIBC_TARGET_CPU_HAS_ARM_NEON
#endif // __ARM_NEON
65+
6266
#if defined(__riscv_flen)
6367
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
6468
#if defined(__riscv_zfhmin)

libc/src/string/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_header_library(
2020
libc.hdr.stdint_proxy
2121
libc.src.__support.CPP.bitset
2222
libc.src.__support.CPP.type_traits
23+
libc.src.__support.CPP.simd
2324
libc.src.__support.common
2425
${string_config_options}
2526
)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===-- Strlen for generic SIMD types -------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
10+
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
11+
12+
#include "src/__support/CPP/simd.h"
13+
#include "src/__support/common.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
namespace internal {
17+
18+
// Exploit the underlying integer representation to do a variable shift.
19+
LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
20+
size_t shift) {
21+
using bitmask_ty = cpp::internal::get_as_integer_type_t<cpp::simd_mask<char>>;
22+
bitmask_ty r = cpp::bit_cast<bitmask_ty>(m) >> shift;
23+
return cpp::bit_cast<cpp::simd_mask<char>>(r);
24+
}
25+
26+
// Returns the number of characters in `src` that precede the first null byte,
// scanning one aligned vector of characters at a time. Address sanitization is
// disabled for this function: the aligned loads read whole vectors, which can
// include bytes located before `src` and bytes after the null terminator.
[[clang::no_sanitize("address")]] LIBC_INLINE size_t
string_length(const char *src) {
  using vector_ty = cpp::simd<char>;
  constexpr vector_ty zeros = cpp::splat('\0');

  // Round `src` down to the start of the aligned vector that contains it.
  const vector_ty *block = reinterpret_cast<const vector_ty *>(
      __builtin_align_down(src, alignof(vector_ty)));
  size_t skipped = src - reinterpret_cast<const char *>(block);

  // First vector: shift out the matches belonging to the bytes before `src`,
  // so that position 0 of the mask corresponds to `src` itself.
  vector_ty head = cpp::load_aligned<vector_ty>(block);
  cpp::simd_mask<char> head_hits =
      shift_mask(cpp::simd_cast<bool>(head == zeros), skipped);
  if (cpp::any_of(head_hits))
    return cpp::find_first_set(head_hits);

  // Remaining vectors: the length is the distance from `src` to the start of
  // the vector plus the position of the first null byte inside it.
  while (true) {
    ++block;
    vector_ty body = cpp::load_aligned<vector_ty>(block);
    cpp::simd_mask<char> hits = cpp::simd_cast<bool>(body == zeros);
    if (cpp::any_of(hits))
      return (reinterpret_cast<const char *>(block) - src) +
             cpp::find_first_set(hits);
  }
}
48+
} // namespace internal
49+
50+
namespace string_length_impl = internal;
51+
} // namespace LIBC_NAMESPACE_DECL
52+
53+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H

libc/src/string/string_utils.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@
2323
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2424

2525
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
26-
#if defined(LIBC_TARGET_ARCH_IS_X86)
26+
#if LIBC_HAS_VECTOR_TYPE
27+
#include "src/string/memory_utils/generic/inline_strlen.h"
28+
#elif defined(LIBC_TARGET_ARCH_IS_X86)
2729
#include "src/string/memory_utils/x86_64/inline_strlen.h"
2830
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
2931
#include "src/string/memory_utils/aarch64/inline_strlen.h"
3032
#else
3133
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
3234
#endif
33-
#endif
35+
#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
3436

3537
namespace LIBC_NAMESPACE_DECL {
3638
namespace internal {

0 commit comments

Comments
 (0)