Skip to content

Commit eb7b162

Browse files
authored
[libc] Implement generic SIMD helper 'simd.h' and implement strlen (#152605)
Summary: This PR introduces a new 'simd.h' header that implements an interface similar to the proposed `stdx::simd` in C++. However, we instead wrap around the LLVM internal type. This makes heavy use of the clang vector extensions and boolean vectors, using primitive vector types instead of a class (many benefits to this). I use this interface to implement a generic strlen implementation, but propose we also use it for math. Right now this requires a feature only introduced in clang-22.
1 parent abda8be commit eb7b162

File tree

9 files changed

+321
-2
lines changed

9 files changed

+321
-2
lines changed

libc/src/__support/CPP/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,9 @@ add_object_library(
210210
libc.src.__support.common
211211
libc.src.__support.macros.properties.os
212212
)
213+
214+
# Header-only SIMD helpers built on clang's ext_vector_type extension.
# DEPENDS lists the libraries whose headers simd.h includes, matching the
# dependency list of the corresponding Bazel target.
add_header_library(
  simd
  HDRS
    simd.h
  DEPENDS
    .algorithm
    .bit
    .type_traits
    libc.hdr.stdint_proxy
    libc.src.__support.macros.attributes
    libc.src.__support.macros.config
)

libc/src/__support/CPP/algorithm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
namespace LIBC_NAMESPACE_DECL {
1919
namespace cpp {
2020

21+
template <class T = void> struct plus {};
22+
template <class T = void> struct multiplies {};
23+
template <class T = void> struct bit_and {};
24+
template <class T = void> struct bit_or {};
25+
template <class T = void> struct bit_xor {};
26+
2127
template <class T> LIBC_INLINE constexpr const T &max(const T &a, const T &b) {
2228
return (a < b) ? b : a;
2329
}

libc/src/__support/CPP/simd.h

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
//===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file provides a generic interface into fixed-size SIMD instructions
10+
// using the clang vector type. The API shares some similarities with the
11+
// stdx::simd proposal, but instead chooses to use vectors as primitive types
12+
// with several extra helper functions.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
#include "hdr/stdint_proxy.h"
17+
#include "src/__support/CPP/algorithm.h"
18+
#include "src/__support/CPP/bit.h"
19+
#include "src/__support/CPP/type_traits/integral_constant.h"
20+
#include "src/__support/macros/attributes.h"
21+
#include "src/__support/macros/config.h"
22+
23+
#include <stddef.h>
24+
25+
#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
26+
#define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
27+
28+
#if LIBC_HAS_VECTOR_TYPE
29+
30+
namespace LIBC_NAMESPACE_DECL {
31+
namespace cpp {
32+
33+
namespace internal {
34+
35+
template <typename T>
36+
using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);
37+
38+
#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
39+
template <typename T>
40+
inline constexpr size_t native_vector_size = 64 / sizeof(T);
41+
#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
42+
template <typename T>
43+
inline constexpr size_t native_vector_size = 32 / sizeof(T);
44+
#elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
45+
template <typename T>
46+
inline constexpr size_t native_vector_size = 16 / sizeof(T);
47+
#else
48+
template <typename T> inline constexpr size_t native_vector_size = 1;
49+
#endif
50+
51+
template <typename T> LIBC_INLINE constexpr T poison() {
52+
return __builtin_nondeterministic_value(T());
53+
}
54+
} // namespace internal
55+
56+
// Type aliases.
57+
template <typename T, size_t N>
58+
using fixed_size_simd = T [[clang::ext_vector_type(N)]];
59+
template <typename T, size_t N = internal::native_vector_size<T>>
60+
using simd = T [[clang::ext_vector_type(N)]];
61+
template <typename T>
62+
using simd_mask = simd<bool, internal::native_vector_size<T>>;
63+
64+
// Type trait helpers.
65+
template <typename T>
66+
struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> {
67+
};
68+
template <class T> constexpr size_t simd_size_v = simd_size<T>::value;
69+
70+
template <typename T> struct is_simd : cpp::integral_constant<bool, false> {};
71+
template <typename T, unsigned N>
72+
struct is_simd<simd<T, N>> : cpp::integral_constant<bool, true> {};
73+
template <class T> constexpr bool is_simd_v = is_simd<T>::value;
74+
75+
template <typename T>
76+
struct is_simd_mask : cpp::integral_constant<bool, false> {};
77+
template <unsigned N>
78+
struct is_simd_mask<simd<bool, N>> : cpp::integral_constant<bool, true> {};
79+
template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
80+
81+
template <typename T> struct simd_element_type;
82+
template <typename T, size_t N> struct simd_element_type<simd<T, N>> {
83+
using type = T;
84+
};
85+
template <typename T>
86+
using simd_element_type_t = typename simd_element_type<T>::type;
87+
88+
template <typename T>
89+
using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>;
90+
91+
// Casting.
92+
template <typename To, typename From, size_t N>
93+
LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) {
94+
return __builtin_convertvector(v, simd<To, N>);
95+
}
96+
97+
// SIMD mask operations.
98+
template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) {
99+
return __builtin_reduce_and(m);
100+
}
101+
template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) {
102+
return __builtin_reduce_or(m);
103+
}
104+
template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) {
105+
return !any_of(m);
106+
}
107+
template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) {
108+
return any_of(m) && !all_of(m);
109+
}
110+
template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) {
111+
return __builtin_popcountg(m);
112+
}
113+
template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) {
114+
return __builtin_ctzg(m);
115+
}
116+
// Returns the zero-based index of the highest set lane of the mask `m`,
// mirroring find_first_set, which returns the index of the lowest set lane.
// __builtin_clzg counts the unset positions above the highest set one, so the
// index is (size - 1) minus that count; the previous `size - clz` result was
// one past the lane. At least one lane must be set: the single-argument form
// of __builtin_clzg has no defined result for an all-zero input.
template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) {
  constexpr size_t size = simd_size_v<simd<bool, N>>;
  return static_cast<int>(size - 1 - __builtin_clzg(m));
}
120+
121+
// Elementwise operations.
122+
template <typename T, size_t N>
123+
LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) {
124+
return __builtin_elementwise_min(x, y);
125+
}
126+
template <typename T, size_t N>
127+
LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) {
128+
return __builtin_elementwise_max(x, y);
129+
}
130+
131+
// Reduction operations.
132+
// Generic entry point for horizontal reductions over the lanes of `v`. `Op`
// defaults to cpp::plus<>, so a call of the form reduce(v) forwards to the
// cpp::plus<> overload declared later in this header.
// NOTE(review): the body forwards to reduce(v, op) and relies on a more
// specialized overload being chosen for `Op`. For an operation type that has
// no dedicated overload this template appears to call itself -- confirm, and
// consider rejecting unsupported operations at compile time.
template <typename T, size_t N, typename Op = cpp::plus<>>
133+
LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) {
134+
return reduce(v, op);
135+
}
136+
template <typename T, size_t N>
137+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) {
138+
return __builtin_reduce_add(v);
139+
}
140+
template <typename T, size_t N>
141+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) {
142+
return __builtin_reduce_mul(v);
143+
}
144+
template <typename T, size_t N>
145+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) {
146+
return __builtin_reduce_and(v);
147+
}
148+
template <typename T, size_t N>
149+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) {
150+
return __builtin_reduce_or(v);
151+
}
152+
template <typename T, size_t N>
153+
LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_xor<>) {
154+
return __builtin_reduce_xor(v);
155+
}
156+
template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) {
157+
return __builtin_reduce_min(v);
158+
}
159+
template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) {
160+
return __builtin_reduce_max(v);
161+
}
162+
163+
// Accessor helpers.
164+
template <typename T>
165+
LIBC_INLINE enable_if_simd_t<T> load_unaligned(const void *ptr) {
166+
T tmp;
167+
__builtin_memcpy(&tmp, ptr, sizeof(T));
168+
return tmp;
169+
}
170+
template <typename T>
171+
LIBC_INLINE enable_if_simd_t<T> load_aligned(const void *ptr) {
172+
return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T)));
173+
}
174+
template <typename T>
175+
LIBC_INLINE enable_if_simd_t<T> store_unaligned(T v, void *ptr) {
176+
__builtin_memcpy(ptr, &v, sizeof(T));
177+
}
178+
template <typename T>
179+
LIBC_INLINE enable_if_simd_t<T> store_aligned(T v, void *ptr) {
180+
store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T)));
181+
}
182+
template <typename T>
183+
LIBC_INLINE enable_if_simd_t<T>
184+
masked_load(simd<bool, simd_size_v<T>> m, void *ptr,
185+
T passthru = internal::poison<simd_element_type<T>>()) {
186+
return __builtin_masked_load(m, ptr, passthru);
187+
}
188+
template <typename T>
189+
LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v,
190+
void *ptr) {
191+
__builtin_masked_store(
192+
m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
193+
}
194+
195+
// Construction helpers.
196+
template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) {
197+
return simd<T, N>(v);
198+
}
199+
template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) {
200+
return splat<T, simd_size_v<simd<T>>>(v);
201+
}
202+
template <typename T, unsigned N>
203+
LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) {
204+
simd<T, N> v{};
205+
for (unsigned i = 0; i < N; ++i)
206+
v[i] = base + T(i) * step;
207+
return v;
208+
}
209+
template <typename T>
210+
LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) {
211+
return iota<T, simd_size_v<simd<T>>>(base, step);
212+
}
213+
214+
// Conditional helpers.
215+
template <typename T, size_t N>
216+
LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x,
217+
simd<T, N> y) {
218+
return m ? x : y;
219+
}
220+
221+
// TODO: where expressions, scalar overloads, ABI types.
222+
223+
} // namespace cpp
224+
} // namespace LIBC_NAMESPACE_DECL
225+
226+
#endif // LIBC_HAS_VECTOR_TYPE
227+
#endif

libc/src/__support/macros/attributes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,10 @@ LIBC_THREAD_MODE_EXTERNAL.
7373
#define LIBC_PREFERED_TYPE(TYPE)
7474
#endif
7575

76+
#if __has_attribute(ext_vector_type) && __has_feature(ext_vector_type_boolean)
77+
#define LIBC_HAS_VECTOR_TYPE 1
78+
#else
79+
#define LIBC_HAS_VECTOR_TYPE 0
80+
#endif
81+
7682
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@
5959
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
6060
#endif // __ARM_FP
6161

62+
#if defined(__ARM_NEON)
63+
#define LIBC_TARGET_CPU_HAS_ARM_NEON
64+
#endif
65+
6266
#if defined(__riscv_flen)
6367
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
6468
#if defined(__riscv_zfhmin)

libc/src/string/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_header_library(
2020
libc.hdr.stdint_proxy
2121
libc.src.__support.CPP.bitset
2222
libc.src.__support.CPP.type_traits
23+
libc.src.__support.CPP.simd
2324
libc.src.__support.common
2425
${string_config_options}
2526
)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===-- Strlen for generic SIMD types -------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
10+
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
11+
12+
#include "src/__support/CPP/simd.h"
13+
#include "src/__support/common.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
namespace internal {
17+
18+
// Exploit the underlying integer representation to do a variable shift.
19+
LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
                                                      size_t shift) {
  using mask_ty = cpp::simd_mask<char>;
  // Unsigned integer with exactly as many bits as the mask object occupies.
  using bits_ty = cpp::internal::get_as_integer_type_t<mask_ty>;
  // Reinterpret the mask as an integer, shift right by `shift` bits, and
  // reinterpret the result as a mask again.
  const bits_ty bits = cpp::bit_cast<bits_ty>(m);
  const bits_ty shifted = bits >> shift;
  return cpp::bit_cast<mask_ty>(shifted);
}
25+
26+
// Computes the length of the NUL-terminated string at `src` one SIMD vector at
// a time. Loads are always issued on vector-aligned addresses, so the first
// load may begin before `src` and any load may read past the terminator;
// address sanitizer instrumentation is disabled for this function.
[[clang::no_sanitize("address")]] LIBC_INLINE size_t
string_length(const char *src) {
  using vector_ty = cpp::simd<char>;
  constexpr vector_ty zeros = cpp::splat('\0');

  // Round `src` down to the start of its enclosing vector-aligned block.
  size_t align = alignof(vector_ty);
  const vector_ty *block =
      reinterpret_cast<const vector_ty *>(__builtin_align_down(src, align));

  // First block: shift the comparison mask by the distance from the block
  // start to `src` so bytes in front of the string are not considered.
  vector_ty head = cpp::load_aligned<vector_ty>(block);
  cpp::simd_mask<char> head_hits = cpp::simd_cast<bool>(head == zeros);
  size_t skip = src - reinterpret_cast<const char *>(block);
  cpp::simd_mask<char> visible = shift_mask(head_hits, skip);
  if (cpp::any_of(visible))
    return cpp::find_first_set(visible);

  // Every following block lies entirely after `src` and is scanned whole.
  while (true) {
    ++block;
    vector_ty body = cpp::load_aligned<vector_ty>(block);
    cpp::simd_mask<char> hits = cpp::simd_cast<bool>(body == zeros);
    if (cpp::any_of(hits))
      return (reinterpret_cast<const char *>(block) - src) +
             cpp::find_first_set(hits);
  }
}
48+
} // namespace internal
49+
50+
namespace string_length_impl = internal;
51+
} // namespace LIBC_NAMESPACE_DECL
52+
53+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H

libc/src/string/string_utils.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@
2323
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2424

2525
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
26-
#if defined(LIBC_TARGET_ARCH_IS_X86)
26+
#if LIBC_HAS_VECTOR_TYPE
27+
#include "src/string/memory_utils/generic/inline_strlen.h"
28+
#elif defined(LIBC_TARGET_ARCH_IS_X86)
2729
#include "src/string/memory_utils/x86_64/inline_strlen.h"
2830
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
2931
#include "src/string/memory_utils/aarch64/inline_strlen.h"
3032
#else
3133
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
3234
#endif
33-
#endif
35+
#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
3436

3537
namespace LIBC_NAMESPACE_DECL {
3638
namespace internal {

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,18 @@ libc_support_library(
677677
],
678678
)
679679

680+
libc_support_library(
681+
name = "__support_cpp_simd",
682+
hdrs = ["src/__support/CPP/simd.h"],
683+
deps = [
684+
":__support_cpp_algorithm",
685+
":__support_cpp_bit",
686+
":__support_cpp_type_traits",
687+
":__support_macros_attributes",
688+
":hdr_stdint_proxy",
689+
],
690+
)
691+
680692
libc_support_library(
681693
name = "__support_cpp_span",
682694
hdrs = ["src/__support/CPP/span.h"],
@@ -4938,6 +4950,7 @@ libc_support_library(
49384950
"src/string/memory_utils/arm/inline_memset.h",
49394951
"src/string/memory_utils/generic/aligned_access.h",
49404952
"src/string/memory_utils/generic/byte_per_byte.h",
4953+
"src/string/memory_utils/generic/inline_strlen.h",
49414954
"src/string/memory_utils/inline_bcmp.h",
49424955
"src/string/memory_utils/inline_bzero.h",
49434956
"src/string/memory_utils/inline_memcmp.h",
@@ -4964,6 +4977,7 @@ libc_support_library(
49644977
":__support_cpp_array",
49654978
":__support_cpp_bit",
49664979
":__support_cpp_cstddef",
4980+
":__support_cpp_simd",
49674981
":__support_cpp_type_traits",
49684982
":__support_macros_attributes",
49694983
":__support_macros_optimization",

0 commit comments

Comments
 (0)