Skip to content

Commit 1513156

Browse files
committed
[libc] Implement wide read strlen with LLVM vector type
Summary: This is a PR to show how this could be done cross-platform with LLVM vectors. The downside is that this only works with LLVM/Clang 15 or newer, because it needs support for boolean vectors. It's based on #152389 and mostly just shows a common `vector` helper that could be used for anything SIMD-related.
1 parent 79253cf commit 1513156

File tree

6 files changed

+153
-1
lines changed

6 files changed

+153
-1
lines changed

libc/src/__support/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,15 @@ add_header_library(
313313
libc.src.string.memory_utils.inline_memset
314314
)
315315

316+
# Cross-platform SIMD helpers built on the compiler's vector extension.
# DEPENDS mirrors every header that vector.h includes so the header library
# can be consumed without relying on transitive dependencies.
add_header_library(
  vector
  HDRS
    vector.h
  DEPENDS
    libc.hdr.stdint_proxy
    libc.src.__support.common
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.type_traits
    libc.src.__support.macros.attributes
    libc.src.__support.macros.properties.cpu_features
)
324+
316325
add_header_library(
317326
char_vector
318327
HDRS

libc/src/__support/macros/attributes.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,12 @@
4848
#define LIBC_PREFERED_TYPE(TYPE)
4949
#endif
5050

51+
// Vector type support. LIBC_HAS_VECTOR_TYPE is always defined (to 1 or 0) so
// it can be tested with a plain `#if`. LIBC_VECTOR_TYPE(N) is only defined
// when support is present, which requires Clang 15 or newer together with the
// ext_vector_type attribute; guard every use with LIBC_HAS_VECTOR_TYPE.
#if defined(__clang__) && __clang_major__ >= 15 &&                             \
    __has_attribute(ext_vector_type)
#define LIBC_VECTOR_TYPE(N) __attribute__((ext_vector_type(N)))
#define LIBC_HAS_VECTOR_TYPE 1
#else
#define LIBC_HAS_VECTOR_TYPE 0
#endif
58+
5159
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@
5959
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
6060
#endif // __ARM_FP
6161

62+
// Defined (without a value) whenever the compiler predefines __ARM_NEON; test
// it with `defined(...)`.
#ifdef __ARM_NEON
#define LIBC_TARGET_CPU_HAS_ARM_NEON
#endif // __ARM_NEON
65+
6266
#if defined(__riscv_flen)
6367
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
6468
#if defined(__riscv_zfhmin)

libc/src/__support/vector.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
//===-- Helper functions for SIMD extensions --------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_VECTOR_H
10+
#define LLVM_LIBC_SRC___SUPPORT_VECTOR_H
11+
12+
#include "hdr/stdint_proxy.h"
13+
#include "src/__support/CPP/bit.h"
14+
#include "src/__support/CPP/type_traits.h"
15+
#include "src/__support/common.h"
16+
#include "src/__support/macros/properties/cpu_features.h"
17+
18+
#include <stddef.h>
19+
20+
namespace LIBC_NAMESPACE_DECL {
21+
22+
static_assert(LIBC_HAS_VECTOR_TYPE, "Compiler does not support vector types.");
23+
24+
namespace vector {
25+
26+
template <size_t N> struct BitmaskTy;
27+
template <> struct BitmaskTy<1> {
28+
using type = uint8_t;
29+
};
30+
template <> struct BitmaskTy<8> {
31+
using type = uint8_t;
32+
};
33+
template <> struct BitmaskTy<16> {
34+
using type = uint16_t;
35+
};
36+
template <> struct BitmaskTy<32> {
37+
using type = uint32_t;
38+
};
39+
template <> struct BitmaskTy<64> {
40+
using type = uint64_t;
41+
};
42+
43+
template <typename T> struct Scalar {
44+
static constexpr size_t WIDTH = sizeof(T);
45+
static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T);
46+
};
47+
template <typename T> struct SSE2 {
48+
static constexpr size_t WIDTH = 16;
49+
static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T);
50+
};
51+
template <typename T> struct AVX2 {
52+
static constexpr size_t WIDTH = 32;
53+
static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T);
54+
};
55+
template <typename T> struct AVX512 {
56+
static constexpr size_t WIDTH = 64;
57+
static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T);
58+
};
59+
template <typename T> struct Neon {
60+
static constexpr size_t WIDTH = 16;
61+
static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T);
62+
};
63+
64+
#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
65+
template <typename T> using Platform = AVX512<T>;
66+
#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
67+
template <typename T> using Platform = AVX2<T>;
68+
#elif defined(LIBC_TARGET_CPU_HAS_SSE2)
69+
template <typename T> using Platform = SSE2<T>;
70+
#elif defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
71+
template <typename T> using Platform = Neon<T>;
72+
#else
73+
template <typename T> using Platform = Scalar<T>;
74+
#endif
75+
76+
template <typename T, size_t N = Platform<T>::NUM_ELEMENTS>
77+
using Vector = T LIBC_VECTOR_TYPE(N);
78+
79+
template <typename To, typename From, size_t N>
80+
LIBC_INLINE Vector<To, N> convert(const Vector<From, N> &v) {
81+
return __builtin_convertvector(v, Vector<To, N>);
82+
}
83+
84+
template <typename T, size_t N>
85+
LIBC_INLINE typename BitmaskTy<N>::type to_bitmask(const Vector<T, N> &v) {
86+
return cpp::bit_cast<typename BitmaskTy<N>::type>(convert<bool, T, N>(v));
87+
}
88+
} // namespace vector
89+
90+
} // namespace LIBC_NAMESPACE_DECL
91+
92+
#endif // LLVM_LIBC_SRC___SUPPORT_VECTOR_H

libc/src/string/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_header_library(
2020
libc.hdr.stdint_proxy
2121
libc.src.__support.CPP.bitset
2222
libc.src.__support.CPP.type_traits
23+
libc.src.__support.vector
2324
libc.src.__support.common
2425
${string_config_options}
2526
)

libc/src/string/string_utils.h

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include "src/__support/macros/config.h"
2323
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2424

25+
#if LIBC_HAS_VECTOR_TYPE
26+
#include "src/__support/vector.h"
27+
#endif
28+
2529
namespace LIBC_NAMESPACE_DECL {
2630
namespace internal {
2731

@@ -61,7 +65,7 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
6165
}
6266

6367
template <typename Word>
64-
LIBC_INLINE size_t string_length_wide_read(const char *src) {
68+
LIBC_INLINE size_t string_length_wide_read_chars(const char *src) {
6569
const char *char_ptr = src;
6670
// Step 1: read 1 byte at a time to align to block size
6771
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
@@ -81,6 +85,40 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
8185
return static_cast<size_t>(char_ptr - src);
8286
}
8387

88+
#if LIBC_HAS_VECTOR_TYPE
// Returns the offset of the first null byte at or after `src`, scanning one
// platform-sized vector of chars per iteration.
//
// Every load is a whole vector at an address aligned to the vector size, so
// the first load may include bytes that precede `src`, and any load may
// include bytes that follow the terminator within the same block.
LIBC_INLINE size_t string_length_wide_read_vector(const char *src) {
  using Block = vector::Vector<char>;

  const Block zeros('\0');
  const uintptr_t block_mask = static_cast<uintptr_t>(sizeof(Block) - 1);

  // Round the pointer down to the start of the block that contains `src`.
  const char *block_start = reinterpret_cast<const char *>(
      reinterpret_cast<uintptr_t>(src) & ~block_mask);
  const Block *cursor = reinterpret_cast<const Block *>(block_start);

  // First block: shift out the bits of the unused bytes that precede `src`, so
  // only matches at or after `src` remain.
  auto head_mask = vector::to_bitmask(*cursor == zeros);
  decltype(head_mask) head_hits = head_mask >> (src - block_start);
  if (head_hits)
    return static_cast<size_t>(cpp::countr_zero(head_hits));

  // Remaining blocks: advance one block at a time until a null byte shows up.
  for (++cursor;; ++cursor) {
    auto hits = vector::to_bitmask(*cursor == zeros);
    if (hits) {
      const char *block_bytes = reinterpret_cast<const char *>(cursor);
      return static_cast<size_t>(block_bytes - src) +
             static_cast<size_t>(cpp::countr_zero(hits));
    }
  }
}
#endif // LIBC_HAS_VECTOR_TYPE
113+
114+
// Returns the length of the null-terminated string at `src` using wide reads.
// Dispatches to the vector implementation when LIBC_HAS_VECTOR_TYPE is set and
// to the word-at-a-time implementation otherwise.
//
// Word selects the block type of the word-at-a-time fallback and is ignored by
// the vector path. It remains a template parameter so that callers which
// instantiate string_length_wide_read<Word> keep compiling, and it is
// forwarded explicitly because string_length_wide_read_chars cannot deduce it
// from a `const char *` argument.
// NOTE(review): the `unsigned int` default is an assumption -- confirm it
// matches the word size existing callers request.
template <typename Word = unsigned int>
LIBC_INLINE size_t string_length_wide_read(const char *src) {
#if LIBC_HAS_VECTOR_TYPE
  return string_length_wide_read_vector(src);
#else
  return string_length_wide_read_chars<Word>(src);
#endif
}
121+
84122
// Returns the length of a string, denoted by the first occurrence
85123
// of a null terminator.
86124
template <typename T> LIBC_INLINE size_t string_length(const T *src) {

0 commit comments

Comments
 (0)