Skip to content

Commit 1502398

Browse files
committed
[libc] Implement wide read strlen with LLVM vector type
Summary: This is a PR to show how this could be done cross-platform with LLVM vectors. The downside is that this only works with LLVM/Clang 15 or newer, due to the needed support for boolean vectors. It's based on llvm#152389 and mostly just shows a common `vector` helper that could be used for anything SIMD-related.
1 parent 79253cf commit 1502398

File tree

6 files changed

+147
-2
lines changed

6 files changed

+147
-2
lines changed

libc/src/__support/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,15 @@ add_header_library(
313313
libc.src.string.memory_utils.inline_memset
314314
)
315315

316+
# Helper functions for SIMD extensions (vector.h).
# DEPENDS mirrors the headers that vector.h includes, so every header it pulls
# in is declared as a dependency of this target.
add_header_library(
  vector
  HDRS
    vector.h
  DEPENDS
    libc.hdr.stdint_proxy
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.type_traits
    libc.src.__support.common
    libc.src.__support.macros.attributes
    libc.src.__support.macros.properties.cpu_features
)
324+
316325
add_header_library(
317326
char_vector
318327
HDRS

libc/src/__support/macros/attributes.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,13 @@
4848
#define LIBC_PREFERED_TYPE(TYPE)
4949
#endif
5050

51+
// LIBC_HAS_VECTOR_TYPE is 1 only when all of the following hold:
//   - the compiler reports the ext_vector_type attribute,
//   - the compiler is Clang with a major version of at least 15,
//   - the target architecture is x86 or ARM.
// In that case LIBC_VECTOR_TYPE(N) expands to the attribute that turns an
// element type into an N-element vector type. Otherwise LIBC_HAS_VECTOR_TYPE
// is 0 and LIBC_VECTOR_TYPE is left undefined.
#if !__has_attribute(ext_vector_type) || !defined(__clang__) ||                \
    __clang_major__ < 15 ||                                                    \
    !(defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_ARM))
#define LIBC_HAS_VECTOR_TYPE 0
#else
#define LIBC_HAS_VECTOR_TYPE 1
#define LIBC_VECTOR_TYPE(N) __attribute__((ext_vector_type(N)))
#endif
59+
5160
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@
5959
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
6060
#endif // __ARM_FP
6161

62+
// Expose the compiler's __ARM_NEON feature macro under the libc naming scheme.
#ifdef __ARM_NEON
#define LIBC_TARGET_CPU_HAS_ARM_NEON
#endif // __ARM_NEON
65+
6266
#if defined(__riscv_flen)
6367
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
6468
#if defined(__riscv_zfhmin)

libc/src/__support/vector.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//===-- Helper functions for SIMD extensions --------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_VECTOR_H
10+
#define LLVM_LIBC_SRC___SUPPORT_VECTOR_H
11+
12+
#include "hdr/stdint_proxy.h"
13+
#include "src/__support/CPP/bit.h"
14+
#include "src/__support/CPP/type_traits.h"
15+
#include "src/__support/common.h"
16+
#include "src/__support/macros/properties/cpu_features.h"
17+
18+
#include <stddef.h>
19+
20+
namespace LIBC_NAMESPACE_DECL {
21+
22+
static_assert(LIBC_HAS_VECTOR_TYPE, "Compiler does not support vector types.");
23+
24+
namespace vector {
25+
26+
// Maps a lane count N to the unsigned integer type that to_bitmask() returns
// for an N-lane vector. Only the lane counts specialized below are usable;
// for any other N, BitmaskTy<N> stays an incomplete type.
template <size_t N> struct BitmaskTy;
template <> struct BitmaskTy<1> { using type = uint8_t; };
template <> struct BitmaskTy<8> { using type = uint8_t; };
template <> struct BitmaskTy<16> { using type = uint16_t; };
template <> struct BitmaskTy<32> { using type = uint32_t; };
template <> struct BitmaskTy<64> { using type = uint64_t; };
42+
43+
// Common description of a vector register that is BYTES bytes wide and holds
// lanes of type T.
//   WIDTH        - register size in bytes.
//   NUM_ELEMENTS - number of T lanes that fit in one register.
template <typename T, size_t BYTES> struct RegisterLayout {
  static constexpr size_t WIDTH = BYTES;
  static constexpr size_t NUM_ELEMENTS = BYTES / sizeof(T);
};

// Per-extension register descriptions; they differ only in the byte width.
template <typename T> struct SSE2 : RegisterLayout<T, 16> {};
template <typename T> struct AVX2 : RegisterLayout<T, 32> {};
template <typename T> struct AVX512 : RegisterLayout<T, 64> {};
template <typename T> struct Neon : RegisterLayout<T, 16> {};
59+
60+
#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
61+
template <typename T> using Platform = AVX512<T>;
62+
#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
63+
template <typename T> using Platform = AVX2<T>;
64+
#elif defined(LIBC_TARGET_CPU_HAS_SSE2)
65+
template <typename T> using Platform = SSE2<T>;
66+
#elif defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
67+
template <typename T> using Platform = Neon<T>;
68+
#endif
69+
70+
template <typename T, size_t N = Platform<T>::NUM_ELEMENTS>
71+
using Vector = T LIBC_VECTOR_TYPE(N);
72+
73+
template <typename To, typename From, size_t N>
74+
LIBC_INLINE Vector<To, N> convert(const Vector<From, N> &v) {
75+
return __builtin_convertvector(v, Vector<To, N>);
76+
}
77+
78+
template <typename T, size_t N>
79+
LIBC_INLINE typename BitmaskTy<N>::type to_bitmask(const Vector<T, N> &v) {
80+
return cpp::bit_cast<typename BitmaskTy<N>::type>(convert<bool, T, N>(v));
81+
}
82+
} // namespace vector
83+
84+
} // namespace LIBC_NAMESPACE_DECL
85+
86+
#endif // LLVM_LIBC_SRC___SUPPORT_VECTOR_H

libc/src/string/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_header_library(
2020
libc.hdr.stdint_proxy
2121
libc.src.__support.CPP.bitset
2222
libc.src.__support.CPP.type_traits
23+
libc.src.__support.vector
2324
libc.src.__support.common
2425
${string_config_options}
2526
)

libc/src/string/string_utils.h

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include "src/__support/macros/config.h"
2323
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
2424

25+
#if LIBC_HAS_VECTOR_TYPE
26+
#include "src/__support/vector.h"
27+
#endif
28+
2529
namespace LIBC_NAMESPACE_DECL {
2630
namespace internal {
2731

@@ -61,7 +65,7 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
6165
}
6266

6367
template <typename Word>
64-
LIBC_INLINE size_t string_length_wide_read(const char *src) {
68+
LIBC_INLINE size_t string_length_wide_read_chars(const char *src) {
6569
const char *char_ptr = src;
6670
// Step 1: read 1 byte at a time to align to block size
6771
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
@@ -81,6 +85,38 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
8185
return static_cast<size_t>(char_ptr - src);
8286
}
8387

88+
#if LIBC_HAS_VECTOR_TYPE
89+
// Returns the length of the null-terminated string at `src`, examining one
// native-width vector of chars per step instead of one char at a time.
//
// The first load starts at `src` rounded down to the vector size, so it can
// cover bytes that lie before `src`; their mask bits are shifted out before
// the mask is tested. Every later load starts on a vector-size boundary.
LIBC_INLINE size_t string_length_wide_read_vector(const char *src) {
  using namespace vector;
  using Block = Vector<char>;

  const Block zeros('\0');

  // Round the pointer down to the native vector width and shift out the mask
  // bits that belong to the unused bytes in front of `src`.
  const char *block_start = __builtin_align_down(src, sizeof(Block));
  const Block *block = reinterpret_cast<const Block *>(block_start);
  auto first_mask = to_bitmask(*block == zeros);
  if (decltype(first_mask) in_range = first_mask >> (src - block_start))
    return cpp::countr_zero(in_range);

  // Advance one full vector at a time until a null byte shows up.
  while (true) {
    ++block;
    auto hits = to_bitmask(*block == zeros);
    if (hits)
      return (reinterpret_cast<const char *>(block) - src) +
             cpp::countr_zero(hits);
  }
}
110+
#endif
111+
112+
// Entry point for the wide-read string length: uses the vector implementation
// when LIBC_HAS_VECTOR_TYPE is set, and otherwise falls back to reading the
// string in `unsigned int` sized blocks.
LIBC_INLINE size_t string_length_wide_read(const char *src) {
#if !LIBC_HAS_VECTOR_TYPE
  return string_length_wide_read_chars<unsigned int>(src);
#else
  return string_length_wide_read_vector(src);
#endif
}
119+
84120
// Returns the length of a string, denoted by the first occurrence
85121
// of a null terminator.
86122
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
@@ -90,7 +126,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
90126
// be aligned to a word boundary, so it's the size we use for reading the
91127
// string a block at a time.
92128
if constexpr (cpp::is_same_v<T, char>)
93-
return string_length_wide_read<unsigned int>(src);
129+
return string_length_wide_read(src);
94130
#endif
95131
size_t length;
96132
for (length = 0; *src; ++src, ++length)

0 commit comments

Comments
 (0)