-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[libc] Implement wide read strlen with LLVM vector type #152605
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) ChangesSummary: It's based off of #152389 and Full diff: https://github.com/llvm/llvm-project/pull/152605.diff 7 Files Affected:
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 2196d9e23bba7..e2722ed352f71 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -313,6 +313,15 @@ add_header_library(
libc.src.string.memory_utils.inline_memset
)
+# Helpers built on the Clang vector extension (see vector.h). Every project
+# header that vector.h includes is listed so the dependency graph matches it.
+add_header_library(
+ vector
+ HDRS
+ vector.h
+ DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.CPP.bit
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.common
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.properties.cpu_features
+)
+
add_header_library(
char_vector
HDRS
diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h
index c6474673de85a..53912dce427d2 100644
--- a/libc/src/__support/macros/attributes.h
+++ b/libc/src/__support/macros/attributes.h
@@ -48,4 +48,12 @@
#define LIBC_PREFERED_TYPE(TYPE)
#endif
+#if __has_attribute(ext_vector_type) && defined(__clang__) && \
+ __clang_major__ >= 15 // Clang 15 is required because the helpers rely on boolean vectors.
+#define LIBC_HAS_VECTOR_TYPE 1 // The helpers in src/__support/vector.h can be used.
+#define LIBC_VECTOR_TYPE(N) __attribute__((ext_vector_type(N))) // Marks a type as an N-element vector.
+#else
+#define LIBC_HAS_VECTOR_TYPE 0 // LIBC_VECTOR_TYPE is not defined on this path.
+#endif
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index fde30eadfd83b..fc6099ca6ccc5 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -59,6 +59,10 @@
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
#endif // __ARM_FP
+#if defined(__ARM_NEON) // NOTE(review): presumably predefined by the compiler when NEON is enabled - confirm.
+#define LIBC_TARGET_CPU_HAS_ARM_NEON // Checked by src/__support/vector.h to select the Neon layout.
+#endif
+
#if defined(__riscv_flen)
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
#if defined(__riscv_zfhmin)
diff --git a/libc/src/__support/vector.h b/libc/src/__support/vector.h
new file mode 100644
index 0000000000000..581e727349761
--- /dev/null
+++ b/libc/src/__support/vector.h
@@ -0,0 +1,93 @@
+//===-- Helper functions for SIMD extensions --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_VECTOR_H
+#define LLVM_LIBC_SRC___SUPPORT_VECTOR_H
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+static_assert(LIBC_HAS_VECTOR_TYPE, "Compiler does not support vector types.");
+
+namespace vector {
+
+template <size_t N> struct BitmaskTy; // Integer type to_bitmask returns for an N-element vector; only the sizes below are defined.
+template <> struct BitmaskTy<1> { // One element (e.g. the Scalar layout of a 1-byte type) still uses a whole byte.
+ using type = uint8_t;
+};
+template <> struct BitmaskTy<8> { // 8 elements -> 8-bit mask.
+ using type = uint8_t;
+};
+template <> struct BitmaskTy<16> { // 16 elements -> 16-bit mask.
+ using type = uint16_t;
+};
+template <> struct BitmaskTy<32> { // 32 elements -> 32-bit mask.
+ using type = uint32_t;
+};
+template <> struct BitmaskTy<64> { // 64 elements -> 64-bit mask.
+ using type = uint64_t;
+};
+
+template <typename T> struct Scalar { // Fallback layout: the "vector" is a single T.
+ static constexpr size_t WIDTH = sizeof(T); // Vector width in bytes.
+ static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); // Always 1 for this layout.
+};
+template <typename T> struct SSE2 { // 16-byte vectors.
+ static constexpr size_t WIDTH = 16;
+ static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); // Number of T elements per vector.
+};
+template <typename T> struct AVX2 { // 32-byte vectors.
+ static constexpr size_t WIDTH = 32;
+ static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); // Number of T elements per vector.
+};
+template <typename T> struct AVX512 { // 64-byte vectors.
+ static constexpr size_t WIDTH = 64;
+ static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); // Number of T elements per vector.
+};
+template <typename T> struct Neon { // 16-byte vectors.
+ static constexpr size_t WIDTH = 16;
+ static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); // Number of T elements per vector.
+};
+
+#if defined(LIBC_TARGET_CPU_HAS_AVX512F) // Platform<T> is the layout for the build target; x86 layouts are tested widest first.
+template <typename T> using Platform = AVX512<T>;
+#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
+template <typename T> using Platform = AVX2<T>;
+#elif defined(LIBC_TARGET_CPU_HAS_SSE2)
+template <typename T> using Platform = SSE2<T>;
+#elif defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
+template <typename T> using Platform = Neon<T>;
+#else // None of the feature macros above is defined: use one element per vector.
+template <typename T> using Platform = Scalar<T>;
+#endif
+
+template <typename T, size_t N = Platform<T>::NUM_ELEMENTS> // N defaults to the element count of the selected Platform layout.
+using Vector = T LIBC_VECTOR_TYPE(N); // N-element vector of T, built with the LIBC_VECTOR_TYPE attribute macro.
+
+template <typename To, typename From, size_t N> // To is the destination element type; to_bitmask supplies all three arguments explicitly.
+LIBC_INLINE Vector<To, N> convert(const Vector<From, N> &v) { // Returns v converted to an N-element vector of To.
+ return __builtin_convertvector(v, Vector<To, N>); // Compiler builtin performing the per-element conversion.
+}
+
+template <typename T, size_t N> // N must be a size that BitmaskTy defines.
+LIBC_INLINE typename BitmaskTy<N>::type to_bitmask(const Vector<T, N> &v) { // Packs v into an integer mask; NOTE(review): presumably bit i corresponds to element i - confirm.
+ return __builtin_bit_cast(typename BitmaskTy<N>::type, // Reinterpret the bool vector's storage as the mask integer.
+ convert<bool, T, N>(v)); // Each element is converted to bool first.
+}
+} // namespace vector
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_VECTOR_H
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 809decfbe5f08..cee1a80d4566a 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -20,6 +20,7 @@ add_header_library(
libc.hdr.stdint_proxy
libc.src.__support.CPP.bitset
libc.src.__support.CPP.type_traits
+ libc.src.__support.vector
libc.src.__support.common
${string_config_options}
)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 80e5783c7890b..e2549f55972b5 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -22,6 +22,10 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#if LIBC_HAS_VECTOR_TYPE
+#include "src/__support/vector.h"
+#endif
+
namespace LIBC_NAMESPACE_DECL {
namespace internal {
@@ -61,7 +65,7 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
}
template <typename Word>
-LIBC_INLINE size_t string_length_wide_read(const char *src) {
+LIBC_INLINE size_t string_length_wide_read_chars(const char *src) {
const char *char_ptr = src;
// Step 1: read 1 byte at a time to align to block size
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
@@ -81,6 +85,40 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
return static_cast<size_t>(char_ptr - src);
}
+#if LIBC_HAS_VECTOR_TYPE
+// Returns the length of the null-terminated string at src, examining one
+// Vector<char> worth of bytes per step. Every load is made from an address
+// aligned to sizeof(Vector<char>), so the first load can include bytes that
+// precede src; their mask bits are shifted out before the mask is inspected.
+LIBC_INLINE size_t string_length_wide_read_vector(const char *src) {
+ using namespace vector;
+
+ const Vector<char> null('\0');
+
+ // Align the pointer to the native vector width and shift out unused bytes.
+ const char *aligned = reinterpret_cast<const char *>(
+ reinterpret_cast<uintptr_t>(src) &
+ ~static_cast<uintptr_t>(sizeof(Vector<char>) - 1));
+ const Vector<char> *char_ptr =
+ reinterpret_cast<const Vector<char> *>(aligned);
+ const auto first_mask = to_bitmask(*char_ptr == null);
+ using MaskTy = decltype(to_bitmask(*char_ptr == null));
+ // Drop the bits that belong to bytes located before src.
+ if (MaskTy shifted = static_cast<MaskTy>(first_mask >> (src - aligned)))
+ return static_cast<size_t>(cpp::countr_zero(shifted));
+
+ // Continue until we find the null byte.
+ for (;;) {
+ ++char_ptr;
+ if (auto bitmask = to_bitmask(*char_ptr == null))
+ return static_cast<size_t>(reinterpret_cast<const char *>(char_ptr) -
+ src) +
+ static_cast<size_t>(cpp::countr_zero(bitmask));
+ }
+}
+#endif
+
+// Entry point for the wide-read strlen. Uses the vector implementation when
+// LIBC_HAS_VECTOR_TYPE is set and the word-at-a-time implementation otherwise.
+// Word is only used by the fallback, which is a template on its block type and
+// cannot deduce it from src. The default keeps calls without a template
+// argument valid, and calls that pass one explicitly still compile.
+template <typename Word = unsigned int>
+LIBC_INLINE size_t string_length_wide_read(const char *src) {
+#if LIBC_HAS_VECTOR_TYPE
+ return string_length_wide_read_vector(src);
+#else
+ return string_length_wide_read_chars<Word>(src);
+#endif
+}
+
// Returns the length of a string, denoted by the first occurrence
// of a null terminator.
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
diff --git a/libc/src/string/strlen.cpp b/libc/src/string/strlen.cpp
index 234edb81d4c8c..3cb58df080db5 100644
--- a/libc/src/string/strlen.cpp
+++ b/libc/src/string/strlen.cpp
@@ -11,6 +11,8 @@
#include "src/__support/macros/null_check.h"
#include "src/string/string_utils.h"
+#if LIBC_HAS_VECTOR_TYPE
+// vector.h static_asserts on LIBC_HAS_VECTOR_TYPE, so include it only when set.
+#include "src/__support/vector.h"
+#endif
+
#include "src/__support/common.h"
namespace LIBC_NAMESPACE_DECL {
@@ -19,6 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
// There might be potential for compiler optimization.
LLVM_LIBC_FUNCTION(size_t, strlen, (const char *src)) { // strlen entry point; the length is computed by internal::string_length.
LIBC_CRASH_ON_NULLPTR(src); // NOTE(review): presumably stops execution when src is null - confirm in macros/null_check.h.
+
return internal::string_length(src);
}
|
04640be
to
1513156
Compare
Summary: This is a PR to show how this could be done cross-platform with LLVM vectors. The downside is that this only works with LLVM/Clang 15 or newer due to the needed support for boolean vectors. It's based off of llvm#152389 and mostly just shows a common `vector` helper that could be used for anything SIMD related.
What will be the design ideas on RVV/SVE2 support? |
I don't think there's a good generic way to access variable length vectors in clang yet, it's all through really Arm specific types. But, I'm not an expert on using those. |
#if __has_attribute(ext_vector_type) && defined(__clang__) && \ | ||
__clang_major__ >= 15 && \ | ||
(defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_ARM)) | ||
#define LIBC_HAS_VECTOR_TYPE 1 | ||
#define LIBC_VECTOR_TYPE(N) __attribute__((ext_vector_type(N))) | ||
#else | ||
#define LIBC_HAS_VECTOR_TYPE 0 | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't exactly follow this condition. __has_attribute(ext_vector_type) should be all you need for a "has vector type". I would separate out the profitability concern
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's because clang 15 added support for bool vectors, which is what emits to something optimal here. Before clang 15 it was an error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But you aren't using bool vectors? Also this would need to be much more specific and drop the target check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's inside the get_bitmask
utility, we convert the -1
comparison mask to a bool vector and then bitcast it to an integer mask we use ctz
on. Is there a more idiomatic check for bool vectors?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs to be broken up and commented, this is not a mere "has vector type"
Also, assembly comparison between the hand-written versions for x64 in the other patch, seems roughly the same https://godbolt.org/z/nbPxoMrcM. |
Summary:
This is a PR to show how this could be done cross-platform with LLVM
vectors. The downside is that this only works with LLVM/Clang 15 due to
the needed support for boolean vectors.
It's based off of #152389 and
mostly just shows a common
vector
helper that could be used for anything SIMD related.