diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 2196d9e23bba7..e2722ed352f71 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -313,6 +313,15 @@ add_header_library( libc.src.string.memory_utils.inline_memset ) +add_header_library( + vector + HDRS + vector.h + DEPENDS + libc.hdr.stdint_proxy + libc.src.__support.macros.attributes +) + add_header_library( char_vector HDRS diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h index c6474673de85a..3bf1c5788de26 100644 --- a/libc/src/__support/macros/attributes.h +++ b/libc/src/__support/macros/attributes.h @@ -48,4 +48,13 @@ #define LIBC_PREFERED_TYPE(TYPE) #endif +#if __has_attribute(ext_vector_type) && defined(__clang__) && \ + __clang_major__ >= 15 && \ + (defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_ARM)) +#define LIBC_HAS_VECTOR_TYPE 1 +#define LIBC_VECTOR_TYPE(N) __attribute__((ext_vector_type(N))) +#else +#define LIBC_HAS_VECTOR_TYPE 0 +#endif + #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index fde30eadfd83b..fc6099ca6ccc5 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -59,6 +59,10 @@ #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE #endif // __ARM_FP +#if defined(__ARM_NEON) +#define LIBC_TARGET_CPU_HAS_ARM_NEON +#endif + #if defined(__riscv_flen) // https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc #if defined(__riscv_zfhmin) diff --git a/libc/src/__support/vector.h b/libc/src/__support/vector.h new file mode 100644 index 0000000000000..bebe2f65efaf7 --- /dev/null +++ b/libc/src/__support/vector.h @@ -0,0 +1,86 @@ +//===-- Helper functions for SIMD extensions --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_VECTOR_H +#define LLVM_LIBC_SRC___SUPPORT_VECTOR_H + +#include "hdr/stdint_proxy.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/type_traits.h" +#include "src/__support/common.h" +#include "src/__support/macros/properties/cpu_features.h" + +#include + +namespace LIBC_NAMESPACE_DECL { + +static_assert(LIBC_HAS_VECTOR_TYPE, "Compiler does not support vector types."); + +namespace vector { + +template struct BitmaskTy; +template <> struct BitmaskTy<1> { + using type = uint8_t; +}; +template <> struct BitmaskTy<8> { + using type = uint8_t; +}; +template <> struct BitmaskTy<16> { + using type = uint16_t; +}; +template <> struct BitmaskTy<32> { + using type = uint32_t; +}; +template <> struct BitmaskTy<64> { + using type = uint64_t; +}; + +template struct SSE2 { + static constexpr size_t WIDTH = 16; + static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); +}; +template struct AVX2 { + static constexpr size_t WIDTH = 32; + static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); +}; +template struct AVX512 { + static constexpr size_t WIDTH = 64; + static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); +}; +template struct Neon { + static constexpr size_t WIDTH = 16; + static constexpr size_t NUM_ELEMENTS = WIDTH / sizeof(T); +}; + +#if defined(LIBC_TARGET_CPU_HAS_AVX512F) +template using Platform = AVX512; +#elif defined(LIBC_TARGET_CPU_HAS_AVX2) +template using Platform = AVX2; +#elif defined(LIBC_TARGET_CPU_HAS_SSE2) +template using Platform = SSE2; +#elif defined(LIBC_TARGET_CPU_HAS_ARM_NEON) +template using Platform = Neon; +#endif + +template ::NUM_ELEMENTS> +using Vector = T LIBC_VECTOR_TYPE(N); + +template +LIBC_INLINE Vector convert(const Vector &v) { + return
__builtin_convertvector(v, Vector); +} + +template +LIBC_INLINE typename BitmaskTy::type to_bitmask(const Vector &v) { + return cpp::bit_cast::type>(convert(v)); +} +} // namespace vector + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_VECTOR_H diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 809decfbe5f08..cee1a80d4566a 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -20,6 +20,7 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bitset libc.src.__support.CPP.type_traits + libc.src.__support.vector libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 80e5783c7890b..5b5932d78b767 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -22,6 +22,10 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#if LIBC_HAS_VECTOR_TYPE +#include "src/__support/vector.h" +#endif + namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -61,7 +65,7 @@ template LIBC_INLINE constexpr bool has_zeroes(Word block) { } template -LIBC_INLINE size_t string_length_wide_read(const char *src) { +LIBC_INLINE size_t string_length_wide_read_chars(const char *src) { const char *char_ptr = src; // Step 1: read 1 byte at a time to align to block size for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0; @@ -81,6 +85,38 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) { return static_cast(char_ptr - src); } +#if LIBC_HAS_VECTOR_TYPE +LIBC_INLINE size_t string_length_wide_read_vector(const char *src) { + using namespace vector; + + const Vector null('\0'); + + // Align the pointer to the native vector width and shift out unused bytes.
+ const char *aligned = __builtin_align_down(src, sizeof(Vector)); + const Vector *char_ptr = + reinterpret_cast *>(aligned); + auto bitmask = to_bitmask(*char_ptr == null); + if (decltype(bitmask) shifted = bitmask >> (src - aligned)) + return cpp::countr_zero(shifted); + + // Continue until we find the null byte. + for (;;) { + ++char_ptr; + if (auto bitmask = to_bitmask(*char_ptr == null)) + return (reinterpret_cast(char_ptr) - src) + + cpp::countr_zero(bitmask); + } +} +#endif + +LIBC_INLINE size_t string_length_wide_read(const char *src) { +#if LIBC_HAS_VECTOR_TYPE + return string_length_wide_read_vector(src); +#else + return string_length_wide_read_chars(src); +#endif +} + // Returns the length of a string, denoted by the first occurrence // of a null terminator. template LIBC_INLINE size_t string_length(const T *src) { @@ -90,7 +126,7 @@ template LIBC_INLINE size_t string_length(const T *src) { // be aligned to a word boundary, so it's the size we use for reading the // string a block at a time. if constexpr (cpp::is_same_v) - return string_length_wide_read(src); + return string_length_wide_read(src); #endif size_t length; for (length = 0; *src; ++src, ++length)