Skip to content
51 changes: 51 additions & 0 deletions libc/src/string/memory_utils/aarch64/inline_strlen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//===-- Strlen implementation for aarch64 ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H

#if defined(__ARM_NEON)
#include "src/__support/CPP/bit.h" // countr_zero

#include <arm_neon.h>
#include <stddef.h> // size_t

namespace LIBC_NAMESPACE_DECL {

namespace neon {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;

uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
Vector v = *block_ptr;
Vector vcmp = vceqz_u8(v);
uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
uint64_t cmp = vget_lane_u64(cmp_mask, 0);
cmp = cmp >> (misalign_bytes << 3);
if (cmp)
return cpp::countr_zero(cmp) >> 3;

while (true) {
++block_ptr;
v = *block_ptr;
vcmp = vceqz_u8(v);
cmp_mask = vreinterpret_u64_s8(vcmp);
cmp = vget_lane_u64(cmp_mask, 0);
if (cmp)
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
reinterpret_cast<uintptr_t>(src) +
(cpp::countr_zero(cmp) >> 3));
}
}
} // namespace neon

namespace string_length_impl = neon;

} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
102 changes: 102 additions & 0 deletions libc/src/string/memory_utils/x86_64/inline_strlen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//===-- Strlen implementation for x86_64 ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H

#include "src/__support/CPP/bit.h" // countr_zero

#include <immintrin.h>
#include <stddef.h> // size_t

namespace LIBC_NAMESPACE_DECL {

namespace string_length_internal {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
Mask CompareAndMask(const Vector *block_ptr);

template <typename Vector, typename Mask,
decltype(CompareAndMask<Vector, Mask>)>
size_t string_length_vector(const char *src) {
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);

const Vector *block_ptr =
reinterpret_cast<const Vector *>(src - misalign_bytes);
auto cmp = CompareAndMask<Vector, Mask>(block_ptr) >> misalign_bytes;
if (cmp)
return cpp::countr_zero(cmp);

while (true) {
block_ptr++;
cmp = CompareAndMask<Vector, Mask>(block_ptr);
if (cmp)
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
reinterpret_cast<uintptr_t>(src) +
cpp::countr_zero(cmp));
}
}

template <>
uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
__m128i v = _mm_load_si128(block_ptr);
__m128i z = _mm_setzero_si128();
__m128i c = _mm_cmpeq_epi8(z, v);
return _mm_movemask_epi8(c);
}

namespace sse2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m128i, uint32_t,
CompareAndMask<__m128i, uint32_t>>(src);
}
} // namespace sse2

#if defined(__AVX2__)
template <>
uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
__m256i v = _mm256_load_si256(block_ptr);
__m256i z = _mm256_setzero_si256();
__m256i c = _mm256_cmpeq_epi8(z, v);
return _mm256_movemask_epi8(c);
}

namespace avx2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m256i, uint32_t,
CompareAndMask<__m256i, uint32_t>>(src);
}
} // namespace avx2
#endif

#if defined(__AVX512F__)
template <>
__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
__m512i v = _mm512_load_si512(block_ptr);
__m512i z = _mm512_setzero_si512();
return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
}
namespace avx512 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m512i, __mmask64,
CompareAndMask<__m512i, __mmask64>>(src);
}
} // namespace avx512
#endif
} // namespace string_length_internal

#if defined(__AVX512F__)
namespace string_length_impl = string_length_internal::avx512;
#elif defined(__AVX2__)
namespace string_length_impl = string_length_internal::avx2;
#else
namespace string_length_impl = string_length_internal::sse2;
#endif

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
29 changes: 23 additions & 6 deletions libc/src/string/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
#if defined(LIBC_TARGET_ARCH_IS_X86)
#include "src/string/memory_utils/x86_64/inline_strlen.h"
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
#include "src/string/memory_utils/aarch64/inline_strlen.h"
#else
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
#endif
#endif

namespace LIBC_NAMESPACE_DECL {
namespace internal {

Expand Down Expand Up @@ -53,7 +63,7 @@ template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
// high bit set will no longer have it set, narrowing the list of bytes which
// result in non-zero values to just the zero byte.
template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
constexpr Word LOW_BITS = repeat_byte<Word>(0x01);
constexpr unsigned int LOW_BITS = repeat_byte<Word>(0x01);
constexpr Word HIGH_BITS = repeat_byte<Word>(0x80);
Word subtracted = block - LOW_BITS;
Word inverted = ~block;
Expand Down Expand Up @@ -81,16 +91,23 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
return static_cast<size_t>(char_ptr - src);
}

// Returns the length of a string, denoted by the first occurrence
// of a null terminator.
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
namespace wide_read {
LIBC_INLINE size_t string_length(const char *src) {
// Unsigned int is the default size for most processors, and on x86-64 it
// performs better than larger sizes when the src pointer can't be assumed to
// be aligned to a word boundary, so it's the size we use for reading the
// string a block at a time.
return string_length_wide_read<unsigned int>(src);
}

} // namespace wide_read

// Returns the length of a string, denoted by the first occurrence
// of a null terminator.
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
if constexpr (cpp::is_same_v<T, char>)
return string_length_wide_read<unsigned int>(src);
return string_length_impl::string_length(src);
#endif
size_t length;
for (length = 0; *src; ++src, ++length)
Expand Down
3 changes: 3 additions & 0 deletions utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4894,6 +4894,7 @@ libc_support_library(
"src/string/memory_utils/aarch64/inline_memcpy.h",
"src/string/memory_utils/aarch64/inline_memmove.h",
"src/string/memory_utils/aarch64/inline_memset.h",
"src/string/memory_utils/aarch64/inline_strlen.h",
"src/string/memory_utils/arm/common.h",
"src/string/memory_utils/arm/inline_memcpy.h",
"src/string/memory_utils/arm/inline_memset.h",
Expand All @@ -4918,6 +4919,7 @@ libc_support_library(
"src/string/memory_utils/x86_64/inline_memcpy.h",
"src/string/memory_utils/x86_64/inline_memmove.h",
"src/string/memory_utils/x86_64/inline_memset.h",
"src/string/memory_utils/x86_64/inline_strlen.h",
],
deps = [
":__support_common",
Expand All @@ -4942,6 +4944,7 @@ libc_support_library(
":__support_macros_optimization",
":hdr_limits_macros",
":llvm_libc_types_size_t",
":string_memory_utils",
":types_size_t",
],
)
Expand Down
Loading