From de76b6a2b0210131c8316d47323c20d3ac59ff9b Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Sun, 9 Nov 2025 20:09:04 -0500 Subject: [PATCH 1/2] [libc] add an SVE implementation of strlen --- libc/fuzzing/__support/freelist_heap_fuzz.cpp | 2 +- libc/fuzzing/string/CMakeLists.txt | 8 +++ libc/fuzzing/string/strlen_fuzz.cpp | 32 ++++++++++ .../memory_utils/aarch64/inline_strlen.h | 63 +++++++++++++++++-- libc/src/string/string_utils.h | 10 ++- libc/test/src/string/strlen_test.cpp | 12 ++++ 6 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 libc/fuzzing/string/strlen_fuzz.cpp diff --git a/libc/fuzzing/__support/freelist_heap_fuzz.cpp b/libc/fuzzing/__support/freelist_heap_fuzz.cpp index 7b7985a83c3e6..0b400cb156491 100644 --- a/libc/fuzzing/__support/freelist_heap_fuzz.cpp +++ b/libc/fuzzing/__support/freelist_heap_fuzz.cpp @@ -24,7 +24,7 @@ asm(R"( _end: .fill 1024 __llvm_libc_heap_limit: -)"; +)"); using LIBC_NAMESPACE::FreeListHeap; using LIBC_NAMESPACE::inline_memset; diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt index efda80b59c951..0918e92552ea7 100644 --- a/libc/fuzzing/string/CMakeLists.txt +++ b/libc/fuzzing/string/CMakeLists.txt @@ -40,3 +40,11 @@ add_libc_fuzzer( DEPENDS libc.src.strings.bcmp ) + +add_libc_fuzzer( + strlen_fuzz + SRCS + strlen_fuzz.cpp + DEPENDS + libc.src.string.strlen +) diff --git a/libc/fuzzing/string/strlen_fuzz.cpp b/libc/fuzzing/string/strlen_fuzz.cpp new file mode 100644 index 0000000000000..dd72c19b7fdc7 --- /dev/null +++ b/libc/fuzzing/string/strlen_fuzz.cpp @@ -0,0 +1,32 @@ +//===-- strlen_fuzz.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc strlen implementation. +/// +//===----------------------------------------------------------------------===// + +#include "src/string/strlen.h" +#include +#include + +// always null terminate the data +extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size); +extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size, + size_t max_size, unsigned int seed) { + size = LLVMFuzzerMutate(data, size, max_size); + data[size - 1] = '\0'; + return size; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + size_t ref = ::strlen(reinterpret_cast(data)); + size_t impl = LIBC_NAMESPACE::strlen(reinterpret_cast(data)); + if (ref != impl) + __builtin_trap(); + return 0; +} diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 87f5ccdd56e23..eafaca9776a42 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -8,14 +8,13 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H +#include "src/__support/macros/properties/cpu_features.h" + #if defined(__ARM_NEON) #include "src/__support/CPP/bit.h" // countr_zero - #include #include // size_t - namespace LIBC_NAMESPACE_DECL { - namespace neon { [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t string_length(const char *src) { @@ -45,9 +44,63 @@ string_length(const char *src) { } } } // namespace neon +} // namespace LIBC_NAMESPACE_DECL +#endif // __ARM_NEON -namespace string_length_impl = neon; +#ifdef LIBC_TARGET_CPU_HAS_SVE +#include "src/__support/macros/optimization.h" +#include +namespace LIBC_NAMESPACE_DECL { +namespace sve { +[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) { + const uint8_t *ptr = reinterpret_cast(src); + // Initialize the first-fault register to all true + svsetffr(); + const svbool_t all_true = svptrue_b8(); // all true predicate + svbool_t cmp_zero; + size_t len = 0; + for (;;) { + // Read a vector's worth of bytes, stopping on first fault. + svuint8_t data = svldff1_u8(all_true, &ptr[len]); + svbool_t fault_mask = svrdffr_z(all_true); + bool has_no_fault = svptest_last(all_true, fault_mask); + if (LIBC_LIKELY(has_no_fault)) { + // First fault did not fail: the whole vector is valid. + // Avoid depending on the contents of FFR beyond the branch. + len += svcntb(); // speculative increment + cmp_zero = svcmpeq_n_u8(all_true, data, 0); + bool has_no_zero = !svptest_any(all_true, cmp_zero); + if (LIBC_LIKELY(has_no_zero)) + continue; + len -= svcntb(); // undo speculative increment + break; + } else { + // First fault failed: only some of the vector is valid. + // Perform the comparison only on the valid bytes. + cmp_zero = svcmpeq_n_u8(fault_mask, data, 0); + bool has_zero = svptest_any(fault_mask, cmp_zero); + if (LIBC_LIKELY(has_zero)) + break; + svsetffr(); + len += svcntp_b8(all_true, fault_mask); + continue; + } + } + // Select the bytes before the first and count them. + svbool_t before_zero = svbrkb_z(all_true, cmp_zero); + len += svcntp_b8(all_true, before_zero); + return len; +} +} // namespace sve +} // namespace LIBC_NAMESPACE_DECL +#endif // LIBC_TARGET_CPU_HAS_SVE + +namespace LIBC_NAMESPACE_DECL { +#ifdef LIBC_TARGET_CPU_HAS_SVE +namespace string_length_impl = sve; +#elif defined(__ARM_NEON) +namespace string_length_impl = neon; +#endif } // namespace LIBC_NAMESPACE_DECL -#endif // __ARM_NEON #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index cbce62ead0328..bd45bddf0b2a8 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -22,9 +22,13 @@ #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" #include "src/string/memory_utils/inline_memcpy.h" -#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) +// SVE implementation has fault safety +#if defined(LIBC_TARGET_CPU_HAS_SVE) +#include "src/string/memory_utils/aarch64/inline_strlen.h" +#elif defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) #if LIBC_HAS_VECTOR_TYPE #include "src/string/memory_utils/generic/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_X86) @@ -33,8 +37,8 @@ #include "src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; -#endif -#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) +#endif // LIBC_TARGET_CPU_HAS_SVE +#endif // defined(LIBC_TARGET_CPU_HAS_SVE) namespace LIBC_NAMESPACE_DECL { namespace internal { diff --git a/libc/test/src/string/strlen_test.cpp b/libc/test/src/string/strlen_test.cpp index 4eb9d47e9209d..784dd7b194b3f 100644 --- a/libc/test/src/string/strlen_test.cpp +++ b/libc/test/src/string/strlen_test.cpp @@ -22,3 +22,15 @@ TEST(LlvmLibcStrLenTest, AnyString) { size_t result = LIBC_NAMESPACE::strlen(any); ASSERT_EQ((size_t)12, result); } + +TEST(LlvmLibcStrLenTest, DataAfterNulString) { + constexpr char A[10] = {'a', 'b', 'c', 'd', 'e', 'f', 0, 'h', 'i', 'j'}; + size_t result = LIBC_NAMESPACE::strlen(A); + ASSERT_EQ((size_t)6, result); +} + +TEST(LlvmLibcStrLenTest, MultipleNulsInOneWord) { + constexpr char A[10] = {'a', 'b', 0, 'd', 'e', 'f', 0, 'h', 'i', 'j'}; + size_t result = LIBC_NAMESPACE::strlen(A); + ASSERT_EQ((size_t)2, result); +} From 2b43a62896b25dbcc509e43d6e70e030215fcf37 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Mon, 10 Nov 2025 17:36:14 -0500 Subject: [PATCH 2/2] [libc] address CR and dispatch header changes --- libc/src/string/string_utils.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index bd45bddf0b2a8..cbce62ead0328 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -22,13 +22,9 @@ #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" #include "src/string/memory_utils/inline_memcpy.h" -// SVE implementation has fault safety -#if defined(LIBC_TARGET_CPU_HAS_SVE) -#include "src/string/memory_utils/aarch64/inline_strlen.h" -#elif defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) +#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) #if LIBC_HAS_VECTOR_TYPE #include "src/string/memory_utils/generic/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_X86) @@ -37,8 +33,8 @@ #include "src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; -#endif // LIBC_TARGET_CPU_HAS_SVE -#endif // defined(LIBC_TARGET_CPU_HAS_SVE) +#endif +#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal {