From d7b5e8b22ce2a0bfb7740d4124f836ec0d7dfc55 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 23 Oct 2025 09:58:48 -0700 Subject: [PATCH 01/12] Refactor WIDE_READ mechanism to allow finer control over function selection --- .../modules/LLVMLibCCompileOptionRules.cmake | 5 +- libc/config/config.json | 11 +- libc/config/linux/arm/config.json | 7 +- libc/config/linux/config.json | 7 +- libc/config/linux/riscv/config.json | 7 +- libc/docs/configure.rst | 3 +- .../memory_utils/aarch64/inline_strlen.h | 6 +- .../memory_utils/generic/inline_strlen.h | 5 +- .../memory_utils/x86_64/inline_strlen.h | 14 +- libc/src/string/string_utils.h | 138 +++++++++++------- .../libc/libc_configure_options.bzl | 3 +- 11 files changed, 126 insertions(+), 80 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 4e9a9b66a63a7..f4e2a62d14b31 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -81,9 +81,8 @@ function(_get_compile_options_from_config output_var) list(APPEND config_options "-DLIBC_QSORT_IMPL=${LIBC_CONF_QSORT_IMPL}") endif() - if(LIBC_CONF_STRING_UNSAFE_WIDE_READ) - list(APPEND config_options "-DLIBC_COPT_STRING_UNSAFE_WIDE_READ") - endif() + list(APPEND config_options "-DLIBC_COPT_STRING_LENGTH_IMPL=${LIBC_CONF_STRING_LENGTH_IMPL}") + list(APPEND config_options "-DLIBC_COPT_FIND_FIRST_CHARACTER_IMPL=${LIBC_CONF_FIND_FIRST_CHARACTER_IMPL}") if(LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING) list(APPEND config_options "-DLIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING") diff --git a/libc/config/config.json b/libc/config/config.json index cfbe9a43948ea..12596a00911e2 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -40,6 +40,7 @@ "value": false, "doc": "Use an alternative printf float implementation based on 320-bit floats" }, + "LIBC_CONF_PRINTF_DISABLE_FIXED_POINT": { "value": false, "doc": "Disable printing fixed point values in printf and friends." @@ -64,9 +65,13 @@ } }, "string": { - "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { - "value": false, - "doc": "Read more than a byte at a time to perform byte-string operations like strlen." + "LIBC_CONF_STRING_LENGTH_IMPL": { + "value": "element", + "doc": "Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'." + }, + "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { + "value": "element", + "doc": "Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'." }, "LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": { "value": false, diff --git a/libc/config/linux/arm/config.json b/libc/config/linux/arm/config.json index e7ad4544b104d..caa16744d389f 100644 --- a/libc/config/linux/arm/config.json +++ b/libc/config/linux/arm/config.json @@ -1,7 +1,10 @@ { "string": { - "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { - "value": false + "LIBC_CONF_STRING_LENGTH_IMPL": { + "value": "element" + } + "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { + "value": "element" } } } diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json index 30e8b2cdadabe..93f5a1ef1f184 100644 --- a/libc/config/linux/config.json +++ b/libc/config/linux/config.json @@ -1,7 +1,10 @@ { "string": { - "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { - "value": true + "LIBC_CONF_STRING_LENGTH_IMPL": { + "value": "generic", + }, + "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { + "value": "wide", } } } diff --git a/libc/config/linux/riscv/config.json b/libc/config/linux/riscv/config.json index e7ad4544b104d..caa16744d389f 100644 --- a/libc/config/linux/riscv/config.json +++ b/libc/config/linux/riscv/config.json @@ -1,7 +1,10 @@ { "string": { - "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { - "value": false + "LIBC_CONF_STRING_LENGTH_IMPL": { + "value": "element" + } + "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { + "value": "element" } } } diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index e23fc824ce7c8..3049738aff6e7 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -58,8 +58,9 @@ to learn about the defaults for your platform and target. * **"setjmp" options** - ``LIBC_CONF_SETJMP_AARCH64_RESTORE_PLATFORM_REGISTER``: Make setjmp save the value of x18, and longjmp restore it. The AArch64 ABI delegates this register to platform ABIs, which can choose whether to make it caller-saved. * **"string" options** + - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'. - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled. - - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen. + - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'. * **"threads" options** - ``LIBC_CONF_THREAD_MODE``: The implementation used for Mutex, acceptable values are LIBC_THREAD_MODE_PLATFORM, LIBC_THREAD_MODE_SINGLE, and LIBC_THREAD_MODE_EXTERNAL. * **"time" options** diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 87f5ccdd56e23..b39df3e474669 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace neon { +namespace arch { [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; @@ -44,9 +44,7 @@ string_length(const char *src) { (cpp::countr_zero(cmp) >> 3)); } } -} // namespace neon - -namespace string_length_impl = neon; +} // namespace arch } // namespace LIBC_NAMESPACE_DECL #endif // __ARM_NEON diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h index 69700e801bcea..7630c0b7caedf 100644 --- a/libc/src/string/memory_utils/generic/inline_strlen.h +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -14,7 +14,7 @@ #include "src/__support/common.h" namespace LIBC_NAMESPACE_DECL { -namespace internal { +namespace generic { // Exploit the underlying integer representation to do a variable shift. LIBC_INLINE constexpr cpp::simd_mask shift_mask(cpp::simd_mask m, @@ -46,9 +46,8 @@ LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) { cpp::find_first_set(mask); } } -} // namespace internal +} // namespace generic -namespace string_length_impl = internal; } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h index 9e10d58363393..3d93960605f0c 100644 --- a/libc/src/string/memory_utils/x86_64/inline_strlen.h +++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h @@ -15,7 +15,8 @@ namespace LIBC_NAMESPACE_DECL { -namespace string_length_internal { +namespace internal::arch { + // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero. template LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static Mask @@ -92,15 +93,18 @@ namespace avx512 { } } // namespace avx512 #endif -} // namespace string_length_internal +[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { #if defined(__AVX512F__) -namespace string_length_impl = string_length_internal::avx512; + return avx512::string_length(src); #elif defined(__AVX2__) -namespace string_length_impl = string_length_internal::avx2; + return avx2::string_length(src); #else -namespace string_length_impl = string_length_internal::sse2; + return sse2::string_length(src); #endif +} + +} // namespace internal::arch } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 7feef56fb3676..b9f020ca9097c 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -24,21 +24,56 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/string/memory_utils/inline_memcpy.h" -#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) #if LIBC_HAS_VECTOR_TYPE #include "src/string/memory_utils/generic/inline_strlen.h" -#elif defined(LIBC_TARGET_ARCH_IS_X86) +#endif +#if defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_strlen.h" -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON) +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_strlen.h" -#else -namespace string_length_impl = LIBC_NAMESPACE::wide_read; #endif -#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal { +#if !LIBC_HAS_VECTOR_TYPE +// Foreword any generic vector impls to architecture specific ones +namespace arch {} +namespace generic = arch; +#endif + +namespace element { +// Element-by-element (usually a byte, but wider for wchar) implementations of +// functions that search for data. Slow, but easy to understand and analyze. + +// Returns the length of a string, denoted by the first occurrence +// of a null terminator. +LIBC_INLINE size_t string_length(const char *src) { + size_t length; + for (length = 0; *src; ++src, ++length) + ; + return length; +} + +template LIBC_INLINE size_t string_length_element(const T *src) { + size_t length; + for (length = 0; *src; ++src, ++length) + ; + return length; +} + +LIBC_INLINE void *find_first_character(const unsigned char *src, + unsigned char ch, size_t n) { + for (; n && *src != ch; --n, ++src) + ; + return n ? const_cast(src) : nullptr; +} +} // namespace element + +namespace wide { +// Generic, non-vector, implementations of functions that search for data +// by reading from memory block-by-block. + template LIBC_INLINE constexpr Word repeat_byte(Word byte) { static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits."); constexpr size_t BITS_IN_BYTE = CHAR_BIT; @@ -74,8 +109,13 @@ template LIBC_INLINE constexpr bool has_zeroes(Word block) { return (subtracted & inverted & HIGH_BITS) != 0; } -template -LIBC_INLINE size_t string_length_wide_read(const char *src) { +// Unsigned int is the default size for most processors, and on x86-64 it +// performs better than larger sizes when the src pointer can't be assumed to +// be aligned to a word boundary, so it's the size we use for reading the +// string a block at a time. + +LIBC_INLINE size_t string_length(const char *src) { + using Word = unsigned int; const char *char_ptr = src; // Step 1: read 1 byte at a time to align to block size for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0; @@ -95,37 +135,23 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) { return static_cast(char_ptr - src); } -namespace wide_read { -LIBC_INLINE size_t string_length(const char *src) { - // Unsigned int is the default size for most processors, and on x86-64 it - // performs better than larger sizes when the src pointer can't be assumed to - // be aligned to a word boundary, so it's the size we use for reading the - // string a block at a time. - return string_length_wide_read(src); -} - -} // namespace wide_read - -// Returns the length of a string, denoted by the first occurrence -// of a null terminator. -template LIBC_INLINE size_t string_length(const T *src) { -#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ - if constexpr (cpp::is_same_v) - return string_length_impl::string_length(src); -#endif - size_t length; - for (length = 0; *src; ++src, ++length) - ; - return length; -} - -template LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void * -find_first_character_wide_read(const unsigned char *src, unsigned char ch, - size_t n) { +find_first_character(const unsigned char *src, unsigned char ch, + size_t max_strlen = cpp::numeric_limits::max()) { + using Word = unsigned int; const unsigned char *char_ptr = src; size_t cur = 0; + // If the maximum size of the string is small, the overhead of aligning to a + // word boundary and generating a bitmask of the appropriate size may be + // greater than the gains from reading larger chunks. Based on some testing, + // the crossover point between when it's faster to just read bytewise and read + // blocks is somewhere between 16 and 32, so 4 times the size of the block + // should be in that range. + if (max_strlen < (sizeof(Word) * 4)) { + return element::find_first_character(src, ch, max_strlen); + } + size_t n = max_strlen; // Step 1: read 1 byte at a time to align to block size for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0 && cur < n; ++char_ptr, ++cur) { @@ -153,31 +179,35 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch, return const_cast(char_ptr); } -LIBC_INLINE void *find_first_character_byte_read(const unsigned char *src, - unsigned char ch, size_t n) { - for (; n && *src != ch; --n, ++src) - ; - return n ? const_cast(src) : nullptr; +} // namespace wide + +// Dispatch mechanism for implementations of performance-sensitive +// functions. Always measure, but generally from lower- to higher-performance +// order: +// +// 1. element - read char-by-char or wchar-by-wchar +// 3. wide - read word-by-word +// 3. generic - read using clang's internal vector types +// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics. +// +//The called implemenation is chosen at build-time by setting +// LIBC_CONF_{FUNC}_IMPL in config.json +static constexpr auto &string_length_impl = + LIBC_COPT_STRING_LENGTH_IMPL::string_length; +static constexpr auto &find_first_character_impl = + LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character; + +template LIBC_INLINE size_t string_length(const T *src) { + if constexpr (cpp::is_same_v) + return string_length_impl(src); + return element::string_length_element(src); } // Returns the first occurrence of 'ch' within the first 'n' characters of // 'src'. If 'ch' is not found, returns nullptr. LIBC_INLINE void *find_first_character(const unsigned char *src, unsigned char ch, size_t max_strlen) { -#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ - // If the maximum size of the string is small, the overhead of aligning to a - // word boundary and generating a bitmask of the appropriate size may be - // greater than the gains from reading larger chunks. Based on some testing, - // the crossover point between when it's faster to just read bytewise and read - // blocks is somewhere between 16 and 32, so 4 times the size of the block - // should be in that range. - // Unsigned int is used for the same reason as in strlen. - using BlockType = unsigned int; - if (max_strlen > (sizeof(BlockType) * 4)) { - return find_first_character_wide_read(src, ch, max_strlen); - } -#endif - return find_first_character_byte_read(src, ch, max_strlen); + return find_first_character_impl(src, ch, max_strlen); } // Returns the maximum length span that contains only characters not found in diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl index 259d4d292fcf4..6166f52f80f8b 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl @@ -39,7 +39,8 @@ LIBC_CONFIGURE_OPTIONS = [ # "LIBC_COPT_SCANF_DISABLE_FLOAT", # "LIBC_COPT_SCANF_DISABLE_INDEX_MODE", "LIBC_COPT_STDIO_USE_SYSTEM_FILE", - "LIBC_COPT_STRING_UNSAFE_WIDE_READ", + "LIBC_COPT_STRING_LENGTH_IMPL=generic", + "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=wide", # "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH", # "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE", # "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION", From 5df3ff461f3c33c5ddd678f986feb06f58339661 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Wed, 29 Oct 2025 12:01:35 -0700 Subject: [PATCH 02/12] Fix formatting --- libc/src/string/string_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index b9f020ca9097c..833879795a236 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -190,7 +190,7 @@ find_first_character(const unsigned char *src, unsigned char ch, // 3. generic - read using clang's internal vector types // 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics. // -//The called implemenation is chosen at build-time by setting +// The called implemenation is chosen at build-time by setting // LIBC_CONF_{FUNC}_IMPL in config.json static constexpr auto &string_length_impl = LIBC_COPT_STRING_LENGTH_IMPL::string_length; From 051f4ab0d3b5ed2078427723745e40fb5fe1c7e5 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Fri, 31 Oct 2025 13:19:05 -0700 Subject: [PATCH 03/12] Fix aarch64 build. --- libc/src/string/memory_utils/aarch64/inline_strlen.h | 4 ++-- libc/src/string/string_utils.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index b39df3e474669..f7385ce75a813 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace arch { +namespace internal::arch { [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; @@ -44,7 +44,7 @@ string_length(const char *src) { (cpp::countr_zero(cmp) >> 3)); } } -} // namespace arch +} // namespace internal::arch } // namespace LIBC_NAMESPACE_DECL #endif // __ARM_NEON diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 833879795a236..f4cecc5973978 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -37,7 +37,7 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { #if !LIBC_HAS_VECTOR_TYPE -// Foreword any generic vector impls to architecture specific ones +// Forward any generic vector impls to architecture specific ones namespace arch {} namespace generic = arch; #endif From 303929b6307301611b3662d1efd80e74da9f6f42 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Tue, 18 Nov 2025 13:52:17 -0800 Subject: [PATCH 04/12] Rename namespaces to match review sugestions. laksfdj --- libc/config/config.json | 4 ++-- libc/config/linux/config.json | 4 ++-- libc/docs/configure.rst | 4 ++-- .../memory_utils/aarch64/inline_strlen.h | 4 ++-- .../memory_utils/generic/inline_strlen.h | 4 ++-- .../memory_utils/x86_64/inline_strlen.h | 4 ++-- libc/src/string/string_utils.h | 21 ++++++++++--------- .../libc/libc_configure_options.bzl | 4 ++-- 8 files changed, 25 insertions(+), 24 deletions(-) diff --git a/libc/config/config.json b/libc/config/config.json index 12596a00911e2..71384c98083c2 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -67,11 +67,11 @@ "string": { "LIBC_CONF_STRING_LENGTH_IMPL": { "value": "element", - "doc": "Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'." + "doc": "Selects the implementation for string-length: 'element', 'word', 'clang_vector', or 'arch_vector'." }, "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { "value": "element", - "doc": "Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'." + "doc": "Selects the implementation for find-first-character-related functions: 'element', 'word', 'clang_vector', or 'arch_vector'." }, "LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": { "value": false, diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json index 93f5a1ef1f184..8e7db248dc1bd 100644 --- a/libc/config/linux/config.json +++ b/libc/config/linux/config.json @@ -1,10 +1,10 @@ { "string": { "LIBC_CONF_STRING_LENGTH_IMPL": { - "value": "generic", + "value": "clang_vector", }, "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": { - "value": "wide", + "value": "word", } } } diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index 3049738aff6e7..ff1f82f932521 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -58,9 +58,9 @@ to learn about the defaults for your platform and target. * **"setjmp" options** - ``LIBC_CONF_SETJMP_AARCH64_RESTORE_PLATFORM_REGISTER``: Make setjmp save the value of x18, and longjmp restore it. The AArch64 ABI delegates this register to platform ABIs, which can choose whether to make it caller-saved. * **"string" options** - - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'. + - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'word', 'clang_vector', or 'arch_vector'. - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled. - - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'. + - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'word', 'clang_vector', or 'arch_vector'. * **"threads" options** - ``LIBC_CONF_THREAD_MODE``: The implementation used for Mutex, acceptable values are LIBC_THREAD_MODE_PLATFORM, LIBC_THREAD_MODE_SINGLE, and LIBC_THREAD_MODE_EXTERNAL. * **"time" options** diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index f7385ce75a813..78330fe0876c4 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace internal::arch { +namespace internal::arch_vector { [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; @@ -44,7 +44,7 @@ string_length(const char *src) { (cpp::countr_zero(cmp) >> 3)); } } -} // namespace internal::arch +} // namespace internal::arch_vector } // namespace LIBC_NAMESPACE_DECL #endif // __ARM_NEON diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h index 7630c0b7caedf..7a565b36617ed 100644 --- a/libc/src/string/memory_utils/generic/inline_strlen.h +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -14,7 +14,7 @@ #include "src/__support/common.h" namespace LIBC_NAMESPACE_DECL { -namespace generic { +namespace clang_vector { // Exploit the underlying integer representation to do a variable shift. LIBC_INLINE constexpr cpp::simd_mask shift_mask(cpp::simd_mask m, @@ -46,7 +46,7 @@ LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) { cpp::find_first_set(mask); } } -} // namespace generic +} // namespace clang_vector } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h index 3d93960605f0c..07b4a470f0d77 100644 --- a/libc/src/string/memory_utils/x86_64/inline_strlen.h +++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h @@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace internal::arch { +namespace internal::arch_vector { // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero. template @@ -104,7 +104,7 @@ namespace avx512 { #endif } -} // namespace internal::arch +} // namespace internal::arch_vector } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index f4cecc5973978..7325bf10fb644 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -37,9 +37,9 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { #if !LIBC_HAS_VECTOR_TYPE -// Forward any generic vector impls to architecture specific ones -namespace arch {} -namespace generic = arch; +// Forward any clang vector impls to architecture specific ones +namespace arch_vector {} +namespace clang_vector = arch_vector; #endif namespace element { @@ -70,9 +70,9 @@ LIBC_INLINE void *find_first_character(const unsigned char *src, } } // namespace element -namespace wide { -// Generic, non-vector, implementations of functions that search for data -// by reading from memory block-by-block. +namespace word { +// Non-vector, implementations of functions that search for data by reading from +// memory word-by-word. template LIBC_INLINE constexpr Word repeat_byte(Word byte) { static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits."); @@ -179,16 +179,17 @@ find_first_character(const unsigned char *src, unsigned char ch, return const_cast(char_ptr); } -} // namespace wide +} // namespace word // Dispatch mechanism for implementations of performance-sensitive // functions. Always measure, but generally from lower- to higher-performance // order: // // 1. element - read char-by-char or wchar-by-wchar -// 3. wide - read word-by-word -// 3. generic - read using clang's internal vector types -// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics. +// 3. word - read word-by-word +// 3. clang_vector - read using clang's internal vector types +// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with +// intrinsics. // // The called implemenation is chosen at build-time by setting // LIBC_CONF_{FUNC}_IMPL in config.json diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl index 6166f52f80f8b..1c2b65ea20113 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl @@ -39,8 +39,8 @@ LIBC_CONFIGURE_OPTIONS = [ # "LIBC_COPT_SCANF_DISABLE_FLOAT", # "LIBC_COPT_SCANF_DISABLE_INDEX_MODE", "LIBC_COPT_STDIO_USE_SYSTEM_FILE", - "LIBC_COPT_STRING_LENGTH_IMPL=generic", - "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=wide", + "LIBC_COPT_STRING_LENGTH_IMPL=clang_vector", + "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=word", # "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH", # "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE", # "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION", From 0257a5d0edbc2913b89f71501f00444647afbc80 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Tue, 18 Nov 2025 15:15:27 -0800 Subject: [PATCH 05/12] Move optimization routines to string_optimization.h --- libc/src/string/string_optimization.h | 212 ++++++++++++++++++++++++++ libc/src/string/string_utils.h | 191 +---------------------- 2 files changed, 213 insertions(+), 190 deletions(-) create mode 100644 libc/src/string/string_optimization.h diff --git a/libc/src/string/string_optimization.h b/libc/src/string/string_optimization.h new file mode 100644 index 0000000000000..f47ef5d7d8387 --- /dev/null +++ b/libc/src/string/string_optimization.h @@ -0,0 +1,212 @@ +//===-- String Optimization -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Basic implementation and dispatch mechanism for performance-sensitive string- +// related code. +// +//===----------------------------------------------------------------------===// + +#include "hdr/limits_macros.h" +#include "hdr/stdint_proxy.h" // uintptr_t +#include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" // cpp::is_same_v + +#if LIBC_HAS_VECTOR_TYPE +#include "src/string/memory_utils/generic/inline_strlen.h" +#endif +#if defined(LIBC_TARGET_ARCH_IS_X86) +#include "src/string/memory_utils/x86_64/inline_strlen.h" +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) +#include "src/string/memory_utils/aarch64/inline_strlen.h" +#endif + +// Set sensible defaults +#ifndef LIBC_COPT_STRING_LENGTH_IMPL +#define LIBC_COPT_STRING_LENGTH_IMPL element +#endif +#ifndef LIBC_COPT_FIND_FIRST_CHARACTER_IMPL +#define LIBC_COPT_STRING_LENGTH_IMPL element +#endif + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +#if !LIBC_HAS_VECTOR_TYPE +// Forward any clang vector impls to architecture specific ones +namespace arch_vector {} +namespace clang_vector = arch_vector; +#endif + +namespace element { +// Element-by-element (usually a byte, but wider for wchar) implementations of +// functions that search for data. Slow, but easy to understand and analyze. + +// Returns the length of a string, denoted by the first occurrence +// of a null terminator. +LIBC_INLINE size_t string_length(const char *src) { + size_t length; + for (length = 0; *src; ++src, ++length) + ; + return length; +} + +template LIBC_INLINE size_t string_length_element(const T *src) { + size_t length; + for (length = 0; *src; ++src, ++length) + ; + return length; +} + +LIBC_INLINE void *find_first_character(const unsigned char *src, + unsigned char ch, size_t n) { + for (; n && *src != ch; --n, ++src) + ; + return n ? const_cast(src) : nullptr; +} +} // namespace element + +namespace word { +// Non-vector, implementations of functions that search for data by reading from +// memory word-by-word. + +template LIBC_INLINE constexpr Word repeat_byte(Word byte) { + static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits."); + constexpr size_t BITS_IN_BYTE = CHAR_BIT; + constexpr size_t BYTE_MASK = 0xff; + Word result = 0; + byte = byte & BYTE_MASK; + for (size_t i = 0; i < sizeof(Word); ++i) + result = (result << BITS_IN_BYTE) | byte; + return result; +} + +// The goal of this function is to take in a block of arbitrary size and return +// if it has any bytes equal to zero without branching. This is done by +// transforming the block such that zero bytes become non-zero and non-zero +// bytes become zero. +// The first transformation relies on the properties of carrying in arithmetic +// subtraction. Specifically, if 0x01 is subtracted from a byte that is 0x00, +// then the result for that byte must be equal to 0xff (or 0xfe if the next byte +// needs a carry as well). +// The next transformation is a simple mask. All zero bytes will have the high +// bit set after the subtraction, so each byte is masked with 0x80. This narrows +// the set of bytes that result in a non-zero value to only zero bytes and bytes +// with the high bit and any other bit set. +// The final transformation masks the result of the previous transformations +// with the inverse of the original byte. This means that any byte that had the +// high bit set will no longer have it set, narrowing the list of bytes which +// result in non-zero values to just the zero byte. +template LIBC_INLINE constexpr bool has_zeroes(Word block) { + constexpr unsigned int LOW_BITS = repeat_byte(0x01); + constexpr Word HIGH_BITS = repeat_byte(0x80); + Word subtracted = block - LOW_BITS; + Word inverted = ~block; + return (subtracted & inverted & HIGH_BITS) != 0; +} + +// Unsigned int is the default size for most processors, and on x86-64 it +// performs better than larger sizes when the src pointer can't be assumed to +// be aligned to a word boundary, so it's the size we use for reading the +// string a block at a time. + +LIBC_INLINE size_t string_length(const char *src) { + using Word = unsigned int; + const char *char_ptr = src; + // Step 1: read 1 byte at a time to align to block size + for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0; + ++char_ptr) { + if (*char_ptr == '\0') + return static_cast(char_ptr - src); + } + // Step 2: read blocks + for (const Word *block_ptr = reinterpret_cast(char_ptr); + !has_zeroes(*block_ptr); ++block_ptr) { + char_ptr = reinterpret_cast(block_ptr); + } + // Step 3: find the zero in the block + for (; *char_ptr != '\0'; ++char_ptr) { + ; + } + return static_cast(char_ptr - src); +} + +LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void * +find_first_character(const unsigned char *src, unsigned char ch, + size_t max_strlen = cpp::numeric_limits::max()) { + using Word = unsigned int; + const unsigned char *char_ptr = src; + size_t cur = 0; + + // If the maximum size of the string is small, the overhead of aligning to a + // word boundary and generating a bitmask of the appropriate size may be + // greater than the gains from reading larger chunks. Based on some testing, + // the crossover point between when it's faster to just read bytewise and read + // blocks is somewhere between 16 and 32, so 4 times the size of the block + // should be in that range. + if (max_strlen < (sizeof(Word) * 4)) { + return element::find_first_character(src, ch, max_strlen); + } + size_t n = max_strlen; + // Step 1: read 1 byte at a time to align to block size + for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0 && cur < n; + ++char_ptr, ++cur) { + if (*char_ptr == ch) + return const_cast(char_ptr); + } + + const Word ch_mask = repeat_byte(ch); + + // Step 2: read blocks + for (const Word *block_ptr = reinterpret_cast(char_ptr); + !has_zeroes((*block_ptr) ^ ch_mask) && cur < n; + ++block_ptr, cur += sizeof(Word)) { + char_ptr = reinterpret_cast(block_ptr); + } + + // Step 3: find the match in the block + for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) { + ; + } + + if (*char_ptr != ch || cur >= n) + return static_cast(nullptr); + + return const_cast(char_ptr); +} + +} // namespace word + +// Dispatch mechanism for implementations of performance-sensitive +// functions. Always measure, but generally from lower- to higher-performance +// order: +// +// 1. element - read char-by-char or wchar-by-wchar +// 3. word - read word-by-word +// 3. clang_vector - read using clang's internal vector types +// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with +// intrinsics. +// +// The called implemenation is chosen at build-time by setting +// LIBC_CONF_{FUNC}_IMPL in config.json +static constexpr auto &string_length_impl = + LIBC_COPT_STRING_LENGTH_IMPL::string_length; +static constexpr auto &find_first_character_impl = + LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character; + +template LIBC_INLINE size_t string_length(const T *src) { + if constexpr (cpp::is_same_v) + return string_length_impl(src); + return element::string_length_element(src); +} + +// Returns the first occurrence of 'ch' within the first 'n' characters of +// 'src'. If 'ch' is not found, returns nullptr. +LIBC_INLINE void *find_first_character(const unsigned char *src, + unsigned char ch, size_t max_strlen) { + return find_first_character_impl(src, ch, max_strlen); +} diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 7325bf10fb644..262b11fc92ea0 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -14,202 +14,13 @@ #ifndef LLVM_LIBC_SRC_STRING_STRING_UTILS_H #define LLVM_LIBC_SRC_STRING_STRING_UTILS_H -#include "hdr/limits_macros.h" -#include "hdr/stdint_proxy.h" // uintptr_t #include "hdr/types/size_t.h" #include "src/__support/CPP/bitset.h" -#include "src/__support/CPP/type_traits.h" // cpp::is_same_v #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/string/memory_utils/inline_memcpy.h" - -#if LIBC_HAS_VECTOR_TYPE -#include "src/string/memory_utils/generic/inline_strlen.h" -#endif -#if defined(LIBC_TARGET_ARCH_IS_X86) -#include "src/string/memory_utils/x86_64/inline_strlen.h" -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) -#include "src/string/memory_utils/aarch64/inline_strlen.h" -#endif - -namespace LIBC_NAMESPACE_DECL { -namespace internal { - -#if !LIBC_HAS_VECTOR_TYPE -// Forward any clang vector impls to architecture specific ones -namespace arch_vector {} -namespace clang_vector = arch_vector; -#endif - -namespace element { -// Element-by-element (usually a byte, but wider for wchar) implementations of -// functions that search for data. Slow, but easy to understand and analyze. - -// Returns the length of a string, denoted by the first occurrence -// of a null terminator. -LIBC_INLINE size_t string_length(const char *src) { - size_t length; - for (length = 0; *src; ++src, ++length) - ; - return length; -} - -template LIBC_INLINE size_t string_length_element(const T *src) { - size_t length; - for (length = 0; *src; ++src, ++length) - ; - return length; -} - -LIBC_INLINE void *find_first_character(const unsigned char *src, - unsigned char ch, size_t n) { - for (; n && *src != ch; --n, ++src) - ; - return n ? const_cast(src) : nullptr; -} -} // namespace element - -namespace word { -// Non-vector, implementations of functions that search for data by reading from -// memory word-by-word. - -template LIBC_INLINE constexpr Word repeat_byte(Word byte) { - static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits."); - constexpr size_t BITS_IN_BYTE = CHAR_BIT; - constexpr size_t BYTE_MASK = 0xff; - Word result = 0; - byte = byte & BYTE_MASK; - for (size_t i = 0; i < sizeof(Word); ++i) - result = (result << BITS_IN_BYTE) | byte; - return result; -} - -// The goal of this function is to take in a block of arbitrary size and return -// if it has any bytes equal to zero without branching. This is done by -// transforming the block such that zero bytes become non-zero and non-zero -// bytes become zero. -// The first transformation relies on the properties of carrying in arithmetic -// subtraction. Specifically, if 0x01 is subtracted from a byte that is 0x00, -// then the result for that byte must be equal to 0xff (or 0xfe if the next byte -// needs a carry as well). -// The next transformation is a simple mask. All zero bytes will have the high -// bit set after the subtraction, so each byte is masked with 0x80. This narrows -// the set of bytes that result in a non-zero value to only zero bytes and bytes -// with the high bit and any other bit set. -// The final transformation masks the result of the previous transformations -// with the inverse of the original byte. This means that any byte that had the -// high bit set will no longer have it set, narrowing the list of bytes which -// result in non-zero values to just the zero byte. -template LIBC_INLINE constexpr bool has_zeroes(Word block) { - constexpr unsigned int LOW_BITS = repeat_byte(0x01); - constexpr Word HIGH_BITS = repeat_byte(0x80); - Word subtracted = block - LOW_BITS; - Word inverted = ~block; - return (subtracted & inverted & HIGH_BITS) != 0; -} - -// Unsigned int is the default size for most processors, and on x86-64 it -// performs better than larger sizes when the src pointer can't be assumed to -// be aligned to a word boundary, so it's the size we use for reading the -// string a block at a time. - -LIBC_INLINE size_t string_length(const char *src) { - using Word = unsigned int; - const char *char_ptr = src; - // Step 1: read 1 byte at a time to align to block size - for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0; - ++char_ptr) { - if (*char_ptr == '\0') - return static_cast(char_ptr - src); - } - // Step 2: read blocks - for (const Word *block_ptr = reinterpret_cast(char_ptr); - !has_zeroes(*block_ptr); ++block_ptr) { - char_ptr = reinterpret_cast(block_ptr); - } - // Step 3: find the zero in the block - for (; *char_ptr != '\0'; ++char_ptr) { - ; - } - return static_cast(char_ptr - src); -} - -LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void * -find_first_character(const unsigned char *src, unsigned char ch, - size_t max_strlen = cpp::numeric_limits::max()) { - using Word = unsigned int; - const unsigned char *char_ptr = src; - size_t cur = 0; - - // If the maximum size of the string is small, the overhead of aligning to a - // word boundary and generating a bitmask of the appropriate size may be - // greater than the gains from reading larger chunks. Based on some testing, - // the crossover point between when it's faster to just read bytewise and read - // blocks is somewhere between 16 and 32, so 4 times the size of the block - // should be in that range. - if (max_strlen < (sizeof(Word) * 4)) { - return element::find_first_character(src, ch, max_strlen); - } - size_t n = max_strlen; - // Step 1: read 1 byte at a time to align to block size - for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0 && cur < n; - ++char_ptr, ++cur) { - if (*char_ptr == ch) - return const_cast(char_ptr); - } - - const Word ch_mask = repeat_byte(ch); - - // Step 2: read blocks - for (const Word *block_ptr = reinterpret_cast(char_ptr); - !has_zeroes((*block_ptr) ^ ch_mask) && cur < n; - ++block_ptr, cur += sizeof(Word)) { - char_ptr = reinterpret_cast(block_ptr); - } - - // Step 3: find the match in the block - for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) { - ; - } - - if (*char_ptr != ch || cur >= n) - return static_cast(nullptr); - - return const_cast(char_ptr); -} - -} // namespace word - -// Dispatch mechanism for implementations of performance-sensitive -// functions. Always measure, but generally from lower- to higher-performance -// order: -// -// 1. element - read char-by-char or wchar-by-wchar -// 3. word - read word-by-word -// 3. clang_vector - read using clang's internal vector types -// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with -// intrinsics. -// -// The called implemenation is chosen at build-time by setting -// LIBC_CONF_{FUNC}_IMPL in config.json -static constexpr auto &string_length_impl = - LIBC_COPT_STRING_LENGTH_IMPL::string_length; -static constexpr auto &find_first_character_impl = - LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character; - -template LIBC_INLINE size_t string_length(const T *src) { - if constexpr (cpp::is_same_v) - return string_length_impl(src); - return element::string_length_element(src); -} - -// Returns the first occurrence of 'ch' within the first 'n' characters of -// 'src'. If 'ch' is not found, returns nullptr. -LIBC_INLINE void *find_first_character(const unsigned char *src, - unsigned char ch, size_t max_strlen) { - return find_first_character_impl(src, ch, max_strlen); -} +#include "src/string/string_optimization.h" // Returns the maximum length span that contains only characters not found in // 'segment'. If no characters are found, returns the length of 'src'. From 232be6458c465d389ac8fd7fc0180d278b62740c Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Wed, 19 Nov 2025 15:55:29 -0800 Subject: [PATCH 06/12] Cleanup header-syntax, rename file. --- .../{string_optimization.h => string_length.h} | 15 ++++++++------- libc/src/string/string_utils.h | 13 ++++++++++++- 2 files changed, 20 insertions(+), 8 deletions(-) rename libc/src/string/{string_optimization.h => string_length.h} (95%) diff --git a/libc/src/string/string_optimization.h b/libc/src/string/string_length.h similarity index 95% rename from libc/src/string/string_optimization.h rename to libc/src/string/string_length.h index f47ef5d7d8387..80f8ca495a3a1 100644 --- a/libc/src/string/string_optimization.h +++ b/libc/src/string/string_length.h @@ -1,4 +1,4 @@ -//===-- String Optimization -------------------------------------*- C++ -*-===// +//===-- String Length -------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_SRC_STRING_STRING_LENGTH_H +#define LLVM_LIBC_SRC_STRING_STRING_LENGTH_H + #include "hdr/limits_macros.h" #include "hdr/stdint_proxy.h" // uintptr_t #include "hdr/types/size_t.h" @@ -204,9 +207,7 @@ template LIBC_INLINE size_t string_length(const T *src) { return element::string_length_element(src); } -// Returns the first occurrence of 'ch' within the first 'n' characters of -// 'src'. If 'ch' is not found, returns nullptr. -LIBC_INLINE void *find_first_character(const unsigned char *src, - unsigned char ch, size_t max_strlen) { - return find_first_character_impl(src, ch, max_strlen); -} +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STRING_STRING_LENGTH_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 262b11fc92ea0..205a124fad906 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -20,7 +20,10 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/string/memory_utils/inline_memcpy.h" -#include "src/string/string_optimization.h" +#include "src/string/string_length.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { // Returns the maximum length span that contains only characters not found in // 'segment'. If no characters are found, returns the length of 'src'. @@ -114,6 +117,14 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src, } } + +// Returns the first occurrence of 'ch' within the first 'n' characters of +// 'src'. If 'ch' is not found, returns nullptr. +LIBC_INLINE void *find_first_character(const unsigned char *src, + unsigned char ch, size_t max_strlen) { + return find_first_character_impl(src, ch, max_strlen); +} + } // namespace internal } // namespace LIBC_NAMESPACE_DECL From 81edcb865c9d732181192f98fcee64d767839629 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 15:43:38 -0800 Subject: [PATCH 07/12] Fix formatting and rework namespace names --- .../memory_utils/aarch64/inline_strlen.h | 57 +++++++++---------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 50fc751868b17..b0cd6518445f6 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -10,50 +10,49 @@ #include "src/__support/macros/properties/cpu_features.h" +namespace LIBC_NAMESPACE_DECL { + +namespace internal::arch_vector { #if defined(__ARM_NEON) #include "src/__support/CPP/bit.h" // countr_zero #include #include // size_t -namespace LIBC_NAMESPACE_DECL { - -namespace internal::arch_vector { namepace neon { -[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t -string_length(const char *src) { - using Vector __attribute__((may_alias)) = uint8x8_t; + [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t + string_length(const char *src) { + using Vector __attribute__((may_alias)) = uint8x8_t; - uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); - const Vector *block_ptr = - reinterpret_cast(src - misalign_bytes); - Vector v = *block_ptr; - Vector vcmp = vceqz_u8(v); - uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); - uint64_t cmp = vget_lane_u64(cmp_mask, 0); - cmp = cmp >> (misalign_bytes << 3); - if (cmp) - return cpp::countr_zero(cmp) >> 3; - - while (true) { - ++block_ptr; - v = *block_ptr; - vcmp = vceqz_u8(v); - cmp_mask = vreinterpret_u64_u8(vcmp); - cmp = vget_lane_u64(cmp_mask, 0); + uintptr_t misalign_bytes = + reinterpret_cast(src) % sizeof(Vector); + const Vector *block_ptr = + reinterpret_cast(src - misalign_bytes); + Vector v = *block_ptr; + Vector vcmp = vceqz_u8(v); + uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); + uint64_t cmp = vget_lane_u64(cmp_mask, 0); + cmp = cmp >> (misalign_bytes << 3); if (cmp) - return static_cast(reinterpret_cast(block_ptr) - - reinterpret_cast(src) + - (cpp::countr_zero(cmp) >> 3)); + return cpp::countr_zero(cmp) >> 3; + + while (true) { + ++block_ptr; + v = *block_ptr; + vcmp = vceqz_u8(v); + cmp_mask = vreinterpret_u64_u8(vcmp); + cmp = vget_lane_u64(cmp_mask, 0); + if (cmp) + return static_cast(reinterpret_cast(block_ptr) - + reinterpret_cast(src) + + (cpp::countr_zero(cmp) >> 3)); + } } -} } // namespace neon -} // namespace LIBC_NAMESPACE_DECL #endif // __ARM_NEON #ifdef LIBC_TARGET_CPU_HAS_SVE #include "src/__support/macros/optimization.h" #include -namespace LIBC_NAMESPACE_DECL { namespace sve { [[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) { const uint8_t *ptr = reinterpret_cast(src); From fb5c5ea02c9a4b6cf0c131f50e2702febb893986 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 15:44:03 -0800 Subject: [PATCH 08/12] Fix bazel build --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index bd48222856f22..5ccaadca427d1 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5449,7 +5449,10 @@ libc_support_library( libc_support_library( name = "string_utils", - hdrs = ["src/string/string_utils.h"], + hdrs = [ + "src/string/string_utils.h", + "src/string/string_length.h", + ], deps = [ ":__support_common", ":__support_cpp_bitset", From db7168a2d3c34deb5250eeb4394f5e7f7e6ec09b Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 15:48:49 -0800 Subject: [PATCH 09/12] More aarch64 fixes --- libc/src/string/memory_utils/aarch64/inline_strlen.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index b0cd6518445f6..49db3df4863b2 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -95,15 +95,16 @@ namespace sve { return len; } } // namespace sve -} // internal::arch_vector -} // namespace LIBC_NAMESPACE_DECL #endif // LIBC_TARGET_CPU_HAS_SVE -namespace LIBC_NAMESPACE_DECL { +[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { #ifdef LIBC_TARGET_CPU_HAS_SVE -namespace string_length_impl = sve; + return sve::string_length(src); #elif defined(__ARM_NEON) -namespace string_length_impl = neon; + return neon::string_length(src); #endif +} + +} // internal::arch_vector } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H From 1f2d783be6f7499369f6e4efdb5a053906928db3 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 15:52:50 -0800 Subject: [PATCH 10/12] Formatting fixes. --- libc/src/string/memory_utils/aarch64/inline_strlen.h | 2 +- libc/src/string/string_utils.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 49db3df4863b2..3101a55f75ccf 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -105,6 +105,6 @@ namespace sve { #endif } -} // internal::arch_vector +} // namespace internal::arch_vector } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 205a124fad906..b0144e01a9006 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -117,7 +117,6 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src, } } - // Returns the first occurrence of 'ch' within the first 'n' characters of // 'src'. If 'ch' is not found, returns nullptr. LIBC_INLINE void *find_first_character(const unsigned char *src, From 9fd2e2b75213c8546bd88af10c0b077c77bd6a01 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 15:59:36 -0800 Subject: [PATCH 11/12] namepace --> namespace --- libc/src/string/memory_utils/aarch64/inline_strlen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 3101a55f75ccf..a684600b3c3ef 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -17,7 +17,7 @@ namespace internal::arch_vector { #include "src/__support/CPP/bit.h" // countr_zero #include #include // size_t -namepace neon { +namespace neon { [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; From 2a464ac96b638139fd2a6fa583d832df8dd4cf3c Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 20 Nov 2025 16:41:50 -0800 Subject: [PATCH 12/12] aarch64 include pattern is load bearing. Minor bazel fix. --- .../memory_utils/aarch64/inline_strlen.h | 62 +++++++++---------- .../llvm-project-overlay/libc/BUILD.bazel | 2 +- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index a684600b3c3ef..4fc54827b1ddf 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -10,49 +10,47 @@ #include "src/__support/macros/properties/cpu_features.h" -namespace LIBC_NAMESPACE_DECL { - -namespace internal::arch_vector { #if defined(__ARM_NEON) #include "src/__support/CPP/bit.h" // countr_zero #include #include // size_t +namespace LIBC_NAMESPACE_DECL::arch_vector { namespace neon { - [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t - string_length(const char *src) { - using Vector __attribute__((may_alias)) = uint8x8_t; +[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t +string_length(const char *src) { + using Vector __attribute__((may_alias)) = uint8x8_t; - uintptr_t misalign_bytes = - reinterpret_cast(src) % sizeof(Vector); - const Vector *block_ptr = - reinterpret_cast(src - misalign_bytes); - Vector v = *block_ptr; - Vector vcmp = vceqz_u8(v); - uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); - uint64_t cmp = vget_lane_u64(cmp_mask, 0); - cmp = cmp >> (misalign_bytes << 3); - if (cmp) - return cpp::countr_zero(cmp) >> 3; + uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); + const Vector *block_ptr = + reinterpret_cast(src - misalign_bytes); + Vector v = *block_ptr; + Vector vcmp = vceqz_u8(v); + uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); + uint64_t cmp = vget_lane_u64(cmp_mask, 0); + cmp = cmp >> (misalign_bytes << 3); + if (cmp) + return cpp::countr_zero(cmp) >> 3; - while (true) { - ++block_ptr; - v = *block_ptr; - vcmp = vceqz_u8(v); - cmp_mask = vreinterpret_u64_u8(vcmp); - cmp = vget_lane_u64(cmp_mask, 0); - if (cmp) - return static_cast(reinterpret_cast(block_ptr) - - reinterpret_cast(src) + - (cpp::countr_zero(cmp) >> 3)); - } + while (true) { + ++block_ptr; + v = *block_ptr; + vcmp = vceqz_u8(v); + cmp_mask = vreinterpret_u64_u8(vcmp); + cmp = vget_lane_u64(cmp_mask, 0); + if (cmp) + return static_cast(reinterpret_cast(block_ptr) - + reinterpret_cast(src) + + (cpp::countr_zero(cmp) >> 3)); } +} } // namespace neon - +} // namespace LIBC_NAMESPACE_DECL::arch_vector #endif // __ARM_NEON #ifdef LIBC_TARGET_CPU_HAS_SVE #include "src/__support/macros/optimization.h" #include +namespace LIBC_NAMESPACE_DECL::arch_vector { namespace sve { [[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) { const uint8_t *ptr = reinterpret_cast(src); @@ -95,8 +93,10 @@ namespace sve { return len; } } // namespace sve +} // namespace LIBC_NAMESPACE_DECL::arch_vector #endif // LIBC_TARGET_CPU_HAS_SVE +namespace LIBC_NAMESPACE_DECL::arch_vector { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { #ifdef LIBC_TARGET_CPU_HAS_SVE return sve::string_length(src); @@ -104,7 +104,5 @@ namespace sve { return neon::string_length(src); #endif } - -} // namespace internal::arch_vector -} // namespace LIBC_NAMESPACE_DECL +} // namespace LIBC_NAMESPACE_DECL::arch_vector #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5ccaadca427d1..22c6d29486e00 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5450,8 +5450,8 @@ libc_support_library( libc_support_library( name = "string_utils", hdrs = [ - "src/string/string_utils.h", "src/string/string_length.h", + "src/string/string_utils.h", ], deps = [ ":__support_common",