From 4a4f77347cd41d8c86b23e333e717a030b2e9a31 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 20 Aug 2025 17:28:45 -0500 Subject: [PATCH] Reapply "[libc] Enable wide-read memory operations by default on Linux (#154602)" This reverts commit 27fc9671f93556b2af36d028b0b47ab7edd3cc3d. --- libc/config/linux/arm/config.json | 7 +++++ libc/config/linux/config.json | 7 +++++ .../memory_utils/aarch64/inline_strlen.h | 10 ++++--- .../memory_utils/x86_64/inline_strlen.h | 26 +++++++++++-------- 4 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 libc/config/linux/arm/config.json create mode 100644 libc/config/linux/config.json diff --git a/libc/config/linux/arm/config.json b/libc/config/linux/arm/config.json new file mode 100644 index 0000000000000..e7ad4544b104d --- /dev/null +++ b/libc/config/linux/arm/config.json @@ -0,0 +1,7 @@ +{ + "string": { + "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { + "value": false + } + } +} diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json new file mode 100644 index 0000000000000..30e8b2cdadabe --- /dev/null +++ b/libc/config/linux/config.json @@ -0,0 +1,7 @@ +{ + "string": { + "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { + "value": true + } + } +} diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 79487f4752b83..36fd1aa636b54 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -17,14 +17,16 @@ namespace LIBC_NAMESPACE_DECL { namespace neon { -[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { +[[gnu::no_sanitize_address]] [[maybe_unused]] LIBC_INLINE static size_t +string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); - Vector *block_ptr = reinterpret_cast(src - misalign_bytes); + const Vector *block_ptr = + reinterpret_cast(src - misalign_bytes); Vector v = *block_ptr; Vector vcmp = vceqz_u8(v); - uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp); + uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); uint64_t cmp = vget_lane_u64(cmp_mask, 0); cmp = cmp >> (misalign_bytes << 3); if (cmp) @@ -34,7 +36,7 @@ namespace neon { ++block_ptr; v = *block_ptr; vcmp = vceqz_u8(v); - cmp_mask = vreinterpret_u64_s8(vcmp); + cmp_mask = vreinterpret_u64_u8(vcmp); cmp = vget_lane_u64(cmp_mask, 0); if (cmp) return static_cast(reinterpret_cast(block_ptr) - diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h index 5eb184cbf8107..6dad6acc08928 100644 --- a/libc/src/string/memory_utils/x86_64/inline_strlen.h +++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h @@ -18,22 +18,23 @@ namespace LIBC_NAMESPACE_DECL { namespace string_length_internal { // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero. template -Mask CompareAndMask(const Vector *block_ptr); +LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr); template )> -size_t string_length_vector(const char *src) { + decltype(compare_and_mask)> +[[gnu::no_sanitize_address]] LIBC_INLINE static size_t +string_length_vector(const char *src) { uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); const Vector *block_ptr = reinterpret_cast(src - misalign_bytes); - auto cmp = CompareAndMask(block_ptr) >> misalign_bytes; + auto cmp = compare_and_mask(block_ptr) >> misalign_bytes; if (cmp) return cpp::countr_zero(cmp); while (true) { block_ptr++; - cmp = CompareAndMask(block_ptr); + cmp = compare_and_mask(block_ptr); if (cmp) return static_cast(reinterpret_cast(block_ptr) - reinterpret_cast(src) + @@ -42,7 +43,8 @@ size_t string_length_vector(const char *src) { } template <> -uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) { __m128i v = _mm_load_si128(block_ptr); __m128i z = _mm_setzero_si128(); __m128i c = _mm_cmpeq_epi8(z, v); @@ -52,13 +54,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { namespace sse2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m128i, uint32_t, - CompareAndMask<__m128i, uint32_t>>(src); + compare_and_mask<__m128i, uint32_t>>(src); } } // namespace sse2 #if defined(__AVX2__) template <> -uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) { __m256i v = _mm256_load_si256(block_ptr); __m256i z = _mm256_setzero_si256(); __m256i c = _mm256_cmpeq_epi8(z, v); @@ -68,14 +71,15 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { namespace avx2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m256i, uint32_t, - CompareAndMask<__m256i, uint32_t>>(src); + compare_and_mask<__m256i, uint32_t>>(src); } } // namespace avx2 #endif #if defined(__AVX512F__) template <> -__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { +LIBC_INLINE __mmask64 +compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) { __m512i v = _mm512_load_si512(block_ptr); __m512i z = _mm512_setzero_si512(); return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ); @@ -83,7 +87,7 @@ __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { namespace avx512 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m512i, __mmask64, - CompareAndMask<__m512i, __mmask64>>(src); + compare_and_mask<__m512i, __mmask64>>(src); } } // namespace avx512 #endif