diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json new file mode 100644 index 0000000000000..30e8b2cdadabe --- /dev/null +++ b/libc/config/linux/config.json @@ -0,0 +1,7 @@ +{ + "string": { + "LIBC_CONF_STRING_UNSAFE_WIDE_READ": { + "value": true + } + } +} diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 79487f4752b83..ba28b1894e67f 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -17,14 +17,15 @@ namespace LIBC_NAMESPACE_DECL { namespace neon { -[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { +[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); - Vector *block_ptr = reinterpret_cast(src - misalign_bytes); + const Vector *block_ptr = + reinterpret_cast(src - misalign_bytes); Vector v = *block_ptr; Vector vcmp = vceqz_u8(v); - uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp); + uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); uint64_t cmp = vget_lane_u64(cmp_mask, 0); cmp = cmp >> (misalign_bytes << 3); if (cmp) @@ -34,7 +35,7 @@ namespace neon { ++block_ptr; v = *block_ptr; vcmp = vceqz_u8(v); - cmp_mask = vreinterpret_u64_s8(vcmp); + cmp_mask = vreinterpret_u64_u8(vcmp); cmp = vget_lane_u64(cmp_mask, 0); if (cmp) return static_cast(reinterpret_cast(block_ptr) - diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h index 5eb184cbf8107..379fbc11af8cd 100644 --- a/libc/src/string/memory_utils/x86_64/inline_strlen.h +++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h @@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL { namespace string_length_internal { // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero. template -Mask CompareAndMask(const Vector *block_ptr); +LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr); template )> + decltype(compare_and_mask)> size_t string_length_vector(const char *src) { uintptr_t misalign_bytes = reinterpret_cast(src) % sizeof(Vector); const Vector *block_ptr = reinterpret_cast(src - misalign_bytes); - auto cmp = CompareAndMask(block_ptr) >> misalign_bytes; + auto cmp = compare_and_mask(block_ptr) >> misalign_bytes; if (cmp) return cpp::countr_zero(cmp); while (true) { block_ptr++; - cmp = CompareAndMask(block_ptr); + cmp = compare_and_mask(block_ptr); if (cmp) return static_cast(reinterpret_cast(block_ptr) - reinterpret_cast(src) + @@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) { } template <> -uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) { __m128i v = _mm_load_si128(block_ptr); __m128i z = _mm_setzero_si128(); __m128i c = _mm_cmpeq_epi8(z, v); @@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { namespace sse2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m128i, uint32_t, - CompareAndMask<__m128i, uint32_t>>(src); + compare_and_mask<__m128i, uint32_t>>(src); } } // namespace sse2 #if defined(__AVX2__) template <> -uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) { __m256i v = _mm256_load_si256(block_ptr); __m256i z = _mm256_setzero_si256(); __m256i c = _mm256_cmpeq_epi8(z, v); @@ -68,14 +70,15 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { namespace avx2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m256i, uint32_t, - CompareAndMask<__m256i, uint32_t>>(src); + compare_and_mask<__m256i, uint32_t>>(src); } } // namespace avx2 #endif #if defined(__AVX512F__) template <> -__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { +LIBC_INLINE __mmask64 +compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) { __m512i v = _mm512_load_si512(block_ptr); __m512i z = _mm512_setzero_si512(); return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ); @@ -83,7 +86,7 @@ __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { namespace avx512 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m512i, __mmask64, - CompareAndMask<__m512i, __mmask64>>(src); + compare_and_mask<__m512i, __mmask64>>(src); } } // namespace avx512 #endif