Skip to content

Commit 3fda3ee

Browse files
committed
[libc] Enable wide-read memory operations by default on Linux
Summary: This patch changes the linux build to use the wide reads on the memory operations by default. These memory functions will now potentially read outside of the bounds explicitly allowed by the current function. While technically undefined behavior in the standard, plenty of C library implementations do this. it will not cause a segmentation fault on linux as long as you do not cross a page boundary, and because we are only *reading* memory it should not have atomic effects.
1 parent 8d7b50e commit 3fda3ee

File tree

3 files changed

+25
-14
lines changed

3 files changed

+25
-14
lines changed

libc/config/linux/config.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"string": {
3+
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
4+
"value": true
5+
}
6+
}
7+
}

libc/src/string/memory_utils/aarch64/inline_strlen.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
namespace LIBC_NAMESPACE_DECL {
1818

1919
namespace neon {
20-
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
20+
[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
2121
using Vector __attribute__((may_alias)) = uint8x8_t;
2222

2323
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
24-
Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
24+
const Vector *block_ptr =
25+
reinterpret_cast<const Vector *>(src - misalign_bytes);
2526
Vector v = *block_ptr;
2627
Vector vcmp = vceqz_u8(v);
27-
uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
28+
uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
2829
uint64_t cmp = vget_lane_u64(cmp_mask, 0);
2930
cmp = cmp >> (misalign_bytes << 3);
3031
if (cmp)
@@ -34,7 +35,7 @@ namespace neon {
3435
++block_ptr;
3536
v = *block_ptr;
3637
vcmp = vceqz_u8(v);
37-
cmp_mask = vreinterpret_u64_s8(vcmp);
38+
cmp_mask = vreinterpret_u64_u8(vcmp);
3839
cmp = vget_lane_u64(cmp_mask, 0);
3940
if (cmp)
4041
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -

libc/src/string/memory_utils/x86_64/inline_strlen.h

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL {
1818
namespace string_length_internal {
1919
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
2020
template <typename Vector, typename Mask>
21-
Mask CompareAndMask(const Vector *block_ptr);
21+
LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr);
2222

2323
template <typename Vector, typename Mask,
24-
decltype(CompareAndMask<Vector, Mask>)>
24+
decltype(compare_and_mask<Vector, Mask>)>
2525
size_t string_length_vector(const char *src) {
2626
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
2727

2828
const Vector *block_ptr =
2929
reinterpret_cast<const Vector *>(src - misalign_bytes);
30-
auto cmp = CompareAndMask<Vector, Mask>(block_ptr) >> misalign_bytes;
30+
auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes;
3131
if (cmp)
3232
return cpp::countr_zero(cmp);
3333

3434
while (true) {
3535
block_ptr++;
36-
cmp = CompareAndMask<Vector, Mask>(block_ptr);
36+
cmp = compare_and_mask<Vector, Mask>(block_ptr);
3737
if (cmp)
3838
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
3939
reinterpret_cast<uintptr_t>(src) +
@@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) {
4242
}
4343

4444
template <>
45-
uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
45+
LIBC_INLINE uint32_t
46+
compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) {
4647
__m128i v = _mm_load_si128(block_ptr);
4748
__m128i z = _mm_setzero_si128();
4849
__m128i c = _mm_cmpeq_epi8(z, v);
@@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
5253
namespace sse2 {
5354
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
5455
return string_length_vector<__m128i, uint32_t,
55-
CompareAndMask<__m128i, uint32_t>>(src);
56+
compare_and_mask<__m128i, uint32_t>>(src);
5657
}
5758
} // namespace sse2
5859

5960
#if defined(__AVX2__)
6061
template <>
61-
uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
62+
LIBC_INLINE
63+
uint32_t compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) {
6264
__m256i v = _mm256_load_si256(block_ptr);
6365
__m256i z = _mm256_setzero_si256();
6466
__m256i c = _mm256_cmpeq_epi8(z, v);
@@ -68,22 +70,23 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
6870
namespace avx2 {
6971
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
7072
return string_length_vector<__m256i, uint32_t,
71-
CompareAndMask<__m256i, uint32_t>>(src);
73+
compare_and_mask<__m256i, uint32_t>>(src);
7274
}
7375
} // namespace avx2
7476
#endif
7577

7678
#if defined(__AVX512F__)
7779
template <>
78-
__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
80+
LIBC_INLINE
81+
__mmask64 compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
7982
__m512i v = _mm512_load_si512(block_ptr);
8083
__m512i z = _mm512_setzero_si512();
8184
return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
8285
}
8386
namespace avx512 {
8487
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
8588
return string_length_vector<__m512i, __mmask64,
86-
CompareAndMask<__m512i, __mmask64>>(src);
89+
compare_and_mask<__m512i, __mmask64>>(src);
8790
}
8891
} // namespace avx512
8992
#endif

0 commit comments

Comments
 (0)