Skip to content

Commit 863ad74

Browse files
Address some comments
1 parent fd74316 commit 863ad74

File tree

4 files changed

+71
-109
lines changed

4 files changed

+71
-109
lines changed

libc/src/string/inline_strlen.h

Lines changed: 0 additions & 38 deletions
This file was deleted.

libc/src/string/memory_utils/aarch64/inline_strlen.h

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,43 +8,44 @@
88
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
99
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
1010

11+
#if defined(__ARM_NEON)
12+
#include "src/__support/CPP/bit.h" // countr_zero
13+
1114
#include <arm_neon.h>
1215
#include <stddef.h> // size_t
1316

1417
namespace LIBC_NAMESPACE_DECL {
15-
16-
// Returns the number of bytes before the first NUL byte of `src`, examining
// eight bytes per iteration with 64-bit NEON vectors.
//
// The first load starts at `src` rounded down to a multiple of
// sizeof(Vector), so it can touch bytes that precede the string; later loads
// can touch bytes that follow the terminator.
// NOTE(review): the lane-to-bit mapping below presumably relies on a
// little-endian target - confirm.
[[maybe_unused]] LIBC_INLINE size_t string_length_neon(const char *src) {
  using Vector __attribute__((may_alias)) = uint8x8_t;

  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
  // Keep the const qualifier: reinterpret_cast may not cast it away.
  const Vector *block_ptr =
      reinterpret_cast<const Vector *>(src - misalign_bytes);
  Vector v = *block_ptr;
  Vector vcmp = vceqz_u8(v);
  // vceqz_u8 produces an unsigned vector, so reinterpret it with the u8
  // variant rather than the s8 one.
  uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
  uint64_t cmp = vget_lane_u64(cmp_mask, 0);
  // Drop the mask bits of the bytes in front of `src`; every byte owns eight
  // mask bits, hence the shift by misalign_bytes * 8.
  cmp = cmp >> (misalign_bytes << 3);
  if (cmp)
    return cpp::countr_zero(cmp) >> 3;

  while (true) {
    ++block_ptr;
    v = *block_ptr;
    vcmp = vceqz_u8(v);
    cmp_mask = vreinterpret_u64_u8(vcmp);
    cmp = vget_lane_u64(cmp_mask, 0);
    if (cmp)
      // Offset of this block from `src`, plus the byte index of the first
      // zero byte inside the block.
      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
                                 reinterpret_cast<uintptr_t>(src) +
                                 (cpp::countr_zero(cmp) >> 3));
  }
}
4243

4344
template <typename T>
4445
[[maybe_unused]] LIBC_INLINE void string_length_aarch64(const char *src) {
4546
return inline_string_length_neon(src);
4647
}
47-
4848
} // namespace LIBC_NAMESPACE_DECL
4949

50+
#endif // __ARM_NEON
5051
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H

libc/src/string/memory_utils/x86_64/inline_strlen.h

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -8,105 +8,102 @@
88
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
99
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
1010

11+
#include "src/__support/CPP/bit.h" // countr_zero
1112
#include "src/string/memory_utils/op_x86.h" // K_AVX
1213

1314
#include <stddef.h> // size_t
14-
#include <x86intrin.h>
15+
1516
namespace LIBC_NAMESPACE_DECL {
1617

17-
#if defined(__SSE2__)
1818
// Returns the number of bytes before the first NUL byte of `src`, testing
// sixteen bytes per step with SSE2.
//
// Loads are always aligned: the first one starts at `src` rounded down to a
// multiple of sizeof(Vector), so it can cover bytes in front of the string.
[[maybe_unused]] LIBC_INLINE size_t string_length_sse2(const char *src) {
  using Vector __attribute__((may_alias)) = __m128i;

  const Vector zeroes = _mm_setzero_si128();
  const uintptr_t offset = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
  const Vector *chunk = reinterpret_cast<const Vector *>(src - offset);

  // One mask bit per byte; shifting by `offset` throws away the bits that
  // belong to bytes located before `src`.
  uint32_t nul_mask =
      _mm_movemask_epi8(_mm_cmpeq_epi8(zeroes, _mm_load_si128(chunk))) >>
      offset;
  if (nul_mask)
    return cpp::countr_zero(nul_mask);

  for (;;) {
    ++chunk;
    nul_mask =
        _mm_movemask_epi8(_mm_cmpeq_epi8(zeroes, _mm_load_si128(chunk)));
    if (!nul_mask)
      continue;
    // Distance from `src` to this chunk plus the index of the first zero
    // byte inside it.
    return static_cast<size_t>(reinterpret_cast<uintptr_t>(chunk) -
                               reinterpret_cast<uintptr_t>(src) +
                               cpp::countr_zero(nul_mask));
  }
}
44-
#endif
4543

4644
#if defined(__AVX2__)
4745
// Returns the number of bytes before the first NUL byte of `src`, testing
// thirty-two bytes per step with AVX2.
//
// Loads are always aligned: the first one starts at `src` rounded down to a
// multiple of sizeof(Vector), so it can cover bytes in front of the string.
[[maybe_unused]] LIBC_INLINE size_t string_length_avx2(const char *src) {
  using Vector __attribute__((may_alias)) = __m256i;

  Vector z = _mm256_setzero_si256();
  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
  const Vector *block_ptr =
      reinterpret_cast<const Vector *>(src - misalign_bytes);
  Vector v = _mm256_load_si256(block_ptr);
  Vector vcmp = _mm256_cmpeq_epi8(z, v);
  // The mask uses all 32 bits of the returned int, so convert it to unsigned
  // before shifting: this keeps the shift logical and gives countr_zero an
  // unsigned operand, matching the SSE2 variant.
  // Shift away results in irrelevant bytes.
  uint32_t cmp =
      static_cast<uint32_t>(_mm256_movemask_epi8(vcmp)) >> misalign_bytes;
  if (cmp)
    return cpp::countr_zero(cmp);

  while (true) {
    block_ptr++;
    v = _mm256_load_si256(block_ptr);
    vcmp = _mm256_cmpeq_epi8(z, v);
    cmp = static_cast<uint32_t>(_mm256_movemask_epi8(vcmp));
    if (cmp)
      // Distance from `src` to this block plus the index of the first zero
      // byte inside it.
      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
                                 reinterpret_cast<uintptr_t>(src) +
                                 cpp::countr_zero(cmp));
  }
}
73-
#endif // __AVX__
70+
#endif // __AVX2__
7471

7572
#if defined(__AVX512F__)
7673
// Returns the number of bytes before the first NUL byte of `src`, testing
// sixty-four bytes per step with 512-bit vectors.
//
// Loads are always aligned: the first one starts at `src` rounded down to a
// multiple of sizeof(Vector), so it can cover bytes in front of the string.
// NOTE(review): _mm512_cmp_epu8_mask appears to be an AVX512BW intrinsic,
// while the enclosing guard only tests __AVX512F__ - confirm the guard.
[[maybe_unused]] LIBC_INLINE size_t string_length_avx512(const char *src) {
  using Vector __attribute__((may_alias)) = __m512i;

  Vector z = _mm512_setzero_si512();
  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
  const Vector *block_ptr =
      reinterpret_cast<const Vector *>(src - misalign_bytes);
  Vector v = _mm512_load_si512(block_ptr);
  // One mask bit per byte; shift away results in irrelevant bytes.
  __mmask64 cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ) >> misalign_bytes;
  if (cmp)
    return cpp::countr_zero(cmp);

  while (true) {
    block_ptr++;
    // Reuse the outer variables instead of shadowing them with new ones.
    v = _mm512_load_si512(block_ptr);
    cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
    if (cmp)
      // Distance from `src` to this block plus the index of the first zero
      // byte inside it.
      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
                                 reinterpret_cast<uintptr_t>(src) +
                                 cpp::countr_zero(cmp));
  }
}
9995
#endif // __AVX512F__
10096

97+
namespace x86 {
10198
template <typename T> LIBC_INLINE size_t string_length_x86_64(const char *src) {
10299
#if defined(__AVX512F__)
103100
return string_length_avx512(src);
104-
#endif
105-
#if defined(__AVX__)
101+
#elif defined(__AVX2__)
106102
return string_length_avx2(src);
107103
#endif
108104
return string_length_sse2(src);
109105
}
106+
}
110107

111108
} // namespace LIBC_NAMESPACE_DECL
112109

libc/src/string/string_utils.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
2626
#if defined(LIBC_TARGET_ARCH_IS_X86)
2727
#include "src/string/memory_utils/x86_64/inline_strlen.h"
28-
#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_x86_64
29-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
28+
namespace wide_read_impl = x86;
29+
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
3030
#include "src/string/memory_utils/aarch64/inline_strlen.h"
31-
#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_aarch64
31+
namespace wide_read_impl = aarch64;
3232
#else
33-
#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_wide_read
33+
namespace wide_read_impl = default_wide_read;
3434
#endif
3535
#endif
3636

@@ -65,13 +65,14 @@ template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
6565
// high bit set will no longer have it set, narrowing the list of bytes which
6666
// result in non-zero values to just the zero byte.
6767
// Returns true when the expression below flags at least one byte of `block`
// as zero.
// NOTE(review): repeat_byte<Word>(b) presumably fills every byte of a Word
// with `b` - confirm against its definition.
template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
  // Both masks must have the full width of Word. Storing LOW_BITS in a
  // narrower type would truncate it, and the subtraction would then leave the
  // upper bytes of a wide Word unchecked.
  constexpr Word LOW_BITS = repeat_byte<Word>(0x01);
  constexpr Word HIGH_BITS = repeat_byte<Word>(0x80);
  Word subtracted = block - LOW_BITS;
  Word inverted = ~block;
  return (subtracted & inverted & HIGH_BITS) != 0;
}
7474

75+
namespace default_wide_read {
7576
template <typename Word>
7677
LIBC_INLINE size_t string_length_wide_read(const char *src) {
7778
const char *char_ptr = src;
@@ -92,6 +93,7 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
9293
}
9394
return static_cast<size_t>(char_ptr - src);
9495
}
96+
} // namespace default_wide_read
9597

9698
// Returns the length of a string, denoted by the first occurrence
9799
// of a null terminator.
@@ -102,7 +104,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
102104
// be aligned to a word boundary, so it's the size we use for reading the
103105
// string a block at a time.
104106
if constexpr (cpp::is_same_v<T, char>)
105-
return LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ<unsigned int>(src);
107+
return wide_read_impl::string_length(src);
106108
#endif
107109
size_t length;
108110
for (length = 0; *src; ++src, ++length)

0 commit comments

Comments
 (0)