88#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
99#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
1010
11- #include " src/string/memory_utils/op_x86.h" // K_AVX
11+ #include " src/string/memory_utils/op_x86.h" // K_AVX
1212
1313#include < stddef.h> // size_t
1414#include < x86intrin.h>
@@ -19,28 +19,27 @@ namespace LIBC_NAMESPACE_DECL {
1919 using Vector __attribute__ ((may_alias)) = __m128i;
2020 Vector z = _mm_setzero_si128 ();
2121 uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
22- const Vector *block_ptr = reinterpret_cast <const Vector *>(src - misalign_bytes);
23- if (misalign_bytes)
24- {
25- Vector v = _mm_load_si128 (block_ptr);
26- Vector vcmp = _mm_cmpeq_epi8 (z, v);
27- // shift away results in irrelevant bytes.
28- int cmp = _mm_movemask_epi8 (vcmp) >> misalign_bytes;
29- if (cmp)
30- return __builtin_ctz (cmp);
31- block_ptr++;
32- }
33- while (true )
34- {
35- Vector v = _mm_load_si128 (block_ptr);
36- Vector vcmp = _mm_cmpeq_epi8 (z, v);
37- int cmp = _mm_movemask_epi8 (vcmp);
38- if (cmp)
39- return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
40- reinterpret_cast <uintptr_t >(src) +
41- __builtin_ctz (cmp));
42- block_ptr++;
43- }
22+ const Vector *block_ptr =
23+ reinterpret_cast <const Vector *>(src - misalign_bytes);
24+ if (misalign_bytes) {
25+ Vector v = _mm_load_si128 (block_ptr);
26+ Vector vcmp = _mm_cmpeq_epi8 (z, v);
27+ // shift away results in irrelevant bytes.
28+ int cmp = _mm_movemask_epi8 (vcmp) >> misalign_bytes;
29+ if (cmp)
30+ return __builtin_ctz (cmp);
31+ block_ptr++;
32+ }
33+ while (true ) {
34+ Vector v = _mm_load_si128 (block_ptr);
35+ Vector vcmp = _mm_cmpeq_epi8 (z, v);
36+ int cmp = _mm_movemask_epi8 (vcmp);
37+ if (cmp)
38+ return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
39+ reinterpret_cast <uintptr_t >(src) +
40+ __builtin_ctz (cmp));
41+ block_ptr++;
42+ }
4443}
4544#endif
4645
@@ -49,59 +48,57 @@ namespace LIBC_NAMESPACE_DECL {
4948 using Vector __attribute__ ((may_alias)) = __mm256i;
5049 Vector z = _mm256_setzero_si256 ();
5150 uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
52- const Vector *block_ptr = reinterpret_cast <const Vector *>(src - misalign_bytes);
53- if (misalign_bytes)
54- {
55- Vector v = _mm256_load_si256 (block_ptr);
56- Vector vcmp = _mm256_cmpeq_epi8 (z, v);
57- // shift away results in irrelevant bytes.
58- int cmp = _mm256_movemask_epi8 (vcmp) >> misalign_bytes;
59- if (cmp)
60- return __builtin_ctz (cmp);
61- block_ptr++;
51+ const Vector *block_ptr =
52+ reinterpret_cast <const Vector *>(src - misalign_bytes);
53+ if (misalign_bytes) {
54+ Vector v = _mm256_load_si256 (block_ptr);
55+ Vector vcmp = _mm256_cmpeq_epi8 (z, v);
56+ // shift away results in irrelevant bytes.
57+ int cmp = _mm256_movemask_epi8 (vcmp) >> misalign_bytes;
58+ if (cmp)
59+ return __builtin_ctz (cmp);
60+ block_ptr++;
61+ }
62+ while (true ) {
63+ Vector v = _mm256_load_si256 (block_ptr);
64+ Vector vcmp = _mm256_cmpeq_epi8 (z, v);
65+ int cmp = _mm256_movemask_epi8 (vcmp);
66+ if (cmp)
67+ return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
68+ reinterpret_cast <uintptr_t >(src) +
69+ __builtin_ctz (cmp));
70+ block_ptr++;
6271 }
63- while (true )
64- {
65- Vector v = _mm256_load_si256 (block_ptr);
66- Vector vcmp = _mm256_cmpeq_epi8 (z, v);
67- int cmp = _mm256_movemask_epi8 (vcmp);
68- if (cmp)
69- return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
70- reinterpret_cast <uintptr_t >(src) +
71- __builtin_ctz (cmp));
72- block_ptr++;
73- }
7472}
75- #endif // __AVX__
73+ #endif // __AVX__
7674
7775#if defined(__AVX512F__)
7876[[maybe_unused]] LIBC_INLINE size_t string_length_avx512 (const char *src) {
7977 using Vector __attribute__ ((may_alias)) = __mm512i;
8078 Vector z = _mm512_setzero_si512 ();
8179 uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
82- const Vector *block_ptr = reinterpret_cast <const Vector *>(src - misalign_bytes);
80+ const Vector *block_ptr =
81+ reinterpret_cast <const Vector *>(src - misalign_bytes);
8382 if (misalign_bytes) {
8483 Vector v = _mm512_load_si512 (block_ptr);
8584 __mmask64 cmp = _mm512_cmp_epu8_mask (z, v, _MM_CMPINT_EQ) >> misalign_bytes;
8685 if (cmp)
8786 return __builtin_ctzl (cmp);
8887 block_ptr++;
8988 }
90- while (true )
91- {
89+ while (true ) {
9290 Vector v = _mm512_load_si512 (block_ptr);
9391 __mmask64 cmp = _mm512_cmp_epu8_mask (z, v, _MM_CMPINT_EQ);
9492 if (cmp)
9593 return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
9694 reinterpret_cast <uintptr_t >(src) +
9795 __builtin_ctz (cmp));
9896 block_ptr++;
99- }
97+ }
10098}
101- #endif // __AVX512F__
99+ #endif // __AVX512F__
102100
103- template <typename T> LIBC_INLINE
104- size_t string_length_x86_64 (const char *src) {
101+ template <typename T> LIBC_INLINE size_t string_length_x86_64 (const char *src) {
105102#if defined(__AVX512F__)
106103 return string_length_avx512 (src);
107104#endif
0 commit comments