@@ -18,22 +18,23 @@ namespace LIBC_NAMESPACE_DECL {
18
18
namespace string_length_internal {
19
19
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
20
20
template <typename Vector, typename Mask>
21
- Mask CompareAndMask (const Vector *block_ptr);
21
+ LIBC_INLINE static Mask compare_and_mask (const Vector *block_ptr);
22
22
23
23
template <typename Vector, typename Mask,
24
- decltype (CompareAndMask<Vector, Mask>)>
25
- size_t string_length_vector (const char *src) {
24
+ decltype (compare_and_mask<Vector, Mask>)>
25
+ LIBC_INLINE static [[clang::no_sanitize(" address" )]] size_t
26
+ string_length_vector (const char *src) {
26
27
uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
27
28
28
29
const Vector *block_ptr =
29
30
reinterpret_cast <const Vector *>(src - misalign_bytes);
30
- auto cmp = CompareAndMask <Vector, Mask>(block_ptr) >> misalign_bytes;
31
+ auto cmp = compare_and_mask <Vector, Mask>(block_ptr) >> misalign_bytes;
31
32
if (cmp)
32
33
return cpp::countr_zero (cmp);
33
34
34
35
while (true ) {
35
36
block_ptr++;
36
- cmp = CompareAndMask <Vector, Mask>(block_ptr);
37
+ cmp = compare_and_mask <Vector, Mask>(block_ptr);
37
38
if (cmp)
38
39
return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
39
40
reinterpret_cast <uintptr_t >(src) +
@@ -42,7 +43,8 @@ size_t string_length_vector(const char *src) {
42
43
}
43
44
44
45
template <>
45
- uint32_t CompareAndMask<__m128i, uint32_t >(const __m128i *block_ptr) {
46
+ LIBC_INLINE uint32_t
47
+ compare_and_mask<__m128i, uint32_t >(const __m128i *block_ptr) {
46
48
__m128i v = _mm_load_si128 (block_ptr);
47
49
__m128i z = _mm_setzero_si128 ();
48
50
__m128i c = _mm_cmpeq_epi8 (z, v);
@@ -52,13 +54,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
52
54
namespace sse2 {
53
55
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
54
56
return string_length_vector<__m128i, uint32_t ,
55
- CompareAndMask <__m128i, uint32_t >>(src);
57
+ compare_and_mask <__m128i, uint32_t >>(src);
56
58
}
57
59
} // namespace sse2
58
60
59
61
#if defined(__AVX2__)
60
62
template <>
61
- uint32_t CompareAndMask<__m256i, uint32_t >(const __m256i *block_ptr) {
63
+ LIBC_INLINE uint32_t
64
+ compare_and_mask<__m256i, uint32_t >(const __m256i *block_ptr) {
62
65
__m256i v = _mm256_load_si256 (block_ptr);
63
66
__m256i z = _mm256_setzero_si256 ();
64
67
__m256i c = _mm256_cmpeq_epi8 (z, v);
@@ -68,22 +71,23 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
68
71
namespace avx2 {
69
72
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
70
73
return string_length_vector<__m256i, uint32_t ,
71
- CompareAndMask <__m256i, uint32_t >>(src);
74
+ compare_and_mask <__m256i, uint32_t >>(src);
72
75
}
73
76
} // namespace avx2
74
77
#endif
75
78
76
79
#if defined(__AVX512F__)
77
80
template <>
78
- __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
81
+ LIBC_INLINE __mmask64
82
+ compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
79
83
__m512i v = _mm512_load_si512 (block_ptr);
80
84
__m512i z = _mm512_setzero_si512 ();
81
85
return _mm512_cmp_epu8_mask (z, v, _MM_CMPINT_EQ);
82
86
}
83
87
namespace avx512 {
84
88
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
85
89
return string_length_vector<__m512i, __mmask64,
86
- CompareAndMask <__m512i, __mmask64>>(src);
90
+ compare_and_mask <__m512i, __mmask64>>(src);
87
91
}
88
92
} // namespace avx512
89
93
#endif
0 commit comments