@@ -18,22 +18,24 @@ namespace LIBC_NAMESPACE_DECL {
18
18
namespace string_length_internal {
19
19
// Return a bit-mask with the nth bit set if the nth byte of the block pointed to by block_ptr is zero.
20
20
template <typename Vector, typename Mask>
21
- Mask CompareAndMask (const Vector *block_ptr);
21
+ LIBC_INLINE static Mask
22
+ compare_and_mask (const Vector *block_ptr);
22
23
23
24
template <typename Vector, typename Mask,
24
- decltype (CompareAndMask<Vector, Mask>)>
25
- size_t string_length_vector (const char *src) {
25
+ decltype (compare_and_mask<Vector, Mask>)>
26
+ [[gnu::no_sanitize_address]] LIBC_INLINE static size_t
27
+ string_length_vector (const char *src) {
26
28
uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
27
29
28
30
const Vector *block_ptr =
29
31
reinterpret_cast <const Vector *>(src - misalign_bytes);
30
- auto cmp = CompareAndMask <Vector, Mask>(block_ptr) >> misalign_bytes;
32
+ auto cmp = compare_and_mask <Vector, Mask>(block_ptr) >> misalign_bytes;
31
33
if (cmp)
32
34
return cpp::countr_zero (cmp);
33
35
34
36
while (true ) {
35
37
block_ptr++;
36
- cmp = CompareAndMask <Vector, Mask>(block_ptr);
38
+ cmp = compare_and_mask <Vector, Mask>(block_ptr);
37
39
if (cmp)
38
40
return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
39
41
reinterpret_cast <uintptr_t >(src) +
@@ -42,7 +44,8 @@ size_t string_length_vector(const char *src) {
42
44
}
43
45
44
46
template <>
45
- uint32_t CompareAndMask<__m128i, uint32_t >(const __m128i *block_ptr) {
47
+ LIBC_INLINE uint32_t
48
+ compare_and_mask<__m128i, uint32_t >(const __m128i *block_ptr) {
46
49
__m128i v = _mm_load_si128 (block_ptr);
47
50
__m128i z = _mm_setzero_si128 ();
48
51
__m128i c = _mm_cmpeq_epi8 (z, v);
@@ -52,13 +55,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
52
55
namespace sse2 {
53
56
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
54
57
return string_length_vector<__m128i, uint32_t ,
55
- CompareAndMask <__m128i, uint32_t >>(src);
58
+ compare_and_mask <__m128i, uint32_t >>(src);
56
59
}
57
60
} // namespace sse2
58
61
59
62
#if defined(__AVX2__)
60
63
template <>
61
- uint32_t CompareAndMask<__m256i, uint32_t >(const __m256i *block_ptr) {
64
+ LIBC_INLINE uint32_t
65
+ compare_and_mask<__m256i, uint32_t >(const __m256i *block_ptr) {
62
66
__m256i v = _mm256_load_si256 (block_ptr);
63
67
__m256i z = _mm256_setzero_si256 ();
64
68
__m256i c = _mm256_cmpeq_epi8 (z, v);
@@ -68,22 +72,23 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
68
72
namespace avx2 {
69
73
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
70
74
return string_length_vector<__m256i, uint32_t ,
71
- CompareAndMask <__m256i, uint32_t >>(src);
75
+ compare_and_mask <__m256i, uint32_t >>(src);
72
76
}
73
77
} // namespace avx2
74
78
#endif
75
79
76
80
#if defined(__AVX512F__)
77
81
template <>
78
- __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
82
+ LIBC_INLINE __mmask64
83
+ compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
79
84
__m512i v = _mm512_load_si512 (block_ptr);
80
85
__m512i z = _mm512_setzero_si512 ();
81
86
return _mm512_cmp_epu8_mask (z, v, _MM_CMPINT_EQ);
82
87
}
83
88
namespace avx512 {
84
89
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
85
90
return string_length_vector<__m512i, __mmask64,
86
- CompareAndMask <__m512i, __mmask64>>(src);
91
+ compare_and_mask <__m512i, __mmask64>>(src);
87
92
}
88
93
} // namespace avx512
89
94
#endif
0 commit comments