@@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL {
18
18
namespace string_length_internal {
19
19
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
20
20
template <typename Vector, typename Mask>
21
- Mask CompareAndMask (const Vector *block_ptr);
21
+ LIBC_INLINE static Mask compare_and_mask (const Vector *block_ptr);
22
22
23
23
template <typename Vector, typename Mask,
24
- decltype (CompareAndMask <Vector, Mask>)>
24
+ decltype (compare_and_mask <Vector, Mask>)>
25
25
size_t string_length_vector (const char *src) {
26
26
uintptr_t misalign_bytes = reinterpret_cast <uintptr_t >(src) % sizeof (Vector);
27
27
28
28
const Vector *block_ptr =
29
29
reinterpret_cast <const Vector *>(src - misalign_bytes);
30
- auto cmp = CompareAndMask <Vector, Mask>(block_ptr) >> misalign_bytes;
30
+ auto cmp = compare_and_mask <Vector, Mask>(block_ptr) >> misalign_bytes;
31
31
if (cmp)
32
32
return cpp::countr_zero (cmp);
33
33
34
34
while (true ) {
35
35
block_ptr++;
36
- cmp = CompareAndMask <Vector, Mask>(block_ptr);
36
+ cmp = compare_and_mask <Vector, Mask>(block_ptr);
37
37
if (cmp)
38
38
return static_cast <size_t >(reinterpret_cast <uintptr_t >(block_ptr) -
39
39
reinterpret_cast <uintptr_t >(src) +
@@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) {
42
42
}
43
43
44
44
template <>
45
- uint32_t CompareAndMask<__m128i, uint32_t >(const __m128i *block_ptr) {
45
+ LIBC_INLINE uint32_t
46
+ compare_and_mask<__m128i, uint32_t >(const __m128i *block_ptr) {
46
47
__m128i v = _mm_load_si128 (block_ptr);
47
48
__m128i z = _mm_setzero_si128 ();
48
49
__m128i c = _mm_cmpeq_epi8 (z, v);
@@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
52
53
namespace sse2 {
53
54
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
54
55
return string_length_vector<__m128i, uint32_t ,
55
- CompareAndMask <__m128i, uint32_t >>(src);
56
+ compare_and_mask <__m128i, uint32_t >>(src);
56
57
}
57
58
} // namespace sse2
58
59
59
60
#if defined(__AVX2__)
60
61
template <>
61
- uint32_t CompareAndMask<__m256i, uint32_t >(const __m256i *block_ptr) {
62
+ LIBC_INLINE
63
+ uint32_t compare_and_mask<__m256i, uint32_t >(const __m256i *block_ptr) {
62
64
__m256i v = _mm256_load_si256 (block_ptr);
63
65
__m256i z = _mm256_setzero_si256 ();
64
66
__m256i c = _mm256_cmpeq_epi8 (z, v);
@@ -68,22 +70,23 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
68
70
namespace avx2 {
69
71
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
70
72
return string_length_vector<__m256i, uint32_t ,
71
- CompareAndMask <__m256i, uint32_t >>(src);
73
+ compare_and_mask <__m256i, uint32_t >>(src);
72
74
}
73
75
} // namespace avx2
74
76
#endif
75
77
76
78
#if defined(__AVX512F__)
77
79
template <>
78
- __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
80
+ LIBC_INLINE
81
+ __mmask64 compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
79
82
__m512i v = _mm512_load_si512 (block_ptr);
80
83
__m512i z = _mm512_setzero_si512 ();
81
84
return _mm512_cmp_epu8_mask (z, v, _MM_CMPINT_EQ);
82
85
}
83
86
namespace avx512 {
84
87
[[maybe_unused]] LIBC_INLINE size_t string_length (const char *src) {
85
88
return string_length_vector<__m512i, __mmask64,
86
- CompareAndMask <__m512i, __mmask64>>(src);
89
+ compare_and_mask <__m512i, __mmask64>>(src);
87
90
}
88
91
} // namespace avx512
89
92
#endif
0 commit comments