2424#include " src/__support/macros/optimization.h" // LIBC_UNLIKELY
2525#include " src/string/memory_utils/inline_memcpy.h"
2626
27- #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
2827#if LIBC_HAS_VECTOR_TYPE
2928#include " src/string/memory_utils/generic/inline_strlen.h"
30- #elif defined(LIBC_TARGET_ARCH_IS_X86)
29+ #endif
30+ #if defined(LIBC_TARGET_ARCH_IS_X86)
3131#include " src/string/memory_utils/x86_64/inline_strlen.h"
32- #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
32+ #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
3333#include " src/string/memory_utils/aarch64/inline_strlen.h"
34- #else
35- namespace string_length_impl = LIBC_NAMESPACE::wide_read;
3634#endif
37- #endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
3835
3936namespace LIBC_NAMESPACE_DECL {
4037namespace internal {
4138
39+ #if !LIBC_HAS_VECTOR_TYPE
40+ // Forward any generic vector impls to architecture specific ones
41+ namespace arch {}
42+ namespace generic = arch;
43+ #endif
44+
namespace element {
// Element-by-element (usually a byte, but wider for wchar) implementations of
// functions that search for data. Slow, but easy to understand and analyze.

// Returns the number of elements of type T that precede the first zero-valued
// element of 'src'. The scan only stops at a zero element, so 'src' has to
// contain one.
template <typename T> LIBC_INLINE size_t string_length_element(const T *src) {
  size_t length = 0;
  for (; *src; ++src)
    ++length;
  return length;
}

// Returns the length of a string, denoted by the first occurrence
// of a null terminator. This is the plain (non-template) char entry point; it
// shares its scan loop with string_length_element instead of duplicating it.
LIBC_INLINE size_t string_length(const char *src) {
  return string_length_element<char>(src);
}

// Returns a pointer to the first byte equal to 'ch' among the first 'n' bytes
// of 'src', or nullptr when none of those bytes matches (including n == 0).
LIBC_INLINE void *find_first_character(const unsigned char *src,
                                       unsigned char ch, size_t n) {
  // 'n' counts the bytes still to be examined; it only reaches zero when the
  // whole range was scanned without a match.
  for (; n && *src != ch; --n, ++src)
    ;
  return n ? const_cast<unsigned char *>(src) : nullptr;
}
} // namespace element
72+
73+ namespace wide {
74+ // Generic, non-vector, implementations of functions that search for data
75+ // by reading from memory block-by-block.
76+
4277template <typename Word> LIBC_INLINE constexpr Word repeat_byte (Word byte) {
4378 static_assert (CHAR_BIT == 8 , " repeat_byte assumes a byte is 8 bits." );
4479 constexpr size_t BITS_IN_BYTE = CHAR_BIT;
@@ -74,8 +109,13 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
74109 return (subtracted & inverted & HIGH_BITS) != 0 ;
75110}
76111
77- template <typename Word>
78- LIBC_INLINE size_t string_length_wide_read (const char *src) {
112+ // Unsigned int is the default size for most processors, and on x86-64 it
113+ // performs better than larger sizes when the src pointer can't be assumed to
114+ // be aligned to a word boundary, so it's the size we use for reading the
115+ // string a block at a time.
116+
117+ LIBC_INLINE size_t string_length (const char *src) {
118+ using Word = unsigned int ;
79119 const char *char_ptr = src;
80120 // Step 1: read 1 byte at a time to align to block size
81121 for (; reinterpret_cast <uintptr_t >(char_ptr) % sizeof (Word) != 0 ;
@@ -95,37 +135,23 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
95135 return static_cast <size_t >(char_ptr - src);
96136}
97137
98- namespace wide_read {
99- LIBC_INLINE size_t string_length (const char *src) {
100- // Unsigned int is the default size for most processors, and on x86-64 it
101- // performs better than larger sizes when the src pointer can't be assumed to
102- // be aligned to a word boundary, so it's the size we use for reading the
103- // string a block at a time.
104- return string_length_wide_read<unsigned int >(src);
105- }
106-
107- } // namespace wide_read
108-
109- // Returns the length of a string, denoted by the first occurrence
110- // of a null terminator.
111- template <typename T> LIBC_INLINE size_t string_length (const T *src) {
112- #ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
113- if constexpr (cpp::is_same_v<T, char >)
114- return string_length_impl::string_length (src);
115- #endif
116- size_t length;
117- for (length = 0 ; *src; ++src, ++length)
118- ;
119- return length;
120- }
121-
122- template <typename Word>
123138LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void *
124- find_first_character_wide_read (const unsigned char *src, unsigned char ch,
125- size_t n) {
139+ find_first_character (const unsigned char *src, unsigned char ch,
140+ size_t max_strlen = cpp::numeric_limits<size_t >::max()) {
141+ using Word = unsigned int ;
126142 const unsigned char *char_ptr = src;
127143 size_t cur = 0 ;
128144
145+ // If the maximum size of the string is small, the overhead of aligning to a
146+ // word boundary and generating a bitmask of the appropriate size may be
147+ // greater than the gains from reading larger chunks. Based on some testing,
148+ // the crossover point between when it's faster to just read bytewise and read
149+ // blocks is somewhere between 16 and 32, so 4 times the size of the block
150+ // should be in that range.
151+ if (max_strlen < (sizeof (Word) * 4 )) {
152+ return element::find_first_character (src, ch, max_strlen);
153+ }
154+ size_t n = max_strlen;
129155 // Step 1: read 1 byte at a time to align to block size
130156 for (; reinterpret_cast <uintptr_t >(char_ptr) % sizeof (Word) != 0 && cur < n;
131157 ++char_ptr, ++cur) {
@@ -153,31 +179,35 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
153179 return const_cast <unsigned char *>(char_ptr);
154180}
155181
156- LIBC_INLINE void *find_first_character_byte_read (const unsigned char *src,
157- unsigned char ch, size_t n) {
158- for (; n && *src != ch; --n, ++src)
159- ;
160- return n ? const_cast <unsigned char *>(src) : nullptr ;
182+ } // namespace wide
183+
184+ // Dispatch mechanism for implementations of performance-sensitive
185+ // functions. Always measure, but generally from lower- to higher-performance
186+ // order:
187+ //
188+ // 1. element - read char-by-char or wchar-by-wchar
189+ // 2. wide - read word-by-word
190+ // 3. generic - read using clang's internal vector types
191+ // 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics.
192+ //
193+ // The called implementation is chosen at build-time by setting
194+ // LIBC_CONF_{FUNC}_IMPL in config.json
195+ static constexpr auto &string_length_impl =
196+ LIBC_COPT_STRING_LENGTH_IMPL::string_length;
197+ static constexpr auto &find_first_character_impl =
198+ LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character;
199+
200+ template <typename T> LIBC_INLINE size_t string_length (const T *src) {
201+ if constexpr (cpp::is_same_v<T, char >)
202+ return string_length_impl (src);
203+ return element::string_length_element<T>(src);
161204}
162205
163206// Returns the first occurrence of 'ch' within the first 'max_strlen'
164207// characters of 'src'. If 'ch' is not found, returns nullptr.
165208LIBC_INLINE void *find_first_character (const unsigned char *src,
166209 unsigned char ch, size_t max_strlen) {
167- #ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
168- // If the maximum size of the string is small, the overhead of aligning to a
169- // word boundary and generating a bitmask of the appropriate size may be
170- // greater than the gains from reading larger chunks. Based on some testing,
171- // the crossover point between when it's faster to just read bytewise and read
172- // blocks is somewhere between 16 and 32, so 4 times the size of the block
173- // should be in that range.
174- // Unsigned int is used for the same reason as in strlen.
175- using BlockType = unsigned int ;
176- if (max_strlen > (sizeof (BlockType) * 4 )) {
177- return find_first_character_wide_read<BlockType>(src, ch, max_strlen);
178- }
179- #endif
180- return find_first_character_byte_read (src, ch, max_strlen);
210+ return find_first_character_impl (src, ch, max_strlen);
181211}
182212
183213// Returns the maximum length span that contains only characters not found in
0 commit comments