@@ -48,6 +48,17 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
48
48
#if ADA_NEON
49
49
ada_really_inline bool has_tabs_or_newline (
50
50
std::string_view user_input) noexcept {
51
+ // Inputs shorter than 16 bytes cannot fill a 16-byte vector: scan them byte by byte.
52
+ if (user_input.size () < 16 ) { // slow path
53
+ for (size_t i = 0 ; i < user_input.size (); i++) {
54
+ if (user_input[i] == ' \r ' || user_input[i] == ' \n ' ||
55
+ user_input[i] == ' \t ' ) {
56
+ return true ;
57
+ }
58
+ }
59
+ return false ;
60
+ }
61
+ // Fast path: inputs of at least 16 bytes (expected to be the common case).
51
62
size_t i = 0 ;
52
63
const uint8x16_t mask1 = vmovq_n_u8 (' \r ' );
53
64
const uint8x16_t mask2 = vmovq_n_u8 (' \n ' );
@@ -60,9 +71,8 @@ ada_really_inline bool has_tabs_or_newline(
60
71
vceqq_u8 (word, mask3));
61
72
}
62
73
if (i < user_input.size ()) {
63
- uint8_t buffer[16 ]{};
64
- memcpy (buffer, user_input.data () + i, user_input.size () - i);
65
- uint8x16_t word = vld1q_u8 ((const uint8_t *)user_input.data () + i);
74
+ uint8x16_t word =
75
+ vld1q_u8 ((const uint8_t *)user_input.data () + user_input.length () - 16 );
66
76
running = vorrq_u8 (vorrq_u8 (running, vorrq_u8 (vceqq_u8 (word, mask1),
67
77
vceqq_u8 (word, mask2))),
68
78
vceqq_u8 (word, mask3));
@@ -72,6 +82,17 @@ ada_really_inline bool has_tabs_or_newline(
72
82
#elif ADA_SSE2
73
83
ada_really_inline bool has_tabs_or_newline (
74
84
std::string_view user_input) noexcept {
85
+ // Inputs shorter than 16 bytes cannot fill a 16-byte vector: scan them byte by byte.
86
+ if (user_input.size () < 16 ) { // slow path
87
+ for (size_t i = 0 ; i < user_input.size (); i++) {
88
+ if (user_input[i] == ' \r ' || user_input[i] == ' \n ' ||
89
+ user_input[i] == ' \t ' ) {
90
+ return true ;
91
+ }
92
+ }
93
+ return false ;
94
+ }
95
+ // Fast path: inputs of at least 16 bytes (expected to be the common case).
75
96
size_t i = 0 ;
76
97
const __m128i mask1 = _mm_set1_epi8 (' \r ' );
77
98
const __m128i mask2 = _mm_set1_epi8 (' \n ' );
@@ -85,9 +106,8 @@ ada_really_inline bool has_tabs_or_newline(
85
106
_mm_cmpeq_epi8 (word, mask3));
86
107
}
87
108
if (i < user_input.size ()) {
88
- alignas (16 ) uint8_t buffer[16 ]{};
89
- memcpy (buffer, user_input.data () + i, user_input.size () - i);
90
- __m128i word = _mm_load_si128 ((const __m128i*)buffer);
109
+ __m128i word = _mm_loadu_si128 (
110
+ (const __m128i*)(user_input.data () + user_input.length () - 16 ));
91
111
running = _mm_or_si128 (
92
112
_mm_or_si128 (running, _mm_or_si128 (_mm_cmpeq_epi8 (word, mask1),
93
113
_mm_cmpeq_epi8 (word, mask2))),
0 commit comments