@@ -8,6 +8,11 @@ ADA_PUSH_DISABLE_ALL_WARNINGS
8
8
ADA_POP_DISABLE_WARNINGS
9
9
10
10
#include < algorithm>
11
+ #if ADA_NEON
12
+ #include < arm_neon.h>
13
+ #elif ADA_SSE2
14
+ #include < emmintrin.h>
15
+ #endif
11
16
12
17
namespace ada ::unicode {
13
18
@@ -39,8 +44,58 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
39
44
}
40
45
return non_ascii == 0 ;
41
46
}
42
-
43
- ada_really_inline constexpr bool has_tabs_or_newline (
47
+ #if ADA_NEON
48
/**
 * Reports whether user_input contains an ASCII tab ('\t'), line feed ('\n')
 * or carriage return ('\r').
 *
 * ARM NEON implementation: the input is scanned 16 bytes at a time and the
 * per-byte comparison results are OR-ed into a single accumulator that is
 * inspected once at the end.
 *
 * @param user_input the bytes to scan (may be empty).
 * @return true if at least one of the three characters is present.
 */
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  const uint8x16_t mask1 = vmovq_n_u8('\r');
  const uint8x16_t mask2 = vmovq_n_u8('\n');
  const uint8x16_t mask3 = vmovq_n_u8('\t');
  // A lane of `running` becomes 0xFF as soon as any scanned byte in that
  // lane position matched one of the masks.
  uint8x16_t running = vmovq_n_u8(0);
  size_t i = 0;
  for (; i + 15 < user_input.size(); i += 16) {
    const uint8x16_t word =
        vld1q_u8(reinterpret_cast<const uint8_t*>(user_input.data()) + i);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  if (i < user_input.size()) {
    // Fewer than 16 bytes remain. Copy them into a zero-filled scratch
    // buffer so the 16-byte load never reads past the end of the input;
    // the zero padding cannot match '\r', '\n' or '\t'.
    uint8_t buffer[16]{};
    memcpy(buffer, user_input.data() + i, user_input.size() - i);
    const uint8x16_t word = vld1q_u8(buffer);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  // Horizontal maximum is non-zero iff some lane recorded a match.
  return vmaxvq_u8(running) != 0;
}
71
+ #elif ADA_SSE2
72
/**
 * Reports whether user_input contains an ASCII tab ('\t'), line feed ('\n')
 * or carriage return ('\r').
 *
 * SSE2 implementation: the input is scanned 16 bytes at a time and the
 * per-byte comparison results are OR-ed into a single accumulator that is
 * inspected once at the end.
 *
 * @param user_input the bytes to scan (may be empty).
 * @return true if at least one of the three characters is present.
 */
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  const __m128i mask1 = _mm_set1_epi8('\r');
  const __m128i mask2 = _mm_set1_epi8('\n');
  const __m128i mask3 = _mm_set1_epi8('\t');
  // A byte of `running` becomes 0xFF as soon as any scanned byte in that
  // lane position matched one of the masks.
  __m128i running = _mm_setzero_si128();
  size_t i = 0;
  for (; i + 15 < user_input.size(); i += 16) {
    const __m128i word = _mm_loadu_si128(
        reinterpret_cast<const __m128i*>(user_input.data() + i));
    running = _mm_or_si128(
        _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
                                           _mm_cmpeq_epi8(word, mask2))),
        _mm_cmpeq_epi8(word, mask3));
  }
  if (i < user_input.size()) {
    // Fewer than 16 bytes remain. Copy them into a zero-filled scratch
    // buffer: without the zero fill the bytes past the copied tail would be
    // indeterminate and could spuriously compare equal to a mask.
    uint8_t buffer[16]{};
    memcpy(buffer, user_input.data() + i, user_input.size() - i);
    const __m128i word =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer));
    running = _mm_or_si128(
        _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
                                           _mm_cmpeq_epi8(word, mask2))),
        _mm_cmpeq_epi8(word, mask3));
  }
  // movemask gathers the top bit of every byte; non-zero means a match.
  return _mm_movemask_epi8(running) != 0;
}
97
+ #else
98
+ ada_really_inline bool has_tabs_or_newline (
44
99
std::string_view user_input) noexcept {
45
100
auto has_zero_byte = [](uint64_t v) {
46
101
return ((v - 0x0101010101010101 ) & ~(v)&0x8080808080808080 );
@@ -71,6 +126,7 @@ ada_really_inline constexpr bool has_tabs_or_newline(
71
126
}
72
127
return running;
73
128
}
129
+ #endif
74
130
75
131
// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
76
132
// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
0 commit comments