Skip to content

Commit a944dd2

Browse files
lemire (Daniel Lemire)
and authored
fix node issue 49960 (#519)
* fix node issue 49960
* add: slow path for short strings.
* fixing alignment/sse
* format

---------

Co-authored-by: Daniel Lemire <[email protected]>
1 parent 60b5c51 commit a944dd2

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ endif(ADA_BOOST_URL)
226226
endif(Boost_FOUND)
227227

228228
# Zuri
229-
find_package(ZURI)
229+
find_package(ZURI QUIET)
230230
if(ZURI_FOUND)
231231
message(STATUS "Zuri found")
232232
target_link_libraries(bench PRIVATE zuri)

src/unicode.cpp

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
4848
#if ADA_NEON
4949
ada_really_inline bool has_tabs_or_newline(
5050
std::string_view user_input) noexcept {
51+
// first check for short strings in which case we do it naively.
52+
if (user_input.size() < 16) { // slow path
53+
for (size_t i = 0; i < user_input.size(); i++) {
54+
if (user_input[i] == '\r' || user_input[i] == '\n' ||
55+
user_input[i] == '\t') {
56+
return true;
57+
}
58+
}
59+
return false;
60+
}
61+
// fast path for long strings (expected to be common)
5162
size_t i = 0;
5263
const uint8x16_t mask1 = vmovq_n_u8('\r');
5364
const uint8x16_t mask2 = vmovq_n_u8('\n');
@@ -60,9 +71,8 @@ ada_really_inline bool has_tabs_or_newline(
6071
vceqq_u8(word, mask3));
6172
}
6273
if (i < user_input.size()) {
63-
uint8_t buffer[16]{};
64-
memcpy(buffer, user_input.data() + i, user_input.size() - i);
65-
uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i);
74+
uint8x16_t word =
75+
vld1q_u8((const uint8_t*)user_input.data() + user_input.length() - 16);
6676
running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
6777
vceqq_u8(word, mask2))),
6878
vceqq_u8(word, mask3));
@@ -72,6 +82,17 @@ ada_really_inline bool has_tabs_or_newline(
7282
#elif ADA_SSE2
7383
ada_really_inline bool has_tabs_or_newline(
7484
std::string_view user_input) noexcept {
85+
// first check for short strings in which case we do it naively.
86+
if (user_input.size() < 16) { // slow path
87+
for (size_t i = 0; i < user_input.size(); i++) {
88+
if (user_input[i] == '\r' || user_input[i] == '\n' ||
89+
user_input[i] == '\t') {
90+
return true;
91+
}
92+
}
93+
return false;
94+
}
95+
// fast path for long strings (expected to be common)
7596
size_t i = 0;
7697
const __m128i mask1 = _mm_set1_epi8('\r');
7798
const __m128i mask2 = _mm_set1_epi8('\n');
@@ -85,9 +106,8 @@ ada_really_inline bool has_tabs_or_newline(
85106
_mm_cmpeq_epi8(word, mask3));
86107
}
87108
if (i < user_input.size()) {
88-
alignas(16) uint8_t buffer[16]{};
89-
memcpy(buffer, user_input.data() + i, user_input.size() - i);
90-
__m128i word = _mm_load_si128((const __m128i*)buffer);
109+
__m128i word = _mm_loadu_si128(
110+
(const __m128i*)(user_input.data() + user_input.length() - 16));
91111
running = _mm_or_si128(
92112
_mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
93113
_mm_cmpeq_epi8(word, mask2))),

0 commit comments

Comments
 (0)