55#include <math.h>
66#include <ctype.h>
77
8- #include "simd.h"
8+ #include "../simd/simd.h"
99
1010/* ruby api and some helpers */
1111
@@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304304 return 1 ;
305305}
306306
307- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308- static inline FORCE_INLINE uint64_t neon_match_mask (uint8x16_t matches )
309- {
310- const uint8x8_t res = vshrn_n_u16 (vreinterpretq_u16_u8 (matches ), 4 );
311- const uint64_t mask = vget_lane_u64 (vreinterpret_u64_u8 (res ), 0 );
312- return mask & 0x8888888888888888ull ;
313- }
314-
315- static inline FORCE_INLINE uint64_t neon_rules_update (const char * ptr )
316- {
317- uint8x16_t chunk = vld1q_u8 ((const unsigned char * )ptr );
318-
319- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321- const uint8x16_t too_low_or_dbl_quote = vcltq_u8 (veorq_u8 (chunk , vdupq_n_u8 (2 )), vdupq_n_u8 (33 ));
322-
323- uint8x16_t has_backslash = vceqq_u8 (chunk , vdupq_n_u8 ('\\' ));
324- uint8x16_t needs_escape = vorrq_u8 (too_low_or_dbl_quote , has_backslash );
325-
326- return neon_match_mask (needs_escape );
327- }
328-
329307static inline unsigned char search_escape_basic_neon (search_state * search )
330308{
331309 if (RB_UNLIKELY (search -> has_matches )) {
@@ -380,14 +358,8 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
380358 * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381359 * have at least one byte that needs to be escaped.
382360 */
383- while (search -> ptr + sizeof (uint8x16_t ) <= search -> end ) {
384- uint64_t mask = neon_rules_update (search -> ptr );
385361
386- if (!mask ) {
387- search -> ptr += sizeof (uint8x16_t );
388- continue ;
389- }
390- search -> matches_mask = mask ;
362+ if (string_scan_simd_neon (& search -> ptr , search -> end , & search -> matches_mask )) {
391363 search -> has_matches = true;
392364 search -> chunk_base = search -> ptr ;
393365 search -> chunk_end = search -> ptr + sizeof (uint8x16_t );
@@ -399,7 +371,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
399371 if (remaining >= SIMD_MINIMUM_THRESHOLD ) {
400372 char * s = copy_remaining_bytes (search , sizeof (uint8x16_t ), remaining );
401373
402- uint64_t mask = neon_rules_update (s );
374+ uint64_t mask = compute_chunk_mask_neon (s );
403375
404376 if (!mask ) {
405377 // Nothing to escape, ensure search_flush doesn't do anything by setting
@@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428400
429401#ifdef HAVE_SIMD_SSE2
430402
431- #define _mm_cmpge_epu8 (a , b ) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432- #define _mm_cmple_epu8 (a , b ) _mm_cmpge_epu8(b, a)
433- #define _mm_cmpgt_epu8 (a , b ) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434- #define _mm_cmplt_epu8 (a , b ) _mm_cmpgt_epu8(b, a)
435-
436403static inline FORCE_INLINE unsigned char sse2_next_match (search_state * search )
437404{
438405 int mask = search -> matches_mask ;
@@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457424#define TARGET_SSE2
458425#endif
459426
460- static inline TARGET_SSE2 FORCE_INLINE int sse2_update (const char * ptr )
461- {
462- __m128i chunk = _mm_loadu_si128 ((__m128i const * )ptr );
463-
464- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8 (_mm_xor_si128 (chunk , _mm_set1_epi8 (2 )), _mm_set1_epi8 (33 ));
467- __m128i has_backslash = _mm_cmpeq_epi8 (chunk , _mm_set1_epi8 ('\\' ));
468- __m128i needs_escape = _mm_or_si128 (too_low_or_dbl_quote , has_backslash );
469- return _mm_movemask_epi8 (needs_escape );
470- }
471-
472427static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2 (search_state * search )
473428{
474429 if (RB_UNLIKELY (search -> has_matches )) {
@@ -487,17 +442,10 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487442 }
488443 }
489444
490- while (search -> ptr + sizeof (__m128i ) <= search -> end ) {
491- int needs_escape_mask = sse2_update (search -> ptr );
492-
493- if (needs_escape_mask == 0 ) {
494- search -> ptr += sizeof (__m128i );
495- continue ;
496- }
497-
445+ if (string_scan_simd_sse2 (& search -> ptr , search -> end , & search -> matches_mask )) {
498446 search -> has_matches = true;
499- search -> matches_mask = needs_escape_mask ;
500447 search -> chunk_base = search -> ptr ;
448+ search -> chunk_end = search -> ptr + sizeof (__m128i );
501449 return sse2_next_match (search );
502450 }
503451
@@ -506,7 +454,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
506454 if (remaining >= SIMD_MINIMUM_THRESHOLD ) {
507455 char * s = copy_remaining_bytes (search , sizeof (__m128i ), remaining );
508456
509- int needs_escape_mask = sse2_update (s );
457+ int needs_escape_mask = compute_chunk_mask_sse2 (s );
510458
511459 if (needs_escape_mask == 0 ) {
512460 // Nothing to escape, ensure search_flush doesn't do anything by setting
0 commit comments