5
5
#include <math.h>
6
6
#include <ctype.h>
7
7
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
10
10
/* ruby api and some helpers */
11
11
@@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
304
return 1 ;
305
305
}
306
306
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask (uint8x16_t matches )
309
- {
310
- const uint8x8_t res = vshrn_n_u16 (vreinterpretq_u16_u8 (matches ), 4 );
311
- const uint64_t mask = vget_lane_u64 (vreinterpret_u64_u8 (res ), 0 );
312
- return mask & 0x8888888888888888ull ;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update (const char * ptr )
316
- {
317
- uint8x16_t chunk = vld1q_u8 ((const unsigned char * )ptr );
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8 (veorq_u8 (chunk , vdupq_n_u8 (2 )), vdupq_n_u8 (33 ));
322
-
323
- uint8x16_t has_backslash = vceqq_u8 (chunk , vdupq_n_u8 ('\\' ));
324
- uint8x16_t needs_escape = vorrq_u8 (too_low_or_dbl_quote , has_backslash );
325
-
326
- return neon_match_mask (needs_escape );
327
- }
328
-
329
307
static inline unsigned char search_escape_basic_neon (search_state * search )
330
308
{
331
309
if (RB_UNLIKELY (search -> has_matches )) {
@@ -380,14 +358,8 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
380
358
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
359
* have at least one byte that needs to be escaped.
382
360
*/
383
- while (search -> ptr + sizeof (uint8x16_t ) <= search -> end ) {
384
- uint64_t mask = neon_rules_update (search -> ptr );
385
361
386
- if (!mask ) {
387
- search -> ptr += sizeof (uint8x16_t );
388
- continue ;
389
- }
390
- search -> matches_mask = mask ;
362
+ if (string_scan_simd_neon (& search -> ptr , search -> end , & search -> matches_mask )) {
391
363
search -> has_matches = true;
392
364
search -> chunk_base = search -> ptr ;
393
365
search -> chunk_end = search -> ptr + sizeof (uint8x16_t );
@@ -399,7 +371,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
399
371
if (remaining >= SIMD_MINIMUM_THRESHOLD ) {
400
372
char * s = copy_remaining_bytes (search , sizeof (uint8x16_t ), remaining );
401
373
402
- uint64_t mask = neon_rules_update (s );
374
+ uint64_t mask = compute_chunk_mask_neon (s );
403
375
404
376
if (!mask ) {
405
377
// Nothing to escape, ensure search_flush doesn't do anything by setting
@@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
400
429
401
#ifdef HAVE_SIMD_SSE2
430
402
431
- #define _mm_cmpge_epu8 (a , b ) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8 (a , b ) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8 (a , b ) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8 (a , b ) _mm_cmpgt_epu8(b, a)
435
-
436
403
static inline FORCE_INLINE unsigned char sse2_next_match (search_state * search )
437
404
{
438
405
int mask = search -> matches_mask ;
@@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
424
#define TARGET_SSE2
458
425
#endif
459
426
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update (const char * ptr )
461
- {
462
- __m128i chunk = _mm_loadu_si128 ((__m128i const * )ptr );
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8 (_mm_xor_si128 (chunk , _mm_set1_epi8 (2 )), _mm_set1_epi8 (33 ));
467
- __m128i has_backslash = _mm_cmpeq_epi8 (chunk , _mm_set1_epi8 ('\\' ));
468
- __m128i needs_escape = _mm_or_si128 (too_low_or_dbl_quote , has_backslash );
469
- return _mm_movemask_epi8 (needs_escape );
470
- }
471
-
472
427
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2 (search_state * search )
473
428
{
474
429
if (RB_UNLIKELY (search -> has_matches )) {
@@ -487,17 +442,10 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
442
}
488
443
}
489
444
490
- while (search -> ptr + sizeof (__m128i ) <= search -> end ) {
491
- int needs_escape_mask = sse2_update (search -> ptr );
492
-
493
- if (needs_escape_mask == 0 ) {
494
- search -> ptr += sizeof (__m128i );
495
- continue ;
496
- }
497
-
445
+ if (string_scan_simd_sse2 (& search -> ptr , search -> end , & search -> matches_mask )) {
498
446
search -> has_matches = true;
499
- search -> matches_mask = needs_escape_mask ;
500
447
search -> chunk_base = search -> ptr ;
448
+ search -> chunk_end = search -> ptr + sizeof (__m128i );
501
449
return sse2_next_match (search );
502
450
}
503
451
@@ -506,7 +454,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
506
454
if (remaining >= SIMD_MINIMUM_THRESHOLD ) {
507
455
char * s = copy_remaining_bytes (search , sizeof (__m128i ), remaining );
508
456
509
- int needs_escape_mask = sse2_update (s );
457
+ int needs_escape_mask = compute_chunk_mask_sse2 (s );
510
458
511
459
if (needs_escape_mask == 0 ) {
512
460
// Nothing to escape, ensure search_flush doesn't do anything by setting
0 commit comments