Use new WORTH_PER_WORD_LOOP()

khwilliamson · khwilliamson · commit bd63974083be · 2025-11-01T20:58:10.000-06:00
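This converts the places that can benefit from the new
 WORTH_PER_WORD_LOOP() macro (and its kin, such as
 WORTH_PER_WORD_LOOP_BINMODE()) to use them, replacing the hand-rolled
 word-boundary length tests and the '#ifndef EBCDIC' guards around them.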
diff --git a/inline.h b/inline.h
@@ -1573,25 +1573,13 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
         len = strlen((const char *)s);
     }
 
-
-#ifndef EBCDIC
-
     /* Do the word-at-a-time iff there is at least one usable full word.  That
      * means that after advancing to a word boundary, there still is at least a
-     * full word left.  The number of bytes needed to advance is 'wordsize -
-     * offset' unless offset is 0. */
-    if ((STRLEN) (send - x) >= PERL_WORDSIZE
-
-                            /* This term is wordsize if subword; 0 if not */
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
-
-                            /* 'offset' */
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
+     * full word left. */
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, send, 1);
 
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*x)) {
                 if (ep) {
                     *ep = x;
@@ -1633,8 +1621,6 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
         } while (x + PERL_WORDSIZE <= send);
     }
 
-#endif      /* End of ! EBCDIC */
-
     /* Process per-byte.  (Can't use libc functions like strpbrk() because
      * input isn't necessarily a C string) */
     while (x < send) {
@@ -2116,18 +2102,11 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
     const U8* x = s;
     Size_t count = 0;
 
-#  ifndef EBCDIC
-
     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
      * same as for is_utf8_invariant_string()) */
-    if ((STRLEN) (e - x) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, e, 1);
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             count += ! UTF8_IS_INVARIANT(*x++);
         }
 
@@ -2143,8 +2122,6 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
         } while (x + PERL_WORDSIZE <= e);
     }
 
-#  endif
-
     /* Process per-byte */
     while (x < e) {
         if (! UTF8_IS_INVARIANT(*x)) {
diff --git a/regexec.c b/regexec.c
@@ -632,14 +632,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
      * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
      * */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (*s != span_byte) {
                 return s;
             }
@@ -705,14 +700,9 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
      * returns 'send' if none found.  It uses word-level operations instead of
      * byte to speed up the process */
 
-#ifndef EBCDIC
-
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) == byte) {
                 return s;
             }
@@ -757,8 +747,6 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
         } while (s + PERL_WORDSIZE <= send);
     }
 
-#endif
-
     while (s < send) {
         if (((*s) & mask) == byte) {
             return s;
@@ -781,12 +769,9 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
      * function.  Returns 'send' if none found.  Works like find_span_end(),
      * except for the AND */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) != span_byte) {
                 return s;
             }
diff --git a/utf8.c b/utf8.c
@@ -2635,13 +2635,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
      * cachegrind).  The number isn't critical, as at these sizes, the total
      * time spent isn't large either way */
 
-#ifndef EBCDIC
-
-    if (e - s0 < 96)
-
-#endif
-
-    {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s0, e, 12);
+    if (! per_byte_end) {
         while (s < e) { /* Count characters directly */
 
             /* Take extra care to not exceed 'e' (which would be undefined
@@ -2670,30 +2665,23 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         return s - s0;
     }
 
-#ifndef EBCDIC
-
     /* Count continuations, word-at-a-time.
      *
      * We need to stop before the final start character in order to
      * preserve the limited error checking that's always been done */
     const U8 * e_limit = e - UTF8_MAXBYTES;
 
-    /* Points to the first byte >=s which is positioned at a word boundary.  If
-     * s is on a word boundary, it is s, otherwise it is to the next word. */
-    const U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                    - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
     /* Process up to a full word boundary. */
-    while (s < partial_word_end) {
+    while (s < per_byte_end ) {
         const Size_t skip = UTF8SKIP(s);
 
         continuations += skip - 1;
         s += skip;
     }
 
     /* Adjust back down any overshoot */
-    continuations -= s - partial_word_end;
-    s = partial_word_end;
+    continuations -= s - per_byte_end;
+    s = per_byte_end;
 
     do { /* Process per-word */
 
@@ -2742,8 +2730,6 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         break;
     }
 
-#  endif
-
     if (LIKELY(e == s)) {
         return s - s0 - continuations;
     }
@@ -3014,15 +3000,11 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
     Size_t invariant_length = first_variant - s0;
     Size_t variant_count = 0;
 
-#ifndef EBCDIC      /* The below relies on the bit patterns of UTF-8 */
-
-    /* Do a first pass through the string to see if it actually is translatable
-     * into bytes, and if so, how big the result is.  On long strings this is
-     * done a word at a time, so is relatively quick. (There is some
-     * start-up/tear-down overhead with the per-word algorithm, so no real gain
+    /* The per-word loop has start-up/tear-down overhead, so no real gain
      * unless the remaining portion of the string is long enough.  The current
-     * value is just a guess.)  On EBCDIC, it's always per-byte. */
-    if ((send - s) > (ptrdiff_t) (5 * PERL_WORDSIZE)) {
+     * value is just a guess. */
+    U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 5);
+    if (per_byte_end) {
 
         /* If the string contains any start byte besides C2 and C3, then it
          * isn't translatable into bytes */
@@ -3031,15 +3013,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         const PERL_UINTMAX_T C2_mask = PERL_COUNT_MULTIPLIER * 0xC2;
         const PERL_UINTMAX_T FE_mask = PERL_COUNT_MULTIPLIER * 0xFE;
 
-        /* Points to the first byte >=s which is positioned at a word boundary.
-         * If s is on a word boundary, it is s, otherwise it is the first byte
-         * of the next word. */
-        U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
-        /* Here there is at least a full word beyond the first word boundary.
-         * Process up to that boundary. */
-        while (s < partial_word_end) {
+        while (s < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*s)) {
                 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {
                     return false;
@@ -3053,7 +3027,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
 
         /* Adjust back down any overshoot */
-        s = partial_word_end;
+        s = per_byte_end;
 
         /* Process per-word */
         do {
@@ -3116,7 +3090,6 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
     }
 
-#endif
     /* Do the straggler bytes beyond what the loop above did */
     while (s < send) {
         if (! UTF8_IS_INVARIANT(*s)) {