diff --git a/embed.fnc b/embed.fnc index 470deba3ac8f..8eb86869bbee 100644 --- a/embed.fnc +++ b/embed.fnc @@ -4010,6 +4010,8 @@ CRTdip |UV |valid_utf8_to_uv \ CRTdmp |UV |valid_utf8_to_uvchr \ |NN const U8 *s \ |NULLOK STRLEN *retlen +CRTip |unsigned int|variant_byte_number \ + |PERL_UINTMAX_T word Adp |int |vcmp |NN SV *lhv \ |NN SV *rhv Adpr |void |vcroak |NULLOK const char *pat \ @@ -4108,10 +4110,6 @@ TXp |void |set_padlist |NN CV *cv \ : Used in sv.c p |void |dump_sv_child |NN SV *sv #endif -#if !defined(EBCDIC) -CRTip |unsigned int|variant_byte_number \ - |PERL_UINTMAX_T word -#endif #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) ARdp |I32 |my_chsize |int fd \ |Off_t length diff --git a/embed.h b/embed.h index 7394c28366f1..42a12ea3dc79 100644 --- a/embed.h +++ b/embed.h @@ -848,6 +848,7 @@ # define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a) # define valid_utf8_to_uv Perl_valid_utf8_to_uv # define Perl_valid_utf8_to_uvchr valid_utf8_to_uvchr +# define variant_byte_number Perl_variant_byte_number # define vcmp(a,b) Perl_vcmp(aTHX_ a,b) # define vcroak(a,b) Perl_vcroak(aTHX_ a,b) # define vdeb(a,b) Perl_vdeb(aTHX_ a,b) @@ -874,9 +875,6 @@ # define pad_setsv(a,b) Perl_pad_setsv(aTHX_ a,b) # define pad_sv(a) Perl_pad_sv(aTHX_ a) # endif -# if !defined(EBCDIC) -# define variant_byte_number Perl_variant_byte_number -# endif # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) # define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b) # endif diff --git a/inline.h b/inline.h index af1ec26c8b56..e66e9f7034a5 100644 --- a/inline.h +++ b/inline.h @@ -1361,7 +1361,7 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen) /* Note that this is branchless except for the switch() jump table, and * checking that the caller wants a *retlen returned. * - * There is wasted effort for length 1 inputs of initializing 'uv' to 0 + * There is wasted effort for length 1 inputs of initializing 'uv' to 0 * and calculating 'full_shift' (unless the compiler optimizes that out). * Benchmarks indicate this is acceptable. * See GH #23690 */ @@ -1459,14 +1459,60 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen) # define PERL_WORDSIZE sizeof(PERL_UINTMAX_T) # define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1) -/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by - * or'ing together the lowest bits of 'x'. Hopefully the final term gets - * optimized out completely on a 32-bit system, and its mask gets optimized out - * on a 64-bit system */ -# define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \ - | ( PTR2nat(x) >> 1) \ - | ( ( (PTR2nat(x) \ - & PERL_WORD_BOUNDARY_MASK) >> 2)))) +/* Given an address of a byte 'x', how many bytes away is that address to the + * following closest full word boundary. */ +# define BYTES_REMAINING_IN_WORD(x) \ + ( (PERL_WORDSIZE - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) \ + & PERL_WORD_BOUNDARY_MASK) +/* For example, consider two addresses in an 8 byte word size (the dots are + * don't cares): + * 0b...............010 0b...............000 + * ((8 - (0b1101010 & 0x7)) & 0x7) ((8 - (0b1101000 & 0x7)) & 0x7) + * ((8 - 0b10) & 0x7) ((8 - 0) & 0x7) + * (6 & 0x7) (8 & 0x7) + * 6 0 */ + +/* Some tasks that are byte-oriented can be done as well a full word-at-a-time, + * running 8 times faster on an 8-byte word, for example. But there is + * generally extra setup required to do this, and byte-at-a-time must be used + * anyway to get to the next word boundary. This macro calculates whether the + * trade-off is worth doing. If not, it returns NULL; if so, it returns a + * pointer to the first byte of the next word. Code using this is typically + * structured like: + * U8 * next_word_boundary = WORTH_PER_LOOP() + * if (next_word_boundary) { + * loop per-byte until next_word_boundary + * loop per-word until less than a word left before upper boundary + * } + * loop per-byte until reach final boundary + * + * 's' is the current position in the string + * 'e' is the upper string bound + * 'full_words_needed' is the caller's determination of where to make the + * trade-off between per-byte and per-word. Only if the number of words + * in the input string is at least this many, does the macro return + * non-NULL. + * + * Because of EBCDIC, there are two forms of this macro. + * WORTH_PER_WORD_LOOP_BINMODE() is for use when the data being examined is + * not dependent on the character set. The more usual form is plain + * WORTH_PER_WORD_LOOP() for character data. Because EBCDIC needs an extra + * transformation, per-word operations are not appropriate on it, so the macro + * always returns NULL, meaning don't use a per-word loop on an EBCDIC + * platform. */ +# define WORTH_PER_WORD_LOOP_BINMODE(s, e, full_words_needed) \ + /* Note multiple evaluations of 's' */ \ + ( ( ( (s) + BYTES_REMAINING_IN_WORD(s) \ + + (full_words_needed) * PERL_WORDSIZE) < (e) ) \ + ? ((s) + BYTES_REMAINING_IN_WORD(s)) \ + : NULL) + +# ifdef EBCDIC +# define WORTH_PER_WORD_LOOP(s, e, f) NULL +# else +# define WORTH_PER_WORD_LOOP(s, e, f) \ + WORTH_PER_WORD_LOOP_BINMODE(s, e, f) +# endif /* =for apidoc is_utf8_invariant_string @@ -1515,35 +1561,22 @@ C> and C>. PERL_STATIC_INLINE bool Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep) { - const U8* send; - const U8* x = s; - PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC; + const U8* send = s + len; + const U8* x = s; + if (len == 0) { len = strlen((const char *)s); } - send = s + len; - -#ifndef EBCDIC - /* Do the word-at-a-time iff there is at least one usable full word. That * means that after advancing to a word boundary, there still is at least a - * full word left. The number of bytes needed to advance is 'wordsize - - * offset' unless offset is 0. */ - if ((STRLEN) (send - x) >= PERL_WORDSIZE - - /* This term is wordsize if subword; 0 if not */ - + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x) + * full word left. */ + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, send, 1); - /* 'offset' */ - - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) - { - - /* Process per-byte until reach word boundary. XXX This loop could be - * eliminated if we knew that this platform had fast unaligned reads */ - while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) { + if (per_byte_end) { + while (x < per_byte_end ) { if (! UTF8_IS_INVARIANT(*x)) { if (ep) { *ep = x; @@ -1585,8 +1618,6 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep) } while (x + PERL_WORDSIZE <= send); } -#endif /* End of ! EBCDIC */ - /* Process per-byte. (Can't use libc functions like strpbrk() because * input isn't necessarily a C string) */ while (x < send) { @@ -1977,13 +2008,20 @@ Perl_single_1bit_pos32(U32 word) } -#ifndef EBCDIC +/* Returns the byte number of the lowest numbered-byte whose uppermost bit is + * set */ +#define first_upper_bit_set_byte_number(word) Perl_variant_byte_number(word) PERL_STATIC_INLINE unsigned int Perl_variant_byte_number(PERL_UINTMAX_T word) { - /* This returns the position in a word (0..7) of the first variant byte in - * it. This is a helper function. Note that there are no branches */ + /* This returns the position in a word (0..7) of the first byte whose + * uppermost bit is set. On ASCII boxes, this is equivalent to the first + * byte whose representation is different in UTF-8 vs not, hence the name + * and text in the comments. It was only later that this was used for + * binary data, not tied to the character set. + * + * This is a helper function. Note that there are no branches */ /* Get just the msb bits of each byte */ word &= PERL_VARIANTS_WORD_MASK; @@ -1992,7 +2030,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) * word */ assert(word); -# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 +#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* Bytes are stored like * Byte8 ... Byte2 Byte1 @@ -2005,7 +2043,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) * to 0..7 */ return (unsigned int) ((word + 1) >> 3) - 1; -# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 +#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* Bytes are stored like * Byte1 Byte2 ... Byte8 @@ -2024,13 +2062,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) return (unsigned int) word; -# else -# error Unexpected byte order -# endif +#else /* Unhandled byte-order; the compiler knows which comes first */ -} + const U8 * bytes = (U8 *) &word; + for (unsigned int i = 0; i < sizeof(word); i++) { + if (bytes[i]) { + return i; + } + } + + assert(0); + + /* If all else fails, it's better to return something than just random */ + return 0; #endif + +} + #if defined(PERL_CORE) || defined(PERL_EXT) /* @@ -2063,23 +2112,16 @@ C>, PERL_STATIC_INLINE Size_t S_variant_under_utf8_count(const U8* const s, const U8* const e) { - const U8* x = s; - Size_t count = 0; - PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT; -# ifndef EBCDIC + const U8* x = s; + Size_t count = 0; /* Test if the string is long enough to use word-at-a-time. (Logic is the * same as for is_utf8_invariant_string()) */ - if ((STRLEN) (e - x) >= PERL_WORDSIZE - + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x) - - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) - { - - /* Process per-byte until reach word boundary. XXX This loop could be - * eliminated if we knew that this platform had fast unaligned reads */ - while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) { + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, e, 1); + if (per_byte_end) { + while (x < per_byte_end ) { count += ! UTF8_IS_INVARIANT(*x++); } @@ -2095,8 +2137,6 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e) } while (x + PERL_WORDSIZE <= e); } -# endif - /* Process per-byte */ while (x < e) { if (! UTF8_IS_INVARIANT(*x)) { @@ -2117,6 +2157,7 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e) # undef PERL_COUNT_MULTIPLIER # undef PERL_WORD_BOUNDARY_MASK # undef PERL_VARIANTS_WORD_MASK +# undef BYTES_REMAINING_IN_WORD #endif #define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL) @@ -3284,7 +3325,7 @@ Perl_utf8_to_uv_msgs(const U8 * const s0, * * The terminology of the dfa refers to a 'class'. The variable 'type' * would have been named 'class' except that is a reserved word in C++ - * + * * The table can be a U16 on EBCDIC platforms, so 'state' is declared * as U16; 'type' is likely to never occupy more than 5 bits. */ PERL_UINT_FAST8_T type = PL_strict_utf8_dfa_tab[*s]; @@ -4758,7 +4799,7 @@ extracted from C using L>. C must not be NULL. Memory deallocation To prevent memory leaks, the memory allocated for the new string needs to be -freed when no longer needed. +freed when no longer needed. =over diff --git a/proto.h b/proto.h index 0703f6997a28..7aabf8d17f1c 100644 --- a/proto.h +++ b/proto.h @@ -5698,16 +5698,6 @@ Perl_dump_sv_child(pTHX_ SV *sv) # define PERL_ARGS_ASSERT_DUMP_SV_CHILD \ assert(sv) -#endif -#if !defined(EBCDIC) - -# if !defined(PERL_NO_INLINE_FUNCTIONS) -PERL_STATIC_INLINE unsigned int -Perl_variant_byte_number(PERL_UINTMAX_T word) - __attribute__warn_unused_result__; -# define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER - -# endif #endif #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) PERL_CALLCONV I32 @@ -10390,6 +10380,11 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen) # define PERL_ARGS_ASSERT_VALID_UTF8_TO_UV \ assert(s) +PERL_STATIC_INLINE unsigned int +Perl_variant_byte_number(PERL_UINTMAX_T word) + __attribute__warn_unused_result__; +# define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER + PERL_STATIC_INLINE void Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx); # define PERL_ARGS_ASSERT_CX_POPBLOCK \ diff --git a/regexec.c b/regexec.c index d5e14c51beab..d04eb96a4305 100644 --- a/regexec.c +++ b/regexec.c @@ -632,15 +632,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found. * */ - if ((STRLEN) (send - s) >= PERL_WORDSIZE - + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK)) - { - PERL_UINTMAX_T span_word; - - /* Process per-byte until reach word boundary. XXX This loop could be - * eliminated if we knew that this platform had fast unaligned reads */ - while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) { + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1); + if (per_byte_end) { + while (s < per_byte_end ) { if (*s != span_byte) { return s; } @@ -648,7 +642,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) } /* Create a word filled with the bytes we are spanning */ - span_word = PERL_COUNT_MULTIPLIER * span_byte; + PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte; /* Process per-word as long as we have at least a full word left */ do { @@ -659,15 +653,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) continue; } - /* Here, at least one byte in the word isn't 'span_byte'. */ - -#ifdef EBCDIC - - break; - -#else - - /* This xor leaves 1 bits only in those non-matching bytes */ + /* Here, at least one byte in the word isn't 'span_byte'. + * + * This xor leaves 1 bits only in those non-matching bytes */ span_word ^= * (PERL_UINTMAX_T *) s; /* Make sure the upper bit of each non-matching byte is set. This @@ -677,10 +665,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) span_word |= span_word << 4; /* That reduces the problem to what this function solves */ - return s + variant_byte_number(span_word); - -#endif - + return s + first_upper_bit_set_byte_number(span_word); } while (s + PERL_WORDSIZE <= send); } @@ -706,23 +691,17 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) * returns 'send' if none found. It uses word-level operations instead of * byte to speed up the process */ -#ifndef EBCDIC - - if ((STRLEN) (send - s) >= PERL_WORDSIZE - + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK)) - { - PERL_UINTMAX_T word, mask_word; - - while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) { + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 1); + if (per_byte_end) { + while (s < per_byte_end ) { if (((*s) & mask) == byte) { return s; } s++; } - word = PERL_COUNT_MULTIPLIER * byte; - mask_word = PERL_COUNT_MULTIPLIER * mask; + PERL_UINTMAX_T word = PERL_COUNT_MULTIPLIER * byte; + PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask; do { PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word; @@ -759,8 +738,6 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) } while (s + PERL_WORDSIZE <= send); } -#endif - while (s < send) { if (((*s) & mask) == byte) { return s; @@ -783,21 +760,17 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) * function. Returns 'send' if none found. Works like find_span_end(), * except for the AND */ - if ((STRLEN) (send - s) >= PERL_WORDSIZE - + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK)) - { - PERL_UINTMAX_T span_word, mask_word; - - while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) { + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1); + if (per_byte_end) { + while (s < per_byte_end ) { if (((*s) & mask) != span_byte) { return s; } s++; } - span_word = PERL_COUNT_MULTIPLIER * span_byte; - mask_word = PERL_COUNT_MULTIPLIER * mask; + PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte; + PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask; do { PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word; @@ -807,20 +780,11 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) continue; } -#ifdef EBCDIC - - break; - -#else - masked ^= span_word; masked |= masked << 1; masked |= masked << 2; masked |= masked << 4; - return s + variant_byte_number(masked); - -#endif - + return s + first_upper_bit_set_byte_number(masked); } while (s + PERL_WORDSIZE <= send); } diff --git a/utf8.c b/utf8.c index fdd7fc6f9f4d..5f67da67fe7d 100644 --- a/utf8.c +++ b/utf8.c @@ -2635,13 +2635,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e) * cachegrind). The number isn't critical, as at these sizes, the total * time spent isn't large either way */ -#ifndef EBCDIC - - if (e - s0 < 96) - -#endif - - { + const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s0, e, 12); + if (! per_byte_end) { while (s < e) { /* Count characters directly */ /* Take extra care to not exceed 'e' (which would be undefined @@ -2670,21 +2665,14 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e) return s - s0; } -#ifndef EBCDIC - /* Count continuations, word-at-a-time. * * We need to stop before the final start character in order to * preserve the limited error checking that's always been done */ const U8 * e_limit = e - UTF8_MAXBYTES; - /* Points to the first byte >=s which is positioned at a word boundary. If - * s is on a word boundary, it is s, otherwise it is to the next word. */ - const U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK); - /* Process up to a full word boundary. */ - while (s < partial_word_end) { + while (s < per_byte_end ) { const Size_t skip = UTF8SKIP(s); continuations += skip - 1; @@ -2692,8 +2680,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e) } /* Adjust back down any overshoot */ - continuations -= s - partial_word_end; - s = partial_word_end; + continuations -= s - per_byte_end; + s = per_byte_end; do { /* Process per-word */ @@ -2742,8 +2730,6 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e) break; } -# endif - if (LIKELY(e == s)) { return s - s0 - continuations; } @@ -3014,15 +3000,11 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me, Size_t invariant_length = first_variant - s0; Size_t variant_count = 0; -#ifndef EBCDIC /* The below relies on the bit patterns of UTF-8 */ - - /* Do a first pass through the string to see if it actually is translatable - * into bytes, and if so, how big the result is. On long strings this is - * done a word at a time, so is relatively quick. (There is some - * start-up/tear-down overhead with the per-word algorithm, so no real gain + /* There is some start-up/tear-down overhead with this, so no real gain * unless the remaining portion of the string is long enough. The current - * value is just a guess.) On EBCDIC, it's always per-byte. */ - if ((send - s) > (ptrdiff_t) (5 * PERL_WORDSIZE)) { + * value is just a guess. */ + U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 5); + if (per_byte_end) { /* If the string contains any start byte besides C2 and C3, then it * isn't translatable into bytes */ @@ -3031,15 +3013,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me, const PERL_UINTMAX_T C2_mask = PERL_COUNT_MULTIPLIER * 0xC2; const PERL_UINTMAX_T FE_mask = PERL_COUNT_MULTIPLIER * 0xFE; - /* Points to the first byte >=s which is positioned at a word boundary. - * If s is on a word boundary, it is s, otherwise it is the first byte - * of the next word. */ - U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK); - - /* Here there is at least a full word beyond the first word boundary. - * Process up to that boundary. */ - while (s < partial_word_end) { + while (s < per_byte_end ) { if (! UTF8_IS_INVARIANT(*s)) { if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) { return false; @@ -3053,7 +3027,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me, } /* Adjust back down any overshoot */ - s = partial_word_end; + s = per_byte_end; /* Process per-word */ do { @@ -3116,8 +3090,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me, } } -#endif - /* Do the straggler bytes beyond what the loop above did */ + /* Do the straggler bytes beyond the final word boundary */ while (s < send) { if (! UTF8_IS_INVARIANT(*s)) { if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {