diff --git a/pp.c b/pp.c index 33cb018c8da3..5417d5db09d5 100644 --- a/pp.c +++ b/pp.c @@ -4511,7 +4511,6 @@ PP_wrapped(pp_uc, 1, 0) U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; #define GREEK_CAPITAL_LETTER_IOTA 0x0399 -#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 /* All occurrences of these are to be moved to follow any other marks. * This is context-dependent. We may not be passed enough context to * move the iota subscript beyond all of them, but we do the best we can @@ -4525,11 +4524,11 @@ PP_wrapped(pp_uc, 1, 0) bool in_iota_subscript = FALSE; while (s < send) { - STRLEN u; - STRLEN ulen; + STRLEN this_len; + STRLEN upper_len; UV uv; if (UNLIKELY(in_iota_subscript)) { - UV cp = utf8_to_uv_or_die(s, send, &u); + UV cp = utf8_to_uv_or_die(s, send, &this_len); if (! _invlist_contains_cp(PL_utf8_mark, cp)) { @@ -4540,25 +4539,27 @@ PP_wrapped(pp_uc, 1, 0) } } else { - u = UTF8SKIP(s); + this_len = UTF8SKIP(s); } /* Then handle the current character. Get the changed case value * and copy it to the output buffer */ #ifdef USE_LOCALE_CTYPE - uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); + uv = _toUPPER_utf8_flags(s, send, tmpbuf, &upper_len, + IN_LC_RUNTIME(LC_CTYPE)); #else - uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0); + uv = _toUPPER_utf8_flags(s, send, tmpbuf, &upper_len, 0); #endif - if ( uv == GREEK_CAPITAL_LETTER_IOTA - && utf8_to_uv_or_die(s, send, 0) == - COMBINING_GREEK_YPOGEGRAMMENI) + if ( UNLIKELY(uv == GREEK_CAPITAL_LETTER_IOTA) + && memBEGINs(s, this_len, COMBINING_GREEK_YPOGEGRAMMENI_UTF8)) { in_iota_subscript = TRUE; } else { - if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + if ( upper_len > this_len + && (SvLEN(dest) < (min += upper_len - this_len))) + { /* If the eventually required minimum size outgrows the * available space, we need to grow. */ const UV o = d - (U8*)SvPVX_const(dest); @@ -4571,10 +4572,10 @@ PP_wrapped(pp_uc, 1, 0) * another option */ d = o + (U8*) SvGROW(dest, min); } - Copy(tmpbuf, d, ulen, U8); - d += ulen; + Copy(tmpbuf, d, upper_len, U8); + d += upper_len; } - s += u; + s += this_len; } if (in_iota_subscript) { *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); @@ -4868,12 +4869,13 @@ PP_wrapped(pp_lc, 1, 0) bool remove_dot_above = FALSE; while (s < send) { - const STRLEN u = UTF8SKIP(s); - STRLEN ulen; + const STRLEN this_len = UTF8SKIP(s); + STRLEN lower_len; #ifdef USE_LOCALE_CTYPE - _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); + _toLOWER_utf8_flags(s, send, tmpbuf, &lower_len, + IN_LC_RUNTIME(LC_CTYPE)); /* If we are in a Turkic locale, we have to do more work. As noted * in the comments for lcfirst, there is a special case if a 'I' @@ -4888,9 +4890,10 @@ PP_wrapped(pp_lc, 1, 0) && IN_LC_RUNTIME(LC_CTYPE)) { if ( UNLIKELY(remove_dot_above) - && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8)) + && memBEGINs(tmpbuf, sizeof(tmpbuf), + COMBINING_DOT_ABOVE_UTF8)) { - s += u; + s += this_len; remove_dot_above = FALSE; continue; } @@ -4901,15 +4904,16 @@ PP_wrapped(pp_lc, 1, 0) #else PERL_UNUSED_VAR(remove_dot_above); - _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0); + _toLOWER_utf8_flags(s, send, tmpbuf, &lower_len, 0); #endif /* Here is where we would do context-sensitive actions for the * Greek final sigma. See the commit message for 86510fb15 for why * there isn't any */ - if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { - + if ( lower_len > this_len + && (SvLEN(dest) < (min += lower_len - this_len))) + { /* If the eventually required minimum size outgrows the * available space, we need to grow. */ const UV o = d - (U8*)SvPVX_const(dest); @@ -4925,9 +4929,9 @@ PP_wrapped(pp_lc, 1, 0) /* Copy the newly lowercased letter to the output buffer we're * building */ - Copy(tmpbuf, d, ulen, U8); - d += ulen; - s += u; + Copy(tmpbuf, d, lower_len, U8); + d += lower_len; + s += this_len; } /* End of looping through the source string */ SvUTF8_on(dest); *d = '\0'; @@ -5131,19 +5135,19 @@ PP_wrapped(pp_fc, 1, 0) if (DO_UTF8(source)) { /* UTF-8 flagged string. */ while (s < send) { - const STRLEN u = UTF8SKIP(s); + const STRLEN this_len = UTF8SKIP(s); STRLEN ulen; _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags); - if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + if (ulen > this_len && (SvLEN(dest) < (min += ulen - this_len))) { const UV o = d - (U8*)SvPVX_const(dest); d = o + (U8*) SvGROW(dest, min); } Copy(tmpbuf, d, ulen, U8); d += ulen; - s += u; + s += this_len; } SvUTF8_on(dest); } /* Unflagged string */ diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl index a6814afc9444..538a397edce5 100644 --- a/regen/unicode_constants.pl +++ b/regen/unicode_constants.pl @@ -1020,6 +1020,7 @@ END U+0300 string U+0307 string +U+0345 string U+1E9E string_skip_if_undef diff --git a/unicode_constants.h b/unicode_constants.h index 50cc64f978bc..6f289f37212c 100644 --- a/unicode_constants.h +++ b/unicode_constants.h @@ -57,6 +57,7 @@ bytes. # define COMBINING_GRAVE_ACCENT_UTF8 "\xCC\x80" /* U+0300 */ # define COMBINING_DOT_ABOVE_UTF8 "\xCC\x87" /* U+0307 */ +# define COMBINING_GREEK_YPOGEGRAMMENI_UTF8 "\xCD\x85" /* U+0345 */ # define LATIN_CAPITAL_LETTER_SHARP_S_UTF8 "\xE1\xBA\x9E" /* U+1E9E */ @@ -122,6 +123,7 @@ bytes. # define COMBINING_GRAVE_ACCENT_UTF8 "\xAF\x41" /* U+0300 */ # define COMBINING_DOT_ABOVE_UTF8 "\xAF\x48" /* U+0307 */ +# define COMBINING_GREEK_YPOGEGRAMMENI_UTF8 "\xB1\x46" /* U+0345 */ # define LATIN_CAPITAL_LETTER_SHARP_S_UTF8 "\xBF\x63\x72" /* U+1E9E */ @@ -187,6 +189,7 @@ bytes. # define COMBINING_GRAVE_ACCENT_UTF8 "\xAD\x41" /* U+0300 */ # define COMBINING_DOT_ABOVE_UTF8 "\xAD\x48" /* U+0307 */ +# define COMBINING_GREEK_YPOGEGRAMMENI_UTF8 "\xAF\x46" /* U+0345 */ # define LATIN_CAPITAL_LETTER_SHARP_S_UTF8 "\xBF\x62\x71" /* U+1E9E */