Skip to content

Commit 79c191f

Browse files
committed
utf8_to_uv_msgs: Reorder case statements in switch()
This just moves these around so that they are roughly in increasing order of complexity, which makes it easier to grok. This doesn't change the order the cases are called in when there are multiple ones. The order is dictated by what the switch() does first, which isn't affected by this commit.
1 parent efff4c0 commit 79c191f

File tree

1 file changed

+128
-128
lines changed

1 file changed

+128
-128
lines changed

utf8.c

Lines changed: 128 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,58 +1951,10 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
19511951
U32 pack_warn = packWARN(WARN_UTF8);
19521952

19531953
switch (this_problem) {
1954-
case UTF8_GOT_OVERFLOW:
1955-
1956-
/* Overflow means also got a super and are using Perl's
1957-
* extended UTF-8, but we handle all three cases here */
1958-
possible_problems &= ~(UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
1959-
*errors |= UTF8_GOT_OVERFLOW;
1960-
uv = UNICODE_REPLACEMENT;
1961-
1962-
/* But the API says we flag all errors found */
1963-
if (flags & (UTF8_WARN_SUPER|UTF8_DISALLOW_SUPER)) {
1964-
*errors |= UTF8_GOT_SUPER;
1965-
}
1966-
if (flags
1967-
& (UTF8_WARN_PERL_EXTENDED|UTF8_DISALLOW_PERL_EXTENDED))
1968-
{
1969-
*errors |= UTF8_GOT_PERL_EXTENDED;
1970-
}
1971-
1972-
/* Disallow if any of the three categories say to */
1973-
if ( ! (flags & UTF8_ALLOW_OVERFLOW)
1974-
|| (flags & ( UTF8_DISALLOW_SUPER
1975-
|UTF8_DISALLOW_PERL_EXTENDED)))
1976-
{
1977-
disallowed = TRUE;
1978-
}
1979-
1980-
/* Likewise, warn if any say to */
1981-
if ( ! (flags & UTF8_ALLOW_OVERFLOW)
1982-
|| (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
1983-
{
1984-
1985-
/* Overflow is a hybrid. If the word size on this platform
1986-
* were wide enough for this to not overflow, a non-Unicode
1987-
* code point would have been generated. If the caller
1988-
* wanted warnings for such code points, the warning
1989-
* category would be WARN_NON_UNICODE, On the other hand,
1990-
* overflow is considered a malformation, which is serious,
1991-
* and the category would be just WARN_UTF8. We clearly
1992-
* should warn if either category is enabled, but which
1993-
* category to use? Historically, we've used 'utf8' if it
1994-
* is enabled; and that seems like the more severe
1995-
* category, more befitting a malformation. */
1996-
pack_warn = NEED_MESSAGE(WARN_UTF8,
1997-
ckWARN_d, WARN_NON_UNICODE);
1998-
if (pack_warn) {
1999-
message = Perl_form(aTHX_ "%s: %s (overflows)",
2000-
malformed_text,
2001-
_byte_dump_string(s0, curlen, 0));
2002-
this_flag_bit = UTF8_GOT_OVERFLOW;
2003-
}
2004-
}
2005-
1954+
default:
1955+
Perl_croak(aTHX_ "panic: Unexpected case value in "
1956+
" utf8n_to_uvchr_msgs() %d", this_problem);
1957+
/* NOTREACHED */
20061958
break;
20071959

20081960
case UTF8_GOT_EMPTY:
@@ -2117,78 +2069,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
21172069

21182070
break;
21192071

2120-
case UTF8_GOT_SUPER:
2121-
2122-
if (flags & UTF8_WARN_SUPER) {
2123-
*errors |= UTF8_GOT_SUPER;
2124-
2125-
if (NEED_MESSAGE(WARN_NON_UNICODE,,)) {
2126-
pack_warn = packWARN(WARN_NON_UNICODE);
2127-
2128-
if (orig_problems & UTF8_GOT_TOO_SHORT) {
2129-
message = Perl_form(aTHX_
2130-
"Any UTF-8 sequence that starts with"
2131-
" \"%s\" is for a non-Unicode code point,"
2132-
" may not be portable",
2133-
_byte_dump_string(s0, curlen, 0));
2134-
}
2135-
else {
2136-
message = Perl_form(aTHX_ super_cp_format, uv);
2137-
}
2138-
this_flag_bit = UTF8_GOT_SUPER;
2139-
}
2140-
}
2141-
2142-
/* Test for Perl's extended UTF-8 after the regular SUPER ones,
2143-
* and before possibly bailing out, so that the more dire
2144-
* warning will override the regular one. */
2145-
if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) {
2146-
if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
2147-
&& NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE))
2148-
{
2149-
pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE);
2150-
2151-
/* If it is an overlong that evaluates to a code point
2152-
* that doesn't have to use the Perl extended UTF-8, it
2153-
* still used it, and so we output a message that
2154-
* doesn't refer to the code point. The same is true
2155-
* if there was a SHORT malformation where the code
2156-
* point is not valid. In that case, 'uv' will have
2157-
* been set to the REPLACEMENT CHAR, and the message
2158-
* below without the code point in it will be selected
2159-
* */
2160-
if (UNICODE_IS_PERL_EXTENDED(uv)) {
2161-
message = Perl_form(aTHX_
2162-
PL_extended_cp_format, uv);
2163-
}
2164-
else {
2165-
message = Perl_form(aTHX_
2166-
"Any UTF-8 sequence that starts with"
2167-
" \"%s\" is a Perl extension, and"
2168-
" so is not portable",
2169-
_byte_dump_string(s0, curlen, 0));
2170-
}
2171-
this_flag_bit = UTF8_GOT_PERL_EXTENDED;
2172-
}
2173-
2174-
if (flags & ( UTF8_WARN_PERL_EXTENDED
2175-
|UTF8_DISALLOW_PERL_EXTENDED))
2176-
{
2177-
*errors |= UTF8_GOT_PERL_EXTENDED;
2178-
2179-
if (flags & UTF8_DISALLOW_PERL_EXTENDED) {
2180-
disallowed = TRUE;
2181-
}
2182-
}
2183-
}
2184-
2185-
if (flags & UTF8_DISALLOW_SUPER) {
2186-
*errors |= UTF8_GOT_SUPER;
2187-
disallowed = TRUE;
2188-
}
2189-
2190-
break;
2191-
21922072
case UTF8_GOT_NONCHAR:
21932073

21942074
if (flags & UTF8_WARN_NONCHAR) {
@@ -2276,11 +2156,131 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
22762156
}
22772157

22782158
break;
2159+
case UTF8_GOT_OVERFLOW:
22792160

2280-
default:
2281-
Perl_croak(aTHX_ "panic: Unexpected case value in "
2282-
" utf8n_to_uvchr_msgs() %d", this_problem);
2283-
/* NOTREACHED */
2161+
/* Overflow means also got a super and are using Perl's
2162+
* extended UTF-8, but we handle all three cases here */
2163+
possible_problems &= ~(UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
2164+
*errors |= UTF8_GOT_OVERFLOW;
2165+
uv = UNICODE_REPLACEMENT;
2166+
2167+
/* But the API says we flag all errors found */
2168+
if (flags & (UTF8_WARN_SUPER|UTF8_DISALLOW_SUPER)) {
2169+
*errors |= UTF8_GOT_SUPER;
2170+
}
2171+
if (flags
2172+
& (UTF8_WARN_PERL_EXTENDED|UTF8_DISALLOW_PERL_EXTENDED))
2173+
{
2174+
*errors |= UTF8_GOT_PERL_EXTENDED;
2175+
}
2176+
2177+
/* Disallow if any of the three categories say to */
2178+
if ( ! (flags & UTF8_ALLOW_OVERFLOW)
2179+
|| (flags & ( UTF8_DISALLOW_SUPER
2180+
|UTF8_DISALLOW_PERL_EXTENDED)))
2181+
{
2182+
disallowed = TRUE;
2183+
}
2184+
2185+
/* Likewise, warn if any say to */
2186+
if ( ! (flags & UTF8_ALLOW_OVERFLOW)
2187+
|| (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
2188+
{
2189+
2190+
/* Overflow is a hybrid. If the word size on this platform
2191+
* were wide enough for this to not overflow, a non-Unicode
2192+
* code point would have been generated. If the caller
2193+
* wanted warnings for such code points, the warning
2194+
* category would be WARN_NON_UNICODE, On the other hand,
2195+
* overflow is considered a malformation, which is serious,
2196+
* and the category would be just WARN_UTF8. We clearly
2197+
* should warn if either category is enabled, but which
2198+
* category to use? Historically, we've used 'utf8' if it
2199+
* is enabled; and that seems like the more severe
2200+
* category, more befitting a malformation. */
2201+
pack_warn = NEED_MESSAGE(WARN_UTF8,
2202+
ckWARN_d, WARN_NON_UNICODE);
2203+
if (pack_warn) {
2204+
message = Perl_form(aTHX_ "%s: %s (overflows)",
2205+
malformed_text,
2206+
_byte_dump_string(s0, curlen, 0));
2207+
this_flag_bit = UTF8_GOT_OVERFLOW;
2208+
}
2209+
}
2210+
2211+
break;
2212+
2213+
case UTF8_GOT_SUPER:
2214+
2215+
if (flags & UTF8_WARN_SUPER) {
2216+
*errors |= UTF8_GOT_SUPER;
2217+
2218+
if (NEED_MESSAGE(WARN_NON_UNICODE,,)) {
2219+
pack_warn = packWARN(WARN_NON_UNICODE);
2220+
2221+
if (orig_problems & UTF8_GOT_TOO_SHORT) {
2222+
message = Perl_form(aTHX_
2223+
"Any UTF-8 sequence that starts with"
2224+
" \"%s\" is for a non-Unicode code point,"
2225+
" may not be portable",
2226+
_byte_dump_string(s0, curlen, 0));
2227+
}
2228+
else {
2229+
message = Perl_form(aTHX_ super_cp_format, uv);
2230+
}
2231+
this_flag_bit = UTF8_GOT_SUPER;
2232+
}
2233+
}
2234+
2235+
/* Test for Perl's extended UTF-8 after the regular SUPER ones,
2236+
* and before possibly bailing out, so that the more dire
2237+
* warning will override the regular one. */
2238+
if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) {
2239+
if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
2240+
&& NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE))
2241+
{
2242+
pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE);
2243+
2244+
/* If it is an overlong that evaluates to a code point
2245+
* that doesn't have to use the Perl extended UTF-8, it
2246+
* still used it, and so we output a message that
2247+
* doesn't refer to the code point. The same is true
2248+
* if there was a SHORT malformation where the code
2249+
* point is not valid. In that case, 'uv' will have
2250+
* been set to the REPLACEMENT CHAR, and the message
2251+
* below without the code point in it will be selected
2252+
* */
2253+
if (UNICODE_IS_PERL_EXTENDED(uv)) {
2254+
message = Perl_form(aTHX_
2255+
PL_extended_cp_format, uv);
2256+
}
2257+
else {
2258+
message = Perl_form(aTHX_
2259+
"Any UTF-8 sequence that starts with"
2260+
" \"%s\" is a Perl extension, and"
2261+
" so is not portable",
2262+
_byte_dump_string(s0, curlen, 0));
2263+
}
2264+
this_flag_bit = UTF8_GOT_PERL_EXTENDED;
2265+
}
2266+
2267+
if (flags & ( UTF8_WARN_PERL_EXTENDED
2268+
|UTF8_DISALLOW_PERL_EXTENDED))
2269+
{
2270+
*errors |= UTF8_GOT_PERL_EXTENDED;
2271+
2272+
if (flags & UTF8_DISALLOW_PERL_EXTENDED) {
2273+
disallowed = TRUE;
2274+
}
2275+
}
2276+
}
2277+
2278+
if (flags & UTF8_DISALLOW_SUPER) {
2279+
*errors |= UTF8_GOT_SUPER;
2280+
disallowed = TRUE;
2281+
}
2282+
2283+
break;
22842284

22852285
} /* End of switch() on the possible problems */
22862286

0 commit comments

Comments
 (0)