@@ -1949,21 +1949,21 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
1949
1949
* malformation is an overlong (which allows it to be fully
1950
1950
* computed). Or it may have been "cured" as best it can by the
1951
1951
* loop just above. */
1952
- if (UNLIKELY (UNICODE_IS_SURROGATE (uv ))) {
1953
- if (flags & (UTF8_DISALLOW_SURROGATE |UTF8_WARN_SURROGATE )) {
1954
- possible_problems |= UTF8_GOT_SURROGATE ;
1955
- }
1952
+ if (UNLIKELY (UNICODE_IS_SURROGATE (uv ))) {
1953
+ if (flags & (UTF8_DISALLOW_SURROGATE |UTF8_WARN_SURROGATE )) {
1954
+ possible_problems |= UTF8_GOT_SURROGATE ;
1956
1955
}
1957
- else if ( UNLIKELY ( UNICODE_IS_SUPER ( uv ))) {
1958
- if (flags & ( UTF8_DISALLOW_SUPER | UTF8_WARN_SUPER )) {
1959
- possible_problems |= UTF8_GOT_SUPER ;
1960
- }
1956
+ }
1957
+ else if (UNLIKELY ( UNICODE_IS_SUPER ( uv ) )) {
1958
+ if ( flags & ( UTF8_DISALLOW_SUPER | UTF8_WARN_SUPER )) {
1959
+ possible_problems |= UTF8_GOT_SUPER ;
1961
1960
}
1962
- else if ( UNLIKELY ( UNICODE_IS_NONCHAR ( uv ))) {
1963
- if (flags & ( UTF8_DISALLOW_NONCHAR | UTF8_WARN_NONCHAR )) {
1964
- possible_problems |= UTF8_GOT_NONCHAR ;
1965
- }
1961
+ }
1962
+ else if (UNLIKELY ( UNICODE_IS_NONCHAR ( uv ) )) {
1963
+ if ( flags & ( UTF8_DISALLOW_NONCHAR | UTF8_WARN_NONCHAR )) {
1964
+ possible_problems |= UTF8_GOT_NONCHAR ;
1966
1965
}
1966
+ }
1967
1967
} /* End of ! must_be_super */
1968
1968
} /* End of checking if is a special code point */
1969
1969
@@ -2258,26 +2258,27 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2258
2258
case UTF8_GOT_EMPTY :
2259
2259
COMMON_DEFAULT_REJECTS (,);
2260
2260
2261
- /* This so-called malformation is now treated as a bug in
2262
- * the caller. If you have nothing to decode, skip calling
2263
- * this function */
2264
- assert ( 0 );
2265
- message = Perl_form ( aTHX_ "%s (empty string)" ,
2266
- malformed_text );
2261
+ /* This so-called malformation is now treated as a bug in the
2262
+ * caller. If you have nothing to decode, skip calling this
2263
+ * function */
2264
+
2265
+ assert ( 0 );
2266
+ message = Perl_form ( aTHX_ "%s (empty string)" , malformed_text );
2267
2267
break ;
2268
2268
2269
2269
case UTF8_GOT_CONTINUATION :
2270
2270
COMMON_DEFAULT_REJECTS (,);
2271
- message = Perl_form (aTHX_
2271
+ message = Perl_form (aTHX_
2272
2272
"%s: %s (unexpected continuation byte 0x%02x,"
2273
2273
" with no preceding start byte)" ,
2274
2274
malformed_text ,
2275
- _byte_dump_string (s0 , 1 , 0 ), * s0 );
2275
+ _byte_dump_string (s0 , 1 , 0 ),
2276
+ * s0 );
2276
2277
break ;
2277
2278
2278
2279
case UTF8_GOT_SHORT :
2279
2280
COMMON_DEFAULT_REJECTS (,);
2280
- message = Perl_form (aTHX_
2281
+ message = Perl_form (aTHX_
2281
2282
"%s: %s (too short; %d byte%s available, need %d)" ,
2282
2283
malformed_text ,
2283
2284
_byte_dump_string (s0 , avail_len , 0 ),
@@ -2289,14 +2290,15 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2289
2290
case UTF8_GOT_NON_CONTINUATION :
2290
2291
{
2291
2292
COMMON_DEFAULT_REJECTS (,);
2292
- /* If we don't know for sure that the input length is
2293
- * valid, avoid as much as possible reading past the
2294
- * end of the buffer */
2295
- int printlen = (flags & UTF8_NO_CONFIDENCE_IN_CURLEN_ )
2296
- ? (int ) (s - s0 )
2297
- : (int ) (avail_len );
2298
- message = Perl_form (aTHX_ "%s" ,
2299
- unexpected_non_continuation_text (s0 ,
2293
+
2294
+ /* If we don't know for sure that the input length is valid,
2295
+ * avoid as much as possible reading past the end of the buffer
2296
+ * */
2297
+ int printlen = (flags & UTF8_NO_CONFIDENCE_IN_CURLEN_ )
2298
+ ? (int ) (s - s0 )
2299
+ : (int ) (avail_len );
2300
+ message = Perl_form (aTHX_ "%s" ,
2301
+ unexpected_non_continuation_text (s0 ,
2300
2302
printlen ,
2301
2303
s - s0 ,
2302
2304
(int ) expectlen ));
@@ -2307,37 +2309,34 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2307
2309
case UTF8_GOT_LONG_WITH_VALUE :
2308
2310
COMMON_DEFAULT_REJECTS (,);
2309
2311
2310
- /* These error types cause 'input_uv' to be something
2311
- * that isn't what was intended, so can't use it in the
2312
- * message. The other error types either can't
2313
- * generate an overlong, or else the 'input_uv' is
2314
- * valid */
2315
- if (orig_problems &
2316
- (UTF8_GOT_TOO_SHORT |UTF8_GOT_OVERFLOW ))
2317
- {
2318
- message = Perl_form (aTHX_
2319
- "%s: %s (any UTF-8 sequence that starts"
2320
- " with \"%s\" is overlong which can and"
2321
- " should be represented with a"
2322
- " different, shorter sequence)" ,
2323
- malformed_text ,
2324
- _byte_dump_string (s0 , send - s0 , 0 ),
2325
- _byte_dump_string (s0 , curlen , 0 ));
2326
- }
2327
- else {
2328
- U8 tmpbuf [UTF8_MAXBYTES + 1 ];
2329
- const U8 * const e = uvoffuni_to_utf8_flags (tmpbuf ,
2312
+ /* These error types cause 'input_uv' to be something that
2313
+ * isn't what was intended, so can't use it in the message.
2314
+ * The other error types either can't generate an overlong, or
2315
+ * else the 'input_uv' is valid */
2316
+ if (orig_problems & (UTF8_GOT_TOO_SHORT |UTF8_GOT_OVERFLOW )) {
2317
+ message = Perl_form (aTHX_
2318
+ "%s: %s (any UTF-8 sequence that starts with"
2319
+ " \"%s\" is overlong which can and should be"
2320
+ " represented with a different, shorter sequence)" ,
2321
+ malformed_text ,
2322
+ _byte_dump_string (s0 , send - s0 , 0 ),
2323
+ _byte_dump_string (s0 , curlen , 0 ));
2324
+ }
2325
+ else {
2326
+ U8 tmpbuf [UTF8_MAXBYTES + 1 ];
2327
+ const U8 * const e = uvoffuni_to_utf8_flags (tmpbuf ,
2330
2328
input_uv , 0 );
2331
- /* Don't use U+ for non-Unicode code points, which
2332
- * includes those in the Latin1 range */
2333
- const char * preface = ( UNICODE_IS_SUPER (input_uv )
2329
+
2330
+ /* Don't use U+ for non-Unicode code points, which includes
2331
+ * those in the Latin1 range */
2332
+ const char * preface = ( UNICODE_IS_SUPER (input_uv )
2334
2333
#ifdef EBCDIC
2335
- || input_uv <= 0xFF
2334
+ || input_uv <= 0xFF
2336
2335
#endif
2337
- )
2338
- ? "0x"
2339
- : "U+" ;
2340
- message = Perl_form (aTHX_
2336
+ )
2337
+ ? "0x"
2338
+ : "U+" ;
2339
+ message = Perl_form (aTHX_
2341
2340
"%s: %s (overlong; instead use %s to represent"
2342
2341
" %s%0*" UVXf ")" ,
2343
2342
malformed_text ,
@@ -2348,7 +2347,7 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2348
2347
for small code
2349
2348
points */
2350
2349
UNI_TO_NATIVE (input_uv ));
2351
- }
2350
+ }
2352
2351
break ;
2353
2352
2354
2353
/* PACK_WARN returns:
@@ -2384,33 +2383,30 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2384
2383
COMMON_DEFAULT_ACCEPTEDS (UTF8_WARN_SURROGATE ,
2385
2384
WARN_SURROGATE ,,);
2386
2385
2387
- /* This is the only error that can occur with a
2388
- * surrogate when the 'input_uv' isn't valid */
2389
- if (orig_problems & UTF8_GOT_TOO_SHORT ) {
2390
- message = Perl_form (aTHX_
2391
- "UTF-16 surrogate (any UTF-8 sequence that"
2392
- " starts with \"%s\" is for a surrogate)" ,
2393
- _byte_dump_string (s0 , curlen , 0 ));
2394
- }
2395
- else {
2396
- message = Perl_form (aTHX_ surrogate_cp_format ,
2397
- input_uv );
2398
- }
2386
+ /* This is the only error that can occur with a surrogate when
2387
+ * the 'input_uv' isn't valid */
2388
+ if (orig_problems & UTF8_GOT_TOO_SHORT ) {
2389
+ message = Perl_form (aTHX_
2390
+ "UTF-16 surrogate (any UTF-8 sequence that"
2391
+ " starts with \"%s\" is for a surrogate)" ,
2392
+ _byte_dump_string (s0 , curlen , 0 ));
2393
+ }
2394
+ else {
2395
+ message = Perl_form (aTHX_ surrogate_cp_format , input_uv );
2396
+ }
2399
2397
2400
2398
break ;
2401
2399
2402
2400
case UTF8_GOT_NONCHAR :
2403
-
2404
2401
COMMON_DEFAULT_ACCEPTEDS (UTF8_WARN_NONCHAR , WARN_NONCHAR ,,);
2405
2402
2406
- /* The code above should have guaranteed that we don't
2407
- * get here with conditions other than these */
2408
- assert (! (orig_problems & ~( UTF8_GOT_LONG
2409
- |UTF8_GOT_LONG_WITH_VALUE
2410
- |UTF8_GOT_PERL_EXTENDED
2411
- |UTF8_GOT_NONCHAR )));
2412
-
2413
- message = Perl_form (aTHX_ nonchar_cp_format , input_uv );
2403
+ /* The code above should have guaranteed that we don't get here
2404
+ * with conditions other than these */
2405
+ assert (! (orig_problems & ~( UTF8_GOT_LONG
2406
+ |UTF8_GOT_LONG_WITH_VALUE
2407
+ |UTF8_GOT_PERL_EXTENDED
2408
+ |UTF8_GOT_NONCHAR )));
2409
+ message = Perl_form (aTHX_ nonchar_cp_format , input_uv );
2414
2410
2415
2411
break ;
2416
2412
@@ -2524,50 +2520,50 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
2524
2520
/* We only get here if there is a message to be displayed or
2525
2521
* returned; each case statement in the switch above does a
2526
2522
* continue if no message for it need be generated. */
2527
- if (msgs ) {
2528
- if (msgs_return == NULL ) {
2529
- msgs_return = newAV ();
2523
+ if (msgs ) {
2524
+ if (msgs_return == NULL ) {
2525
+ msgs_return = newAV ();
2526
+ }
2527
+
2528
+ av_push (msgs_return ,
2529
+ /* Negative 'pack_warn' really means 0 here. But this
2530
+ * converts that to UTF-8 to preserve broken behavior
2531
+ * depended upon by Encode. */
2532
+ newRV_noinc ((SV * ) new_msg_hv (message ,
2533
+ ((pack_warn <= 0 )
2534
+ ? packWARN (WARN_UTF8 )
2535
+ : pack_warn ),
2536
+ this_flag_bit )));
2537
+ }
2538
+ else {
2539
+ if (UNLIKELY (flags & ( UTF8_DIE_IF_MALFORMED
2540
+ |UTF8_FORCE_WARN_IF_MALFORMED )))
2541
+ {
2542
+ ENTER ;
2543
+ SAVEI8 (PL_dowarn );
2544
+ SAVESPTR (PL_curcop );
2545
+
2546
+ PL_dowarn = G_WARN_ALL_ON |G_WARN_ON ;
2547
+ if (PL_curcop ) {
2548
+ SAVECURCOPWARNINGS ();
2549
+ PL_curcop -> cop_warnings = pWARN_ALL ;
2530
2550
}
2551
+ }
2531
2552
2532
- av_push (msgs_return ,
2533
- /* Negative 'pack_warn' really means 0 here. But
2534
- * this converts that to UTF-8 to preserve broken
2535
- * behavior depended upon by Encode. */
2536
- newRV_noinc ((SV * ) new_msg_hv (message ,
2537
- ((pack_warn <= 0 )
2538
- ? packWARN (WARN_UTF8 )
2539
- : pack_warn ),
2540
- this_flag_bit )));
2553
+ if (PL_op ) {
2554
+ Perl_warner (aTHX_ pack_warn , "%s in %s" , message ,
2555
+ OP_DESC (PL_op ));
2541
2556
}
2542
2557
else {
2543
- if (UNLIKELY (flags & ( UTF8_DIE_IF_MALFORMED
2544
- |UTF8_FORCE_WARN_IF_MALFORMED )))
2545
- {
2546
- ENTER ;
2547
- SAVEI8 (PL_dowarn );
2548
- SAVESPTR (PL_curcop );
2549
-
2550
- PL_dowarn = G_WARN_ALL_ON |G_WARN_ON ;
2551
- if (PL_curcop ) {
2552
- SAVECURCOPWARNINGS ();
2553
- PL_curcop -> cop_warnings = pWARN_ALL ;
2554
- }
2555
- }
2556
-
2557
- if (PL_op ) {
2558
- Perl_warner (aTHX_ pack_warn , "%s in %s" , message ,
2559
- OP_DESC (PL_op ));
2560
- }
2561
- else {
2562
- Perl_warner (aTHX_ pack_warn , "%s" , message );
2563
- }
2558
+ Perl_warner (aTHX_ pack_warn , "%s" , message );
2559
+ }
2564
2560
2565
- if (UNLIKELY (flags & ( UTF8_DIE_IF_MALFORMED
2566
- |UTF8_FORCE_WARN_IF_MALFORMED )))
2567
- {
2568
- LEAVE ;
2569
- }
2561
+ if (UNLIKELY (flags & ( UTF8_DIE_IF_MALFORMED
2562
+ |UTF8_FORCE_WARN_IF_MALFORMED )))
2563
+ {
2564
+ LEAVE ;
2570
2565
}
2566
+ }
2571
2567
} /* End of 'while (possible_problems)' */
2572
2568
2573
2569
if (msgs_return ) {
0 commit comments