diff --git a/perl.c b/perl.c index e9e5b331ba36..7808137740f3 100644 --- a/perl.c +++ b/perl.c @@ -4698,8 +4698,13 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv) { PERL_ARGS_ASSERT_INIT_ARGV_SYMBOLS; + const bool mark_args_utf8 = + (!(PL_unicode & PERL_UNICODE_LOCALE_FLAG) || PL_utf8locale) + && (PL_unicode & PERL_UNICODE_ARGV_FLAG); + argc--,argv++; /* skip name of script */ if (PL_doswitches) { + const I32 flags = GV_ADD | (mark_args_utf8 ? SVf_UTF8 : 0); for (; argc > 0 && **argv == '-'; argc--,argv++) { char *s; if (!argv[0][1]) @@ -4710,11 +4715,16 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv) } if ((s = strchr(argv[0], '='))) { const char *const start_name = argv[0] + 1; - sv_setpv(GvSV(gv_fetchpvn_flags(start_name, s - start_name, - TRUE, SVt_PV)), s + 1); + SV *const sv = GvSV(gv_fetchpvn_flags(start_name, s - start_name, + flags, SVt_PV)); + sv_setpv(sv, s + 1); + if (mark_args_utf8) + SvUTF8_on(sv); + else + SvUTF8_off(sv); } else - sv_setiv(GvSV(gv_fetchpv(argv[0]+1, GV_ADD, SVt_PV)),1); + sv_setiv(GvSV(gv_fetchpv(argv[0]+1, flags, SVt_PV)), 1); } } if ((PL_argvgv = gv_fetchpvs("ARGV", GV_ADD|GV_NOTQUAL, SVt_PVAV))) { @@ -4724,12 +4734,10 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv) for (; argc > 0; argc--,argv++) { SV * const sv = newSVpv(argv[0],0); av_push(GvAV(PL_argvgv),sv); - if (!(PL_unicode & PERL_UNICODE_LOCALE_FLAG) || PL_utf8locale) { - if (PL_unicode & PERL_UNICODE_ARGV_FLAG) - SvUTF8_on(sv); - } + if (mark_args_utf8) + SvUTF8_on(sv); if (PL_unicode & PERL_UNICODE_WIDESYSCALLS_FLAG) /* Sarathy? */ - (void)sv_utf8_decode(sv); + (void)sv_utf8_decode(sv); } } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 147ab795c621..78c0da7e57f7 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -366,6 +366,25 @@ manager will later use a regex to expand these into links. =item * +The C<-CA> flag (or equivalently, the C environment setting) +tells perl to treat command-line arguments as UTF-8 strings. (See L +for details.) However, this did not extend to the global variables implicitly +created by the C<-s> option: + + $ perl -CA -s -e 'printf "%vx\n", $_ for $foo, $ARGV[0]' -- -foo=é é + c3.a9 + e9 + +Here C<$foo> would end up containing the two-byte UTF-8 representation of +"LATIN SMALL LETTER E WITH ACUTE", but C<$ARGV[0]> would contain a single +codepoint corresponding to U+00E9. + +This has been fixed: If C<-CA> is in effect, options parsed by C<-s> are +treated as UTF-8, too. In the example above, C<$foo> and C<$ARGV[0]> now both +contain C. [GH #23377] + +=item * + We have long claimed to support identifiers up to about 255 characters long. However this was actually true only for identifiers that consisted of only ASCII characters. The real upper limit was as few as diff --git a/t/run/switches.t b/t/run/switches.t index bf440e1a132b..7ba641a8157d 100644 --- a/t/run/switches.t +++ b/t/run/switches.t @@ -155,9 +155,32 @@ $r = runperl( ); is( $r, '21-', '-s switch parsing' ); +# GH #23377: -s and -CA +SKIP: { + skip 'these UTF-8 switch tests assume ASCII-like environment', 2 + unless $::IS_ASCII; + + $r = runperl( + switches => [ '-C0', '-s' ], + prog => 'printf q(%vx;), $_ for ${qq(\xC5\xB8)}, ${qq(\x{178})}, ${qq(\xC3\xA1)}, ${qq(\xE1)}', + args => [ '--', "-\xC5\xB8", "-\xC3\xA1=\xE2\x82\xAC" ], + ); + + is( $r, '31;;e2.82.ac;;', '-s bytes in switches are preserved without -CA' ); + + $r = runperl( + switches => [ '-CA', '-s' ], + prog => 'printf q(%vx;), $_ for ${qq(\xC5\xB8)}, ${qq(\x{178})}, ${qq(\xC3\xA1)}, ${qq(\xE1)}', + args => [ '--', "-\xC5\xB8", "-\xC3\xA1=\xE2\x82\xAC" ], + ); + + is( $r, ';31;;20ac;', '-s bytes in switches are utf8-decoded with -CA' ); +} + +# Bug ID 20011106.084 (RT #7876 / GH #4554): -s on shebang line $filename = tempfile(); SKIP: { - open my $f, ">$filename" or skip( "Can't write temp file $filename: $!" ); + open my $f, ">", $filename or skip( "Can't write temp file $filename: $!" ); print $f <<'SWTEST'; #!perl -s BEGIN { print $x,$y; exit } @@ -170,10 +193,9 @@ SWTEST is( $r, 'foo1', '-s on the shebang line' ); } -# Bug ID 20011106.084 (#7876) $filename = tempfile(); SKIP: { - open my $f, ">$filename" or skip( "Can't write temp file $filename: $!" ); + open my $f, ">", $filename or skip( "Can't write temp file $filename: $!" ); print $f <<'SWTEST'; #!perl -sn BEGIN { print $x; exit }