Skip to content

Commit 597f346

Browse files
committed
mark -s switches as UTF-8 with -CA
-CA (also known as PERL_UNICODE=A) tells perl to assume the command-line arguments to be UTF-8. This did not apply to the global variables implicitly created by -s, however, only to the elements of @ARGV. Logically speaking it does not make sense to have half a command line treated as UTF-8, so this patch makes -CA apply to everything. Fixes #23377.
1 parent a53b949 commit 597f346

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

perl.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4698,8 +4698,13 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv)
46984698
{
46994699
PERL_ARGS_ASSERT_INIT_ARGV_SYMBOLS;
47004700

4701+
const bool mark_args_utf8 =
4702+
(!(PL_unicode & PERL_UNICODE_LOCALE_FLAG) || PL_utf8locale)
4703+
&& (PL_unicode & PERL_UNICODE_ARGV_FLAG);
4704+
47014705
argc--,argv++; /* skip name of script */
47024706
if (PL_doswitches) {
4707+
const I32 flags = GV_ADD | (mark_args_utf8 ? SVf_UTF8 : 0);
47034708
for (; argc > 0 && **argv == '-'; argc--,argv++) {
47044709
char *s;
47054710
if (!argv[0][1])
@@ -4710,11 +4715,16 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv)
47104715
}
47114716
if ((s = strchr(argv[0], '='))) {
47124717
const char *const start_name = argv[0] + 1;
4713-
sv_setpv(GvSV(gv_fetchpvn_flags(start_name, s - start_name,
4714-
TRUE, SVt_PV)), s + 1);
4718+
SV *const sv = GvSV(gv_fetchpvn_flags(start_name, s - start_name,
4719+
flags, SVt_PV));
4720+
sv_setpv(sv, s + 1);
4721+
if (mark_args_utf8)
4722+
SvUTF8_on(sv);
4723+
else
4724+
SvUTF8_off(sv);
47154725
}
47164726
else
4717-
sv_setiv(GvSV(gv_fetchpv(argv[0]+1, GV_ADD, SVt_PV)),1);
4727+
sv_setiv(GvSV(gv_fetchpv(argv[0]+1, flags, SVt_PV)), 1);
47184728
}
47194729
}
47204730
if ((PL_argvgv = gv_fetchpvs("ARGV", GV_ADD|GV_NOTQUAL, SVt_PVAV))) {
@@ -4724,12 +4734,10 @@ Perl_init_argv_symbols(pTHX_ int argc, char **argv)
47244734
for (; argc > 0; argc--,argv++) {
47254735
SV * const sv = newSVpv(argv[0],0);
47264736
av_push(GvAV(PL_argvgv),sv);
4727-
if (!(PL_unicode & PERL_UNICODE_LOCALE_FLAG) || PL_utf8locale) {
4728-
if (PL_unicode & PERL_UNICODE_ARGV_FLAG)
4729-
SvUTF8_on(sv);
4730-
}
4737+
if (mark_args_utf8)
4738+
SvUTF8_on(sv);
47314739
if (PL_unicode & PERL_UNICODE_WIDESYSCALLS_FLAG) /* Sarathy? */
4732-
(void)sv_utf8_decode(sv);
4740+
(void)sv_utf8_decode(sv);
47334741
}
47344742
}
47354743

t/run/switches.t

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,28 @@ $r = runperl(
155155
);
156156
is( $r, '21-', '-s switch parsing' );
157157

158+
# GH #23377: -s and -CA
159+
SKIP: {
160+
skip 'these UTF-8 switch tests assume ASCII-like environment', 2
161+
unless $::IS_ASCII;
162+
163+
$r = runperl(
164+
switches => [ '-C0', '-s' ],
165+
prog => 'printf q(%vx;), $_ for ${qq(\xC5\xB8)}, ${qq(\x{178})}, ${qq(\xC3\xA1)}, ${qq(\xE1)}',
166+
args => [ '--', "-\xC5\xB8", "-\xC3\xA1=\xE2\x82\xAC" ],
167+
);
168+
169+
is( $r, '31;;e2.82.ac;;', '-s bytes in switches are preserved without -CA' );
170+
171+
$r = runperl(
172+
switches => [ '-CA', '-s' ],
173+
prog => 'printf q(%vx;), $_ for ${qq(\xC5\xB8)}, ${qq(\x{178})}, ${qq(\xC3\xA1)}, ${qq(\xE1)}',
174+
args => [ '--', "-\xC5\xB8", "-\xC3\xA1=\xE2\x82\xAC" ],
175+
);
176+
177+
is( $r, ';31;;20ac;', '-s bytes in switches are utf8-decoded with -CA' );
178+
}
179+
158180
# Bug ID 20011106.084 (RT #7876 / GH #4554): -s on shebang line
159181
$filename = tempfile();
160182
SKIP: {

0 commit comments

Comments
 (0)