diff --git a/pod/perldiag.pod b/pod/perldiag.pod index e6250bd970ee..1c9eec84a13a 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2866,9 +2866,19 @@ has since been undefined. =item Group name must start with a non-digit word character in regex; marked by S<<-- HERE> in m/%s/ -(F) Group names must follow the rules for perl identifiers, meaning -they must start with a non-digit word character. A common cause of -this error is using (?&0) instead of (?0). See L<perlre>. +(F) Group names must follow the rules for Perl identifiers, meaning +they must start with a character that matches C<\p{XID_Start}> plus the +underscore. This means the first character may not be a digit. +Subsequent characters must match C<\p{XID_Continue}>. + +A common cause of this error is using (?&0) instead of (?0). + +This message was formulated before Perl supported Unicode; so it is +not accurate for Unicode characters outside the ASCII-range. There are +many word characters in Unicode that may not start a group name, and a +few that may not be a continuation character. + +See L<perlre>. =item ()-group starts with a count diff --git a/regcomp.c b/regcomp.c index 774311f12dc5..c6a2e6ee0e14 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2533,8 +2533,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) && (advance = isWORDCHAR_utf8_safe( (U8 *) RExC_parse, (U8 *) RExC_end))); } else { - RExC_parse_inc_by(1); /* so the <- from the vFAIL is after the offending - character */ + /* so the <- from the vFAIL is after the offending character */ + RExC_parse_inc_safe(); vFAIL("Group name must start with a non-digit word character"); } sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start), diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index b8b4ec6a3d8e..676843fb1624 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -176,7 +176,7 @@ my @death = '/(?/' => 'Sequence (? incomplete {#} m/(?{#}/', '/(?;x/' => 'Sequence (?;...) 
not recognized {#} m/(?;{#}x/', - '/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/', + '/(?<;name>match)/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}name>match)/', '/(?\ix/' => 'Sequence (?\...) not recognized {#} m/(?\{#}ix/', '/(?\mx/' => 'Sequence (?\...) not recognized {#} m/(?\{#}mx/', '/(?\:x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}:x/', @@ -222,7 +222,7 @@ my @death = '/\g{-abc}/' => 'Group name must start with a non-digit word character {#} m/\g{-{#}abc}/', '/\g{1-1}/' => 'Sequence \g{... not terminated {#} m/\g{1{#}-1}/', '/\g{ -1 foo }/' => 'Sequence \g{... not terminated {#} m/\g{ -1 {#}foo }/', - '/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/', + '/(?<;name>match)/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}name>match)/', 'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/', @@ -310,7 +310,7 @@ my @death = 'm/(?&a/' => 'Sequence (?&... not terminated {#} m/(?&a{#}/', 'm/(?P=/' => 'Sequence ?P=... not terminated {#} m/(?P={#}/', "m/(?'/" => "Sequence (?'... not terminated {#} m/(?'{#}/", - "m/(? "Sequence (?<... not terminated {#} m/(?<{#}/", + "m/(? "Sequence (?<... not terminated {#} m/(? 'Sequence (?&... not terminated {#} m/(?&{#}/', 'm/(?( 'Sequence (?(<... not terminated {#} m/(?(<{#}/', "m/(?('/" => "Sequence (?('... not terminated {#} m/(?('{#}/", @@ -485,7 +485,7 @@ my @death_utf8 = mark_as_utf8( '/ネ(?/' => 'Sequence (? incomplete {#} m/ネ(?{#}/', '/ネ(?;ネ/' => 'Sequence (?;...) not recognized {#} m/ネ(?;{#}ネ/', - '/ネ(?<;ネ/' => 'Group name must start with a non-digit word character {#} m/ネ(?<;{#}ネ/', + '/ネ(?<;name>match)ネ/' => 'Group name must start with a non-digit word character {#} m/ネ(?<;{#}name>match)ネ/', '/ネ(?\ixネ/' => 'Sequence (?\...) 
not recognized {#} m/ネ(?\{#}ixネ/', '/ネ(?^lu:ネ)/' => 'Regexp modifiers "l" and "u" are mutually exclusive {#} m/ネ(?^lu{#}:ネ)/', '/ネ(?lil:ネ)/' => 'Regexp modifier "l" may not appear twice {#} m/ネ(?lil{#}:ネ)/', @@ -547,6 +547,7 @@ my @death_utf8 = mark_as_utf8( '/[\cネ]/' => "Character following \"\\c\" must be printable ASCII {#} m/[\\cネ{#}]/", '/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/", '/\B{ネ}/' => "'ネ' is an unknown bound type {#} m/\\B{ネ{#}}/", + '/ネ(?<‿name>match)ネ/; #no latin1' => 'Group name must start with a non-digit word character {#} m/ネ(?<‿{#}name>match)ネ/', ); push @death, @death_utf8;