From 6cc07d11de2d1e91a782e71dd73a6d2643d63027 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sat, 25 Oct 2025 16:49:13 -0600
Subject: [PATCH 1/3] reg_mesg.t: Only one error per test

This just fills out a couple of tests so that they don't prematurely
end.  That makes it clear that the error that does get shown isn't also
due to other mistakes in the test.
---
 t/re/reg_mesg.t | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index b8b4ec6a3d8e..a7662e9ce0b3 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -176,7 +176,7 @@ my @death =
  '/(?/' => 'Sequence (? incomplete {#} m/(?{#}/',
 
  '/(?;x/' => 'Sequence (?;...) not recognized {#} m/(?;{#}x/',
- '/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/',
+ '/(?<;name>match)/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}name>match)/',
  '/(?\ix/' => 'Sequence (?\...) not recognized {#} m/(?\{#}ix/',
  '/(?\mx/' => 'Sequence (?\...) not recognized {#} m/(?\{#}mx/',
  '/(?\:x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}:x/',
@@ -222,7 +222,7 @@ my @death =
  '/\g{-abc}/' => 'Group name must start with a non-digit word character {#} m/\g{-{#}abc}/',
  '/\g{1-1}/'  => 'Sequence \g{... not terminated {#} m/\g{1{#}-1}/',
  '/\g{ -1 foo }/'  => 'Sequence \g{... not terminated {#} m/\g{ -1 {#}foo }/',
- '/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/',
+ '/(?<;name>match)/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}name>match)/',
 
  'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/',
 
@@ -310,7 +310,7 @@ my @death =
  'm/(?&a/'  => 'Sequence (?&... not terminated {#} m/(?&a{#}/',
  'm/(?P=/' => 'Sequence ?P=... not terminated {#} m/(?P={#}/',
  "m/(?'/"  => "Sequence (?'... not terminated {#} m/(?'{#}/",
- "m/(?</"  => "Sequence (?<... not terminated {#} m/(?<{#}/",
+ "m/(?<name)/"  => "Sequence (?<... not terminated {#} m/(?<name{#})/",
  'm/(?&/'  => 'Sequence (?&... not terminated {#} m/(?&{#}/',
  'm/(?(</' => 'Sequence (?(<... not terminated {#} m/(?(<{#}/',
  "m/(?('/" => "Sequence (?('... not terminated {#} m/(?('{#}/",
@@ -485,7 +485,7 @@ my @death_utf8 = mark_as_utf8(
  '/ネ(?/' => 'Sequence (? incomplete {#} m/ネ(?{#}/',
 
  '/ネ(?;ネ/' => 'Sequence (?;...) not recognized {#} m/ネ(?;{#}ネ/',
- '/ネ(?<;ネ/' => 'Group name must start with a non-digit word character {#} m/ネ(?<;{#}ネ/',
+ '/ネ(?<;name>match)ネ/' => 'Group name must start with a non-digit word character {#} m/ネ(?<;{#}name>match)ネ/',
  '/ネ(?\ixネ/' => 'Sequence (?\...) not recognized {#} m/ネ(?\{#}ixネ/',
  '/ネ(?^lu:ネ)/' => 'Regexp modifiers "l" and "u" are mutually exclusive {#} m/ネ(?^lu{#}:ネ)/',
 '/ネ(?lil:ネ)/' => 'Regexp modifier "l" may not appear twice {#} m/ネ(?lil{#}:ネ)/',

From ba0080604745130e108aa0cfdcb041950dbe3583 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sat, 25 Oct 2025 09:21:55 -0600
Subject: [PATCH 2/3] perldiag: Update description for regex group names

This was written before Unicode, and its wording does not accurately
extend beyond ASCII.

This commit clarifies the description.
---
 pod/perldiag.pod | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index e6250bd970ee..1c9eec84a13a 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2866,9 +2866,19 @@ has since been undefined.
 =item Group name must start with a non-digit word character in regex; marked by 
 S<<-- HERE> in m/%s/
 
-(F) Group names must follow the rules for perl identifiers, meaning
-they must start with a non-digit word character.  A common cause of
-this error is using (?&0) instead of (?0).  See L<perlre>.
+(F) Group names must follow the rules for Perl identifiers, meaning
+they must start with a character that matches C<\p{XID_Start}> plus the
+underscore.  This means the first character may not be a digit.
+Subsequent characters must match C<\p{XID_Continue}>.
+
+A common cause of this error is using (?&0) instead of (?0).
+
+This message was formulated before Perl supported Unicode, so it is
+not accurate for Unicode characters outside the ASCII range.  There are
+many word characters in Unicode that may not start a group name, and a
+few that may not be a continuation character.
+
+See L<perlre>.
 
 =item ()-group starts with a count
 

From bdade0c2738adbbeb5aafaa4bf47083168afe079 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sat, 25 Oct 2025 17:33:05 -0600
Subject: [PATCH 3/3] regcomp.c: Need to account for UTF group name

I found this by reading the code.  Prior to this commit, the parse
pointer was advanced by one byte; it should be advanced by one
character.  As long as the character was ASCII, things worked.

I looked through the regcomp.c source for other mis-use of the macro
changed by this commit; none were obvious.
---
 regcomp.c       | 4 ++--
 t/re/reg_mesg.t | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 774311f12dc5..c6a2e6ee0e14 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2533,8 +2533,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
                  && (advance = isWORDCHAR_utf8_safe( (U8 *) RExC_parse,
                                                      (U8 *) RExC_end)));
     } else {
-        RExC_parse_inc_by(1); /* so the <- from the vFAIL is after the offending
-                         character */
+        /* so the <- from the vFAIL is after the offending character */
+        RExC_parse_inc_safe();
         vFAIL("Group name must start with a non-digit word character");
     }
     sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index a7662e9ce0b3..676843fb1624 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -547,6 +547,7 @@ my @death_utf8 = mark_as_utf8(
  '/[\cネ]/' => "Character following \"\\c\" must be printable ASCII {#} m/[\\cネ{#}]/",
  '/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/",
  '/\B{ネ}/' => "'ネ' is an unknown bound type {#} m/\\B{ネ{#}}/",
+ '/ネ(?<‿name>match)ネ/; #no latin1' => 'Group name must start with a non-digit word character {#} m/ネ(?<‿{#}name>match)ネ/',
 );
 push @death, @death_utf8;