Skip to content

Commit 8c5e6c3

Browse files
committed
regcomp.c: validate \g{...} contents past first number
This code used to silently accept \g{123trailing garbage} as long as the braced contents started with a valid number. Fixes #23050.
1 parent a21edf4 commit 8c5e6c3

File tree

3 files changed

+46
-14
lines changed

3 files changed

+46
-14
lines changed

pod/perldelta.pod

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,13 @@ manager will later use a regex to expand these into links.
379379

380380
=item *
381381

382+
In regexes, the contents of C<\g{...}> backreferences are now properly
383+
validated. Previously, C<\g{1 FOO}> was silently parsed as C<\g{1}>, ignoring
384+
everything after the first number.
385+
[GH #23050]
386+
387+
=item *
388+
382389
XXX
383390

384391
=back

regcomp.c

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5511,12 +5511,19 @@ S_compute_EXACTish(RExC_state_t *pRExC_state)
55115511
* in which case return I32_MAX (rather than possibly 32-bit wrapping) */
55125512

55135513
static I32
5514-
S_backref_value(char *p, char *e)
5514+
S_backref_value(const char *p, const char *e, char **pe)
55155515
{
5516-
const char* endptr = e;
5516+
const char *endptr = e;
55175517
UV val;
5518-
if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
5518+
if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX) {
5519+
if (pe) {
5520+
*pe = (char *)endptr;
5521+
}
55195522
return (I32)val;
5523+
}
5524+
if (pe) {
5525+
*pe = NULL;
5526+
}
55205527
return I32_MAX;
55215528
}
55225529

@@ -6021,7 +6028,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
60216028
char * e = RExC_end;
60226029

60236030
if (*s == 'g') {
6024-
bool isrel = 0;
6031+
bool isrel = FALSE;
60256032

60266033
s++;
60276034
if (*s == '{') {
@@ -6067,7 +6074,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
60676074
* surrounding braces */
60686075

60696076
if (*s == '-') {
6070-
isrel = 1;
6077+
isrel = TRUE;
60716078
s++;
60726079
}
60736080

@@ -6076,7 +6083,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
60766083
}
60776084

60786085
RExC_parse_set(s);
6079-
num = S_backref_value(RExC_parse, RExC_end);
6086+
num = S_backref_value(RExC_parse, RExC_end, &s);
6087+
6088+
if (endbrace && s) {
6089+
while (isBLANK(*s)) {
6090+
++s;
6091+
}
6092+
assert(s <= endbrace);
6093+
if (s != endbrace) {
6094+
RExC_parse_set(s);
6095+
vFAIL2("Sequence \\%s... not terminated", "g{");
6096+
}
6097+
++s;
6098+
}
6099+
60806100
if (num == 0)
60816101
vFAIL("Reference to invalid group 0");
60826102
else if (num == I32_MAX) {
@@ -6085,6 +6105,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
60856105
else
60866106
vFAIL("Unterminated \\g... pattern");
60876107
}
6108+
assert(s != NULL);
60886109

60896110
if (isrel) {
60906111
num = RExC_npar - num;
@@ -6108,7 +6129,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
61086129
}
61096130
}
61106131
else {
6111-
num = S_backref_value(RExC_parse, RExC_end);
6132+
num = S_backref_value(RExC_parse, RExC_end, &s);
61126133
/* bare \NNN might be backref or octal - if it is larger
61136134
* than or equal to RExC_npar then it is assumed to be an
61146135
* octal escape. Note RExC_npar is +1 from the actual
@@ -6131,6 +6152,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
61316152
RExC_parse_set(atom_parse_start);
61326153
goto defchar;
61336154
}
6155+
6156+
if (!s) {
6157+
for (s = RExC_parse; isDIGIT(*s); ++s)
6158+
;
6159+
}
6160+
61346161
if (num < RExC_logical_npar) {
61356162
num = RExC_logical_to_parno[num];
61366163
}
@@ -6154,12 +6181,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
61546181
*
61556182
* We've already figured out what value the digits represent.
61566183
* Now, move the parse position beyond them. */
6157-
if (endbrace) {
6158-
RExC_parse_set(endbrace + 1);
6159-
}
6160-
else while (isDIGIT(*RExC_parse)) {
6161-
RExC_parse_inc_by(1);
6162-
}
6184+
assert(s != NULL);
6185+
RExC_parse_set(s);
61636186
if (num < 0)
61646187
vFAIL("Reference to nonexistent group");
61656188

@@ -6577,7 +6600,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
65776600
/* NOTE, RExC_npar is 1 more than the actual number of
65786601
* parens we have seen so far, hence the "<" as opposed
65796602
* to "<=" */
6580-
if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end) < RExC_npar)
6603+
if ( !isDIGIT(p[1]) || S_backref_value(p, RExC_end, NULL) < RExC_npar)
65816604
{ /* Not to be treated as an octal constant, go
65826605
find backref */
65836606
p = oldp;

t/re/reg_mesg.t

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ my @death =
220220
'/\g/' => 'Unterminated \g... pattern {#} m/\g{#}/',
221221
'/\g{1/' => 'Unterminated \g{...} pattern {#} m/\g{1{#}/',
222222
'/\g{-abc}/' => 'Group name must start with a non-digit word character {#} m/\g{-{#}abc}/',
223+
'/\g{1-1}/' => 'Sequence \g{... not terminated {#} m/\g{1{#}-1}/',
224+
'/\g{ -1 foo }/' => 'Sequence \g{... not terminated {#} m/\g{ -1 {#}foo }/',
223225
'/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/',
224226

225227
'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/',

0 commit comments

Comments
 (0)