Skip to content

Commit 47f1cae

Browse files
committed
Merge branch 'PHP-8.4'
* PHP-8.4: Fix phpGH-17122: memory leak in regex
2 parents f5aa69a + dc27acd commit 47f1cae

File tree

2 files changed

+73
-33
lines changed

2 files changed

+73
-33
lines changed

ext/pcre/php_pcre.c

Lines changed: 43 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,14 @@ char *php_pcre_version;
4949

5050
struct _pcre_cache_entry {
5151
pcre2_code *re;
52-
/* Pointer is not NULL when there are named captures.
53-
* Length is equal to capture_count + 1 to account for capture group 0. */
52+
/* Pointer is not NULL (during request) when there are named captures.
53+
* Length is equal to capture_count + 1 to account for capture group 0.
54+
* This table cache is only valid during request.
55+
* Trying to store this over multiple requests causes issues when the keys are exposed in user arrays
56+
* (see GH-17122 and GH-17132). */
5457
zend_string **subpats_table;
5558
uint32_t preg_options;
59+
uint32_t name_count;
5660
uint32_t capture_count;
5761
uint32_t compile_options;
5862
uint32_t refcount;
@@ -478,6 +482,14 @@ static PHP_RINIT_FUNCTION(pcre)
478482

479483
static PHP_RSHUTDOWN_FUNCTION(pcre)
480484
{
485+
pcre_cache_entry *pce;
486+
ZEND_HASH_MAP_FOREACH_PTR(&PCRE_G(pcre_cache), pce) {
487+
if (pce->subpats_table) {
488+
free_subpats_table(pce->subpats_table, pce->capture_count + 1);
489+
pce->subpats_table = NULL;
490+
}
491+
} ZEND_HASH_FOREACH_END();
492+
481493
pcre2_general_context_free(PCRE_G(gctx_zmm));
482494
PCRE_G(gctx_zmm) = NULL;
483495

@@ -509,10 +521,10 @@ static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats)
509521
uint32_t i;
510522
for (i = 0; i < num_subpats; i++) {
511523
if (subpat_names[i]) {
512-
zend_string_release_ex(subpat_names[i], true);
524+
zend_string_release_ex(subpat_names[i], false);
513525
}
514526
}
515-
pefree(subpat_names, true);
527+
efree(subpat_names);
516528
}
517529

518530
/* {{{ static make_subpats_table */
@@ -531,24 +543,25 @@ static zend_string **make_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce
531543
return NULL;
532544
}
533545

534-
subpat_names = pecalloc(num_subpats, sizeof(zend_string *), true);
546+
subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
535547
while (ni++ < name_cnt) {
536548
unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
537549
const char *name = name_table + 2;
538-
/* Note: this makes a persistent string when the cache is not request-based because the string
539-
* has to outlive the request. In that case, they will only be used within this thread
540-
* and never be shared.
541-
* Although we will be storing them in user-exposed arrays, they cannot cause problems
542-
* because they only live in this thread and the last reference is deleted on shutdown
543-
* instead of by user code. */
544-
subpat_names[name_idx] = zend_string_init(name, strlen(name), true);
545-
GC_MAKE_PERSISTENT_LOCAL(subpat_names[name_idx]);
550+
subpat_names[name_idx] = zend_string_init(name, strlen(name), false);
546551
name_table += name_size;
547552
}
548553
return subpat_names;
549554
}
550555
/* }}} */
551556

557+
static zend_string **ensure_subpats_table(uint32_t name_cnt, pcre_cache_entry *pce)
558+
{
559+
if (!pce->subpats_table) {
560+
pce->subpats_table = make_subpats_table(name_cnt, pce);
561+
}
562+
return pce->subpats_table;
563+
}
564+
552565
/* {{{ static calculate_unit_length */
553566
/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
554567
static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, const char *start)
@@ -820,6 +833,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
820833
new_entry.preg_options = poptions;
821834
new_entry.compile_options = coptions;
822835
new_entry.refcount = 0;
836+
new_entry.subpats_table = NULL;
823837

824838
rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
825839
if (rc < 0) {
@@ -831,8 +845,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
831845
return NULL;
832846
}
833847

834-
uint32_t name_count;
835-
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &name_count);
848+
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
836849
if (rc < 0) {
837850
if (key != regex) {
838851
zend_string_release_ex(key, 0);
@@ -842,21 +855,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
842855
return NULL;
843856
}
844857

845-
/* Compute and cache the subpattern table to avoid computing it again over and over. */
846-
if (name_count > 0) {
847-
new_entry.subpats_table = make_subpats_table(name_count, &new_entry);
848-
if (!new_entry.subpats_table) {
849-
if (key != regex) {
850-
zend_string_release_ex(key, false);
851-
}
852-
/* Warning already emitted by make_subpats_table() */
853-
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
854-
return NULL;
855-
}
856-
} else {
857-
new_entry.subpats_table = NULL;
858-
}
859-
860858
/*
861859
* Interned strings are not duplicated when stored in HashTable,
862860
* but all the interned strings created during HTTP request are removed
@@ -971,6 +969,8 @@ static zend_always_inline void populate_match_value(
971969

972970
static inline void add_named(
973971
HashTable *const subpats, zend_string *name, zval *val, bool unmatched) {
972+
ZEND_ASSERT(!(GC_FLAGS(name) & IS_STR_PERSISTENT));
973+
974974
/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
975975
* In this case we want to preserve the one that actually has a value. */
976976
if (!unmatched) {
@@ -1232,8 +1232,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12321232
* allocate the table only if there are any named subpatterns.
12331233
*/
12341234
subpat_names = NULL;
1235-
if (subpats) {
1236-
subpat_names = pce->subpats_table;
1235+
if (subpats && pce->name_count > 0) {
1236+
subpat_names = ensure_subpats_table(pce->name_count, pce);
1237+
if (UNEXPECTED(!subpat_names)) {
1238+
RETURN_FALSE;
1239+
}
12371240
}
12381241

12391242
matched = 0;
@@ -1869,7 +1872,14 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18691872

18701873
/* Calculate the size of the offsets array, and allocate memory for it. */
18711874
num_subpats = pce->capture_count + 1;
1872-
subpat_names = pce->subpats_table;
1875+
if (pce->name_count > 0) {
1876+
subpat_names = ensure_subpats_table(pce->name_count, pce);
1877+
if (UNEXPECTED(!subpat_names)) {
1878+
return NULL;
1879+
}
1880+
} else {
1881+
subpat_names = NULL;
1882+
}
18731883

18741884
alloc_len = 0;
18751885
result = NULL;

ext/pcre/tests/gh17122.phpt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--TEST--
2+
GH-17122 (memory leak in regex)
3+
--FILE--
4+
<?php
5+
preg_match('|(?P<name>)(\d+)|', 0xffffffff, $m1);
6+
var_dump($m1);
7+
\preg_match('|(?P<name2>)(\d+)|', 0, $m2);
8+
var_dump($m2);
9+
?>
10+
--EXPECT--
11+
array(4) {
12+
[0]=>
13+
string(10) "4294967295"
14+
["name"]=>
15+
string(0) ""
16+
[1]=>
17+
string(0) ""
18+
[2]=>
19+
string(10) "4294967295"
20+
}
21+
array(4) {
22+
[0]=>
23+
string(1) "0"
24+
["name2"]=>
25+
string(0) ""
26+
[1]=>
27+
string(0) ""
28+
[2]=>
29+
string(1) "0"
30+
}

0 commit comments

Comments
 (0)