@@ -49,9 +49,11 @@ char *php_pcre_version;
4949
5050struct _pcre_cache_entry {
5151 pcre2_code * re ;
52+ /* Capture-group number -> name table; NULL when the pattern has no named captures.
53+ * Holds capture_count + 1 entries (group 0 included); unnamed groups are NULL. */
54+ zend_string * * subpats_table ;
5255 uint32_t preg_options ;
5356 uint32_t capture_count ;
54- uint32_t name_count ;
5557 uint32_t compile_options ;
5658 uint32_t refcount ;
5759};
@@ -90,6 +92,8 @@ static MUTEX_T pcre_mt = NULL;
9092
9193ZEND_TLS HashTable char_tables ;
9294
95+ static void free_subpats_table (zend_string * * subpat_names , uint32_t num_subpats , bool persistent );
96+
9397static void php_pcre_free_char_table (zval * data )
9498{/*{{{*/
9599 void * ptr = Z_PTR_P (data );
@@ -163,6 +167,9 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
163167{
164168 pcre_cache_entry * pce = (pcre_cache_entry * ) Z_PTR_P (data );
165169 if (!pce ) return ;
170+ if (pce -> subpats_table ) { /* only set when the pattern has named captures */
171+ free_subpats_table (pce -> subpats_table , pce -> capture_count + 1 , true); /* persistent=true, consistent with the entry itself being released with free() */
172+ }
166173 pcre2_code_free (pce -> re );
167174 free (pce );
168175}
@@ -172,6 +179,9 @@ static void php_efree_pcre_cache(zval *data) /* {{{ */
172179{
173180 pcre_cache_entry * pce = (pcre_cache_entry * ) Z_PTR_P (data );
174181 if (!pce ) return ;
182+ if (pce -> subpats_table ) { /* only set when the pattern has named captures */
183+ free_subpats_table (pce -> subpats_table , pce -> capture_count + 1 , false); /* persistent=false, consistent with the entry itself being released with efree() */
184+ }
175185 pcre2_code_free (pce -> re );
176186 efree (pce );
177187}
@@ -520,20 +530,21 @@ static int pcre_clean_cache(zval *data, void *arg)
520530}
521531/* }}} */
522532
523- static void free_subpats_table (zend_string * * subpat_names , uint32_t num_subpats ) {
533+ static void free_subpats_table (zend_string * * subpat_names , uint32_t num_subpats , bool persistent ) { /* releases every name in the table, then the table itself */
524534 uint32_t i ;
525535 for (i = 0 ; i < num_subpats ; i ++ ) {
526536 if (subpat_names [i ]) {
527- zend_string_release_ex (subpat_names [i ], false );
537+ zend_string_release_ex (subpat_names [i ], persistent ); /* NULL slots (skipped above) are groups without a name */
528538 }
529539 }
530- efree (subpat_names );
540+ pefree (subpat_names , persistent ); /* `persistent` must be the flag the table was built with in make_subpats_table() */
531541}
532542
533543/* {{{ static make_subpats_table */
534- static zend_string * * make_subpats_table (uint32_t num_subpats , pcre_cache_entry * pce )
544+ static zend_string * * make_subpats_table (uint32_t name_cnt , pcre_cache_entry * pce , bool persistent )
535545{
536- uint32_t name_cnt = pce -> name_count , name_size , ni = 0 ;
546+ uint32_t num_subpats = pce -> capture_count + 1 ;
547+ uint32_t name_size , ni = 0 ;
537548 char * name_table ;
538549 zend_string * * subpat_names ;
539550 int rc1 , rc2 ;
@@ -545,11 +556,20 @@ static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *
545556 return NULL ;
546557 }
547558
548- subpat_names = ecalloc (num_subpats , sizeof (zend_string * ));
559+ subpat_names = pecalloc (num_subpats , sizeof (zend_string * ), persistent );
549560 while (ni ++ < name_cnt ) {
550561 unsigned short name_idx = 0x100 * (unsigned char )name_table [0 ] + (unsigned char )name_table [1 ];
551562 const char * name = name_table + 2 ;
552- subpat_names [name_idx ] = zend_string_init (name , strlen (name ), 0 );
563+ /* Note: the name strings are allocated persistently when the cache is not request-based,
564+ * because they have to outlive the request. In that case they are only ever used within
565+ * this thread and are never shared.
566+ * Although they end up stored in user-exposed arrays, this cannot cause problems:
567+ * the strings live only in this thread, and the last reference is dropped on shutdown
568+ * rather than by user code. */
569+ subpat_names [name_idx ] = zend_string_init (name , strlen (name ), persistent );
570+ if (persistent ) {
571+ GC_MAKE_PERSISTENT_LOCAL (subpat_names [name_idx ]);
572+ }
553573 name_table += name_size ;
554574 }
555575 return subpat_names ;
@@ -838,7 +858,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
838858 return NULL ;
839859 }
840860
841- rc = pcre2_pattern_info (re , PCRE2_INFO_NAMECOUNT , & new_entry .name_count );
861+ uint32_t name_count ;
862+ rc = pcre2_pattern_info (re , PCRE2_INFO_NAMECOUNT , & name_count );
842863 if (rc < 0 ) {
843864 if (key != regex ) {
844865 zend_string_release_ex (key , 0 );
@@ -848,6 +869,21 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
848869 return NULL ;
849870 }
850871
872+ /* Build the subpattern name table once and cache it on the entry, so match/replace calls can reuse it. */
873+ if (name_count > 0 ) {
874+ new_entry .subpats_table = make_subpats_table (name_count , & new_entry , !PCRE_G (per_request_cache ));
875+ if (!new_entry .subpats_table ) {
876+ if (key != regex ) {
877+ zend_string_release_ex (key , false);
878+ }
879+ /* Warning already emitted by make_subpats_table() */
880+ pcre_handle_exec_error (PCRE2_ERROR_INTERNAL );
881+ return NULL ;
882+ }
883+ } else {
884+ new_entry .subpats_table = NULL ;
885+ }
886+
851887 /*
852888 * Interned strings are not duplicated when stored in HashTable,
853889 * but all the interned strings created during HTTP request are removed
@@ -1204,11 +1240,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12041240 * allocate the table only if there are any named subpatterns.
12051241 */
12061242 subpat_names = NULL ;
1207- if (subpats && pce -> name_count > 0 ) {
1208- subpat_names = make_subpats_table (num_subpats , pce );
1209- if (!subpat_names ) {
1210- RETURN_FALSE ;
1211- }
1243+ if (subpats ) {
1244+ subpat_names = pce -> subpats_table ;
12121245 }
12131246
12141247 matched = 0 ;
@@ -1220,9 +1253,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12201253 match_data = pcre2_match_data_create_from_pattern (pce -> re , PCRE_G (gctx_zmm ));
12211254 if (!match_data ) {
12221255 PCRE_G (error_code ) = PHP_PCRE_INTERNAL_ERROR ;
1223- if (subpat_names ) {
1224- free_subpats_table (subpat_names , num_subpats );
1225- }
12261256 RETURN_FALSE ;
12271257 }
12281258 }
@@ -1269,9 +1299,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12691299 if (subpats != NULL ) {
12701300 /* Try to get the list of substrings and display a warning if failed. */
12711301 if (UNEXPECTED (offsets [1 ] < offsets [0 ])) {
1272- if (subpat_names ) {
1273- free_subpats_table (subpat_names , num_subpats );
1274- }
12751302 if (match_sets ) efree (match_sets );
12761303 php_error_docref (NULL , E_WARNING , "Get subpatterns list failed" );
12771304 RETURN_FALSE ;
@@ -1435,10 +1462,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14351462 }
14361463 }
14371464
1438- if (subpat_names ) {
1439- free_subpats_table (subpat_names , num_subpats );
1440- }
1441-
14421465 if (PCRE_G (error_code ) == PHP_PCRE_NO_ERROR ) {
14431466 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
14441467 if ((pce -> compile_options & PCRE2_UTF )
@@ -1852,18 +1875,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18521875
18531876 /* Calculate the size of the offsets array, and allocate memory for it. */
18541877 num_subpats = pce -> capture_count + 1 ;
1855-
1856- /*
1857- * Build a mapping from subpattern numbers to their names. We will
1858- * allocate the table only if there are any named subpatterns.
1859- */
1860- subpat_names = NULL ;
1861- if (UNEXPECTED (pce -> name_count > 0 )) {
1862- subpat_names = make_subpats_table (num_subpats , pce );
1863- if (!subpat_names ) {
1864- return NULL ;
1865- }
1866- }
1878+ subpat_names = pce -> subpats_table ;
18671879
18681880 alloc_len = 0 ;
18691881 result = NULL ;
@@ -1883,9 +1895,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
18831895 match_data = pcre2_match_data_create_from_pattern (pce -> re , PCRE_G (gctx_zmm ));
18841896 if (!match_data ) {
18851897 PCRE_G (error_code ) = PHP_PCRE_INTERNAL_ERROR ;
1886- if (subpat_names ) {
1887- free_subpats_table (subpat_names , num_subpats );
1888- }
18891898 mdata_used = old_mdata_used ;
18901899 return NULL ;
18911900 }
@@ -2036,10 +2045,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
20362045 }
20372046 mdata_used = old_mdata_used ;
20382047
2039- if (UNEXPECTED (subpat_names )) {
2040- free_subpats_table (subpat_names , num_subpats );
2041- }
2042-
20432048 return result ;
20442049}
20452050/* }}} */
0 commit comments