Skip to content

Commit 642e111

Browse files
authored
Minor pcre optimizations (#12923)
* Update signature of pcre API
  This changes the variables that are bools to actually be bools instead of ints, which allows some additional optimization by the compiler (e.g. removing some ternaries and move extensions). It also gets rid of the use_flags argument because that's just the same as flags == 0. This reduces the call frame.
* Use zend_string_release_ex where possible
* Remove duplicate symbols from strchr
* Avoid useless value conversions
* Use a raw HashTable* instead of a zval
* Move condition
* Make for loop cheaper by reusing a recently used value as start iteration index
* Remove useless condition
  This can't be true if the second condition is true because it would require the string to occupy the entire address space.
* Upgrading + remark
1 parent 185627f commit 642e111

File tree

6 files changed

+43
-42
lines changed

6 files changed

+43
-42
lines changed

UPGRADING.INTERNALS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ PHP 8.4 INTERNALS UPGRADE NOTES
5959
e. ext/date
6060
- Added the php_format_date_ex() API to format instances of php_date_obj.
6161

62+
f. ext/pcre
63+
- php_pcre_match_impl() no longer has a use_flags argument.
64+
When flags should be ignored, pass 0 to the flags argument.
65+
- php_pcre_match_impl() and pcre_get_compiled_regex_cache_ex() now use
66+
proper boolean argument types instead of integer types.
67+
6268
========================
6369
4. OpCode changes
6470
========================

ext/fileinfo/libmagic/softmagic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2266,7 +2266,7 @@ magiccheck(struct magic_set *ms, struct magic *m)
22662266
haystack = zend_string_init(ms->search.s, ms->search.s_len, 0);
22672267

22682268
/* match v = 0, no match v = 1 */
2269-
php_pcre_match_impl(pce, haystack, &retval, &subpats, 0, 1, PREG_OFFSET_CAPTURE, 0);
2269+
php_pcre_match_impl(pce, haystack, &retval, &subpats, 0, PREG_OFFSET_CAPTURE, 0);
22702270
/* Free haystack */
22712271
zend_string_release(haystack);
22722272

ext/imap/php_imap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ PHP_FUNCTION(imap_append)
806806

807807
zend_string_release(regex);
808808
php_pcre_match_impl(pce, internal_date, return_value, subpats, global,
809-
0, Z_L(0), Z_L(0));
809+
Z_L(0), Z_L(0));
810810

811811
if (!Z_LVAL_P(return_value)) {
812812
// TODO Promote to error?

ext/pcre/php_pcre.c

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats)
527527
uint32_t i;
528528
for (i = 0; i < num_subpats; i++) {
529529
if (subpat_names[i]) {
530-
zend_string_release(subpat_names[i]);
530+
zend_string_release_ex(subpat_names[i], false);
531531
}
532532
}
533533
efree(subpat_names);
@@ -584,7 +584,7 @@ static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, co
584584
/* }}} */
585585

586586
/* {{{ pcre_get_compiled_regex_cache_ex */
587-
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
587+
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware)
588588
{
589589
pcre2_code *re = NULL;
590590
#if 10 == PCRE2_MAJOR && 37 == PCRE2_MINOR && !HAVE_BUNDLED_PCRE
@@ -655,7 +655,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
655655
}
656656

657657
start_delimiter = delimiter;
658-
if ((pp = strchr("([{< )]}> )]}>", delimiter)))
658+
if ((pp = strchr("([{< )]}>", delimiter)))
659659
delimiter = pp[5];
660660
end_delimiter = delimiter;
661661

@@ -879,7 +879,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
879879
/* {{{ pcre_get_compiled_regex_cache */
880880
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
881881
{
882-
return pcre_get_compiled_regex_cache_ex(regex, 1);
882+
return pcre_get_compiled_regex_cache_ex(regex, true);
883883
}
884884
/* }}} */
885885

@@ -979,7 +979,7 @@ static inline void add_named(
979979
/* {{{ add_offset_pair */
980980
static inline void add_offset_pair(
981981
HashTable *const result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
982-
zend_string *name, uint32_t unmatched_as_null)
982+
zend_string *name, zend_long unmatched_as_null)
983983
{
984984
zval match_pair;
985985

@@ -1013,8 +1013,8 @@ static inline void add_offset_pair(
10131013
static void populate_subpat_array(
10141014
zval *subpats, const char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
10151015
uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
1016-
bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
1017-
bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
1016+
zend_long offset_capture = flags & PREG_OFFSET_CAPTURE;
1017+
zend_long unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
10181018
zval val;
10191019
int i;
10201020
HashTable *subpats_ht = Z_ARRVAL_P(subpats);
@@ -1079,7 +1079,7 @@ static void populate_subpat_array(
10791079
}
10801080
}
10811081

1082-
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
1082+
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, bool global) /* {{{ */
10831083
{
10841084
/* parameters */
10851085
zend_string *regex; /* Regular expression */
@@ -1105,7 +1105,7 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *
11051105

11061106
pce->refcount++;
11071107
php_pcre_match_impl(pce, subject, return_value, subpats,
1108-
global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
1108+
global, flags, start_offset);
11091109
pce->refcount--;
11101110
}
11111111
/* }}} */
@@ -1128,7 +1128,7 @@ static zend_always_inline bool is_known_valid_utf8(
11281128

11291129
/* {{{ php_pcre_match_impl() */
11301130
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1131-
zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1131+
zval *subpats, bool global, zend_long flags, zend_off_t start_offset)
11321132
{
11331133
zval result_set; /* Holds a set of subpatterns after
11341134
a global match */
@@ -1142,17 +1142,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
11421142
size_t i;
11431143
uint32_t subpats_order; /* Order of subpattern matches */
11441144
uint32_t offset_capture; /* Capture match offsets: yes/no */
1145-
uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1145+
zend_long unmatched_as_null; /* Null non-matches: yes/no */
11461146
PCRE2_SPTR mark = NULL; /* Target for MARK name */
1147-
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1147+
HashTable *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
11481148
pcre2_match_data *match_data;
11491149
PCRE2_SIZE start_offset2, orig_start_offset;
11501150

11511151
char *subject = ZSTR_VAL(subject_str);
11521152
size_t subject_len = ZSTR_LEN(subject_str);
11531153

1154-
ZVAL_UNDEF(&marks);
1155-
11561154
/* Overwrite the passed-in value for subpatterns with an empty array. */
11571155
if (subpats != NULL) {
11581156
subpats = zend_try_array_init(subpats);
@@ -1163,7 +1161,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
11631161

11641162
subpats_order = global ? PREG_PATTERN_ORDER : 0;
11651163

1166-
if (use_flags) {
1164+
if (flags) {
11671165
offset_capture = flags & PREG_OFFSET_CAPTURE;
11681166
unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
11691167

@@ -1173,11 +1171,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
11731171
*/
11741172
if (flags & 0xff) {
11751173
subpats_order = flags & 0xff;
1176-
}
1177-
if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1178-
(!global && subpats_order != 0)) {
1179-
zend_argument_value_error(4, "must be a PREG_* constant");
1180-
RETURN_THROWS();
1174+
if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1175+
(!global && subpats_order != 0)) {
1176+
zend_argument_value_error(4, "must be a PREG_* constant");
1177+
RETURN_THROWS();
1178+
}
11811179
}
11821180
} else {
11831181
offset_capture = 0;
@@ -1301,18 +1299,20 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
13011299
mark = pcre2_get_mark(match_data);
13021300
/* Add MARK, if available */
13031301
if (mark) {
1304-
if (Z_TYPE(marks) == IS_UNDEF) {
1305-
array_init(&marks);
1302+
if (!marks) {
1303+
marks = zend_new_array(0);
13061304
}
1307-
add_index_string(&marks, matched - 1, (char *) mark);
1305+
zval tmp;
1306+
ZVAL_STRING(&tmp, (char *) mark);
1307+
zend_hash_index_add_new(marks, matched - 1, &tmp);
13081308
}
13091309
/*
13101310
* If the number of captured subpatterns on this run is
13111311
* less than the total possible number, pad the result
13121312
* arrays with NULLs or empty strings.
13131313
*/
13141314
if (count < num_subpats) {
1315-
for (; i < num_subpats; i++) {
1315+
for (int i = count; i < num_subpats; i++) {
13161316
if (offset_capture) {
13171317
add_offset_pair(
13181318
match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
@@ -1394,7 +1394,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
13941394
/* Execute the regular expression. */
13951395
#ifdef HAVE_PCRE_JIT_SUPPORT
13961396
if ((pce->preg_options & PREG_JIT)) {
1397-
if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1397+
if (start_offset2 > subject_len) {
13981398
pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
13991399
break;
14001400
}
@@ -1430,8 +1430,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14301430
}
14311431
efree(match_sets);
14321432

1433-
if (Z_TYPE(marks) != IS_UNDEF) {
1434-
add_assoc_zval(subpats, "MARK", &marks);
1433+
if (marks) {
1434+
zval tmp;
1435+
ZVAL_ARR(&tmp, marks);
1436+
zend_hash_str_update(Z_ARRVAL_P(subpats), "MARK", sizeof("MARK") - 1, &tmp);
14351437
}
14361438
}
14371439

@@ -1456,14 +1458,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
14561458
/* {{{ Perform a Perl-style regular expression match */
14571459
PHP_FUNCTION(preg_match)
14581460
{
1459-
php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1461+
php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
14601462
}
14611463
/* }}} */
14621464

14631465
/* {{{ Perform a Perl-style global regular expression match */
14641466
PHP_FUNCTION(preg_match_all)
14651467
{
1466-
php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1468+
php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
14671469
}
14681470
/* }}} */
14691471

ext/pcre/php_pcre.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ typedef enum {
4747
} php_pcre_error_code;
4848

4949
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex);
50-
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware);
50+
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bool locale_aware);
5151

5252
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
53-
zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset);
53+
zval *subpats, bool global, zend_long flags, zend_off_t start_offset);
5454

5555
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, const char *subject, size_t subject_len, zend_string *replace_str,
5656
size_t limit, size_t *replace_count);

ext/spl/spl_iterators.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,6 @@ typedef struct _spl_dual_it_object {
135135
pcre_cache_entry *pce;
136136
zend_string *regex;
137137
regex_mode mode;
138-
int use_flags;
139138
} regex;
140139
zend_fcall_info_cache callback_filter;
141140
} u;
@@ -1407,7 +1406,6 @@ static spl_dual_it_object* spl_dual_it_construct(INTERNAL_FUNCTION_PARAMETERS, z
14071406
zend_string *regex;
14081407
zend_long mode = REGIT_MODE_MATCH;
14091408

1410-
intern->u.regex.use_flags = ZEND_NUM_ARGS() >= 5;
14111409
intern->u.regex.flags = 0;
14121410
intern->u.regex.preg_flags = 0;
14131411
if (zend_parse_parameters(ZEND_NUM_ARGS(), "OS|lll", &zobject, ce_inner, &regex, &mode, &intern->u.regex.flags, &intern->u.regex.preg_flags) == FAILURE) {
@@ -1876,7 +1874,7 @@ PHP_METHOD(RegexIterator, accept)
18761874
zval_ptr_dtor(&intern->current.data);
18771875
ZVAL_UNDEF(&intern->current.data);
18781876
php_pcre_match_impl(intern->u.regex.pce, subject, &zcount,
1879-
&intern->current.data, intern->u.regex.mode == REGIT_MODE_ALL_MATCHES, intern->u.regex.use_flags, intern->u.regex.preg_flags, 0);
1877+
&intern->current.data, intern->u.regex.mode == REGIT_MODE_ALL_MATCHES, intern->u.regex.preg_flags, 0);
18801878
RETVAL_BOOL(Z_LVAL(zcount) > 0);
18811879
break;
18821880

@@ -2006,11 +2004,7 @@ PHP_METHOD(RegexIterator, getPregFlags)
20062004

20072005
SPL_FETCH_AND_CHECK_DUAL_IT(intern, ZEND_THIS);
20082006

2009-
if (intern->u.regex.use_flags) {
2010-
RETURN_LONG(intern->u.regex.preg_flags);
2011-
} else {
2012-
RETURN_LONG(0);
2013-
}
2007+
RETURN_LONG(intern->u.regex.preg_flags);
20142008
} /* }}} */
20152009

20162010
/* {{{ Set PREG flags */
@@ -2026,7 +2020,6 @@ PHP_METHOD(RegexIterator, setPregFlags)
20262020
SPL_FETCH_AND_CHECK_DUAL_IT(intern, ZEND_THIS);
20272021

20282022
intern->u.regex.preg_flags = preg_flags;
2029-
intern->u.regex.use_flags = 1;
20302023
} /* }}} */
20312024

20322025
/* {{{ Create a RecursiveRegexIterator from another recursive iterator and a regular expression */

0 commit comments

Comments
 (0)