Skip to content

Commit 40fe50d

Browse files
nikic authored and smalyshev committed
Validate pattern against mbregex encoding
Oniguruma does not consistently perform this validation itself (at least on older versions), so make sure we check pattern encoding validity on the PHP side.
1 parent 58c25bf commit 40fe50d

File tree

6 files changed

+31
-15
lines changed

6 files changed

+31
-15
lines changed

ext/mbstring/php_mbregex.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,13 +451,18 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
451451
OnigErrorInfo err_info;
452452
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
453453

454+
if (!php_mb_check_encoding(pattern, patlen, _php_mb_regex_mbctype2name(enc))) {
455+
php_error_docref(NULL, E_WARNING,
456+
"Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
457+
return NULL;
458+
}
459+
454460
rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
455461
if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
456462
if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
457463
onig_error_code_to_str(err_str, err_code, &err_info);
458464
php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
459-
retval = NULL;
460-
goto out;
465+
return NULL;
461466
}
462467
if (rc == MBREX(search_re)) {
463468
/* reuse the new rc? see bug #72399 */
@@ -467,7 +472,6 @@ static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patl
467472
} else {
468473
retval = rc;
469474
}
470-
out:
471475
return retval;
472476
}
473477
/* }}} */

ext/mbstring/tests/bug72994.phpt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,7 @@ var_dump($var1);
1313
===DONE===
1414
--EXPECTF--
1515
Notice: Undefined variable: var in %s on line %d
16-
string(0) ""
16+
17+
Warning: mbereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
18+
bool(false)
1719
===DONE===

ext/mbstring/tests/bug77370.phpt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ Bug #77370 (Buffer overflow on mb regex functions - fetch_token)
66
<?php
77
var_dump(mb_split(" \xfd",""));
88
?>
9-
--EXPECT--
10-
array(1) {
11-
[0]=>
12-
string(0) ""
13-
}
9+
--EXPECTF--
10+
Warning: mb_split(): Pattern is not valid under UTF-8 encoding in %s on line %d
11+
bool(false)

ext/mbstring/tests/bug77371.phpt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ Bug #77371 (heap buffer overflow in mb regex functions - compile_string_node)
66
<?php
77
var_dump(mb_ereg("()0\xfc00000\xfc00000\xfc00000\xfc",""));
88
?>
9-
--EXPECT--
10-
bool(false)
9+
--EXPECTF--
10+
Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
11+
bool(false)

ext/mbstring/tests/bug77381.phpt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@ var_dump(mb_ereg("(?i)000000000000000000000\xf0",""));
99
var_dump(mb_ereg("0000\\"."\xf5","0"));
1010
var_dump(mb_ereg("(?i)FFF00000000000000000\xfd",""));
1111
?>
12-
--EXPECT--
13-
int(1)
12+
--EXPECTF--
13+
Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
1414
bool(false)
15+
16+
Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
1517
bool(false)
18+
19+
Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
20+
bool(false)
21+
22+
Warning: mb_ereg(): Pattern is not valid under UTF-8 encoding in %s on line %d
1623
bool(false)

ext/mbstring/tests/mb_ereg_replace_variation1.phpt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,17 @@ string(10) "string_val"
109109
string(10) "string_val"
110110

111111
-- Iteration 4 --
112-
string(10) "string_val"
112+
113+
Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
114+
bool(false)
113115

114116
-- Iteration 5 --
115117
string(10) "string_val"
116118

117119
-- Iteration 6 --
118-
string(10) "string_val"
120+
121+
Warning: mb_ereg_replace(): Pattern is not valid under UTF-8 encoding in %s on line %d
122+
bool(false)
119123

120124
-- Iteration 7 --
121125
string(10) "string_val"

0 commit comments

Comments
 (0)