Skip to content

Commit fa3b8c7

Browse files
committed
Promote unknown encoding warnings to throws in encoding array/string list
For the string list we still emit a warning by comparing arg_num to 0. Closes GH-5337
1 parent 75b01c7 commit fa3b8c7

File tree

7 files changed

+132
-70
lines changed

7 files changed

+132
-70
lines changed

ext/mbstring/mbstring.c

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,10 @@ static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint
351351

352352
/* {{{ static int php_mb_parse_encoding_list()
353353
* Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
354+
* Emits a ValueError in function context and a warning in INI context; in INI context arg_num must be 0.
354355
*/
355-
static int
356-
php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
356+
static int php_mb_parse_encoding_list(const char *value, size_t value_length,
357+
const mbfl_encoding ***return_list, size_t *return_size, int persistent, uint32_t arg_num)
357358
{
358359
if (value == NULL || value_length == 0) {
359360
*return_list = NULL;
@@ -416,15 +417,20 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en
416417
}
417418
} else {
418419
const mbfl_encoding *encoding = mbfl_name2encoding(p1);
419-
if (encoding) {
420-
*entry++ = encoding;
421-
n++;
422-
} else {
423-
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", p1);
420+
if (!encoding) {
421+
/* Called from an INI setting modification */
422+
if (arg_num == 0) {
423+
php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
424+
} else {
425+
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
426+
}
424427
efree(tmpstr);
425428
pefree(list, persistent);
426429
return FAILURE;
427430
}
431+
432+
*entry++ = encoding;
433+
n++;
428434
}
429435
p1 = p2 + 1;
430436
} while (n < size && p2 != NULL);
@@ -439,9 +445,10 @@ php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_en
439445

440446
/* {{{ static int php_mb_parse_encoding_array()
441447
* Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
448+
* Emits a ValueError in function context and a warning in INI context; in INI context arg_num must be 0.
442449
*/
443-
static int
444-
php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list, size_t *return_size)
450+
static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
451+
size_t *return_size, uint32_t arg_num)
445452
{
446453
/* Allocate enough space to include the default detect order if "auto" is used. */
447454
size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
@@ -475,8 +482,7 @@ php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***retur
475482
*entry++ = encoding;
476483
n++;
477484
} else {
478-
php_error_docref(NULL, E_WARNING,
479-
"Unknown encoding \"%s\"", ZSTR_VAL(encoding_str));
485+
zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
480486
zend_string_release(encoding_str);
481487
efree(list);
482488
return FAILURE;
@@ -576,7 +582,7 @@ static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_leng
576582

577583
static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
578584
{
579-
return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
585+
return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent, 0);
580586
}
581587

582588
static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
@@ -869,7 +875,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
869875
return SUCCESS;
870876
}
871877

872-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1) || size == 0) {
878+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1, 0) || size == 0) {
873879
return FAILURE;
874880
}
875881

@@ -885,7 +891,7 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
885891
static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
886892
const mbfl_encoding **list;
887893
size_t size;
888-
if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1) || size == 0) {
894+
if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1, 0) || size == 0) {
889895
return FAILURE;
890896
}
891897
if (MBSTRG(http_input_list)) {
@@ -1551,12 +1557,12 @@ PHP_FUNCTION(mb_detect_order)
15511557
const mbfl_encoding **list;
15521558
size_t size;
15531559
if (order_ht) {
1554-
if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size)) {
1555-
RETURN_FALSE;
1560+
if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
1561+
RETURN_THROWS();
15561562
}
15571563
} else {
1558-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, 0)) {
1559-
RETURN_FALSE;
1564+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, 0, 1)) {
1565+
RETURN_THROWS();
15601566
}
15611567
}
15621568

@@ -2699,13 +2705,14 @@ PHP_FUNCTION(mb_convert_encoding)
26992705
}
27002706

27012707
if (from_encodings_ht) {
2702-
if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings) == FAILURE) {
2703-
RETURN_FALSE;
2708+
if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
2709+
RETURN_THROWS();
27042710
}
27052711
free_from_encodings = 1;
27062712
} else if (from_encodings_str) {
2707-
if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str), &from_encodings, &num_from_encodings, 0) == FAILURE) {
2708-
RETURN_FALSE;
2713+
if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
2714+
&from_encodings, &num_from_encodings, 0, 3) == FAILURE) {
2715+
RETURN_THROWS();
27092716
}
27102717
free_from_encodings = 1;
27112718
} else {
@@ -2885,13 +2892,13 @@ PHP_FUNCTION(mb_detect_encoding)
28852892

28862893
/* make encoding list */
28872894
if (encoding_ht) {
2888-
if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size)) {
2889-
RETURN_FALSE;
2895+
if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
2896+
RETURN_THROWS();
28902897
}
28912898
free_elist = 1;
28922899
} else if (encoding_str) {
2893-
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, 0)) {
2894-
RETURN_FALSE;
2900+
if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, 0, 2)) {
2901+
RETURN_THROWS();
28952902
}
28962903
free_elist = 1;
28972904
} else {
@@ -3285,12 +3292,12 @@ PHP_FUNCTION(mb_convert_variables)
32853292

32863293
/* pre-conversion encoding */
32873294
if (from_enc_ht) {
3288-
if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz) == FAILURE) {
3289-
RETURN_FALSE;
3295+
if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
3296+
RETURN_THROWS();
32903297
}
32913298
} else {
3292-
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, 0) == FAILURE) {
3293-
RETURN_FALSE;
3299+
if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, 0, 2) == FAILURE) {
3300+
RETURN_THROWS();
32943301
}
32953302
}
32963303

ext/mbstring/tests/bug76704.phpt

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@ if (!extension_loaded('mbstring')) die('skip mbstring extension not available');
66
?>
77
--FILE--
88
<?php
9-
var_dump(mb_detect_order('Foo, UTF-8'));
10-
var_dump(mb_detect_order(['Foo', 'UTF-8']))
9+
try {
10+
var_dump(mb_detect_order('Foo, UTF-8'));
11+
} catch (\ValueError $e) {
12+
echo $e->getMessage() . \PHP_EOL;
13+
}
14+
try {
15+
var_dump(mb_detect_order(['Foo', 'UTF-8']));
16+
} catch (\ValueError $e) {
17+
echo $e->getMessage() . \PHP_EOL;
18+
}
1119
?>
12-
--EXPECTF--
13-
Warning: mb_detect_order(): Unknown encoding "Foo" in %s on line %d
14-
bool(false)
15-
16-
Warning: mb_detect_order(): Unknown encoding "Foo" in %s on line %d
17-
bool(false)
20+
--EXPECT--
21+
mb_detect_order(): Argument #1 ($encoding) contains invalid encoding "Foo"
22+
mb_detect_order(): Argument #1 ($encoding) contains invalid encoding "Foo"

ext/mbstring/tests/bug79149.phpt

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,8 @@ try {
2424

2525
?>
2626
--EXPECTF--
27-
Warning: mb_convert_encoding(): Unknown encoding "0" in %s on line %d
28-
bool(false)
27+
mb_convert_encoding(): Argument #3 ($from) contains invalid encoding "0"
2928

3029
Warning: Array to string conversion in %s on line %d
31-
32-
Warning: mb_convert_encoding(): Unknown encoding "Array" in %s on line %d
33-
bool(false)
34-
35-
Warning: mb_convert_encoding(): Unknown encoding "foo" in %s on line %d
36-
bool(false)
30+
mb_convert_encoding(): Argument #3 ($from) contains invalid encoding "Array"
31+
mb_convert_encoding(): Argument #3 ($from) contains invalid encoding "foo"
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
--TEST--
2+
Invalid values for MBString INI settings
3+
--SKIPIF--
4+
<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
5+
--INI--
6+
mbstring.language=UNKNOWN_LANGUAGE
7+
mbstring.internal_encoding=UNKNOWN_ENCODING
8+
mbstring.detect_order=UTF-8,DETECT_ORDER,ASCII
9+
mbstring.http_input=UTF-8,HTTP_INPUT,ASCII
10+
mbstring.http_output=HTTP_OUTPUT
11+
mbstring.http_output_conv_mimetypes=UNKNOWN_MIME_TYPE_OUTPUT
12+
mbstring.substitute_character=U+3000,NON_EXISTING_CHARACTER,JIS+7E7E
13+
mbstring.func_overload=BOOL_OVERLOAD
14+
mbstring.encoding_translation=BOOL_TRANSLATION
15+
mbstring.strict_detection=BOOL_STRICT_DETECTION
16+
--FILE--
17+
<?php
18+
// Empty as we are only testing INI settings
19+
?>
20+
--EXPECT--
21+
PHP Warning: PHP Startup: INI setting contains invalid encoding "DETECT_ORDER" in Unknown on line 0
22+
PHP Deprecated: PHP Startup: Use of mbstring.http_input is deprecated in Unknown on line 0
23+
PHP Warning: PHP Startup: INI setting contains invalid encoding "HTTP_INPUT" in Unknown on line 0
24+
PHP Deprecated: PHP Startup: Use of mbstring.http_output is deprecated in Unknown on line 0
25+
PHP Deprecated: PHP Startup: Use of mbstring.internal_encoding is deprecated in Unknown on line 0
26+
27+
Warning: PHP Startup: INI setting contains invalid encoding "DETECT_ORDER" in Unknown on line 0
28+
29+
Deprecated: PHP Startup: Use of mbstring.http_input is deprecated in Unknown on line 0
30+
31+
Warning: PHP Startup: INI setting contains invalid encoding "HTTP_INPUT" in Unknown on line 0
32+
33+
Deprecated: PHP Startup: Use of mbstring.http_output is deprecated in Unknown on line 0
34+
35+
Deprecated: PHP Startup: Use of mbstring.internal_encoding is deprecated in Unknown on line 0

ext/mbstring/tests/mb_detect_encoding.phpt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,14 @@ $s = mb_detect_encoding('', 'EUC-JP');
8484
print("EUC-JP: $s\n"); // SJIS
8585

8686
$s = $euc_jp;
87-
$s = mb_detect_encoding($s, 'BAD');
88-
print("BAD: $s\n"); // BAD
87+
try {
88+
var_dump(mb_detect_encoding($s, 'BAD'));
89+
} catch (\ValueError $e) {
90+
echo $e->getMessage() . \PHP_EOL;
91+
}
8992

9093
?>
91-
--EXPECTF--
94+
--EXPECT--
9295
== BASIC TEST ==
9396
SJIS: SJIS
9497
JIS: JIS
@@ -105,6 +108,4 @@ SJIS: SJIS
105108
== INVALID PARAMETER ==
106109
INT: EUC-JP
107110
EUC-JP: EUC-JP
108-
109-
Warning: mb_detect_encoding(): Unknown encoding "BAD" in %s on line %d
110-
BAD:
111+
mb_detect_encoding(): Argument #2 ($encoding_list) contains invalid encoding "BAD"

ext/mbstring/tests/mb_detect_order.phpt

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,29 +35,49 @@ print implode(', ', mb_detect_order()) . "\n";
3535
// Set invalid encoding. Should fail.
3636
print "== INVALID PARAMETER ==\n";
3737

38-
$r = mb_detect_order('BAD_NAME');
39-
($r === FALSE) ? print "OK_BAD_STR\n" : print "NG_BAD_STR\n";
40-
print implode(', ', mb_detect_order()) . "\n";
38+
try {
39+
var_dump(mb_detect_order('BAD_NAME'));
40+
} catch (\ValueError $e) {
41+
echo $e->getMessage() . \PHP_EOL;
42+
}
43+
var_dump(mb_detect_order());
4144

4245
$a[] = 'BAD_NAME';
43-
$r = mb_detect_order($a);
44-
($r === FALSE) ? print "OK_BAD_ARRAY\n" : print "NG_BAD_ARRAY\n";
45-
print implode(', ', mb_detect_order()) . "\n";
46+
try {
47+
var_dump(mb_detect_order($a));
48+
} catch (\ValueError $e) {
49+
echo $e->getMessage() . \PHP_EOL;
50+
}
51+
var_dump(mb_detect_order());
4652

4753
?>
48-
--EXPECTF--
54+
--EXPECT--
4955
OK_AUTO
5056
ASCII, JIS, UTF-8, EUC-JP, SJIS
5157
OK_STR
5258
SJIS, EUC-JP, JIS, UTF-8
5359
OK_ARRAY
5460
ASCII, JIS, EUC-JP, UTF-8
5561
== INVALID PARAMETER ==
56-
57-
Warning: mb_detect_order(): Unknown encoding "BAD_NAME" in %s on line %d
58-
OK_BAD_STR
59-
ASCII, JIS, EUC-JP, UTF-8
60-
61-
Warning: mb_detect_order(): Unknown encoding "BAD_NAME" in %s on line %d
62-
OK_BAD_ARRAY
63-
ASCII, JIS, EUC-JP, UTF-8
62+
mb_detect_order(): Argument #1 ($encoding) contains invalid encoding "BAD_NAME"
63+
array(4) {
64+
[0]=>
65+
string(5) "ASCII"
66+
[1]=>
67+
string(3) "JIS"
68+
[2]=>
69+
string(6) "EUC-JP"
70+
[3]=>
71+
string(5) "UTF-8"
72+
}
73+
mb_detect_order(): Argument #1 ($encoding) contains invalid encoding "BAD_NAME"
74+
array(4) {
75+
[0]=>
76+
string(5) "ASCII"
77+
[1]=>
78+
string(3) "JIS"
79+
[2]=>
80+
string(6) "EUC-JP"
81+
[3]=>
82+
string(5) "UTF-8"
83+
}

ext/mbstring/tests/mb_str_unknown_encoding.phpt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,10 @@ try {
141141
}
142142

143143
?>
144-
--EXPECTF--
144+
--EXPECT--
145145
mb_chr(): Argument #2 ($encoding) must be a valid encoding, "UTF-0" given
146146
mb_convert_case(): Argument #3 ($encoding) must be a valid encoding, "UTF-0" given
147-
148-
Warning: mb_convert_encoding(): Unknown encoding "UTF-0" in %s on line %d
147+
mb_convert_encoding(): Argument #3 ($from) contains invalid encoding "UTF-0"
149148
mb_convert_kana(): Argument #3 ($encoding) must be a valid encoding, "UTF-0" given
150149
mb_decode_numericentity(): Argument #3 ($encoding) must be a valid encoding, "UTF-0" given
151150
mb_ord(): Argument #2 ($encoding) must be a valid encoding, "UTF-0" given

0 commit comments

Comments
 (0)