Skip to content

Commit 6436ddb

Browse files
committed
Handle out-of-bounds offset consistently in grapheme_* API
Make sure we throw the same kind of error regardless of whether the offset is out-of-bounds in the fast path or in the slow path.
1 parent 1312c41 commit 6436ddb

File tree

3 files changed

+97
-32
lines changed

3 files changed

+97
-32
lines changed

ext/intl/grapheme/grapheme_string.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -129,21 +129,17 @@ PHP_FUNCTION(grapheme_strpos)
129129
RETURN_THROWS();
130130
}
131131

132-
if (offset >= 0) {
132+
if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) {
133133
/* quick check to see if the string might be there
134134
* I realize that 'offset' is 'grapheme count offset' but will work in spite of that
135135
*/
136136
found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len);
137137

138138
/* if it isn't there the we are done */
139-
if (!found) {
140-
RETURN_FALSE;
141-
}
142-
143-
/* if it is there, and if the haystack is ascii, we are all done */
144-
if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
139+
if (found) {
145140
RETURN_LONG(found - haystack);
146141
}
142+
RETURN_FALSE;
147143
}
148144

149145
/* do utf16 part of the strpos */
@@ -586,9 +582,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
586582

587583
if ( !f_ignore_case ) {
588584

589-
/* ASCII optimization: quick check to see if the string might be there
590-
* I realize that 'offset' is 'grapheme count offset' but will work in spite of that
591-
*/
585+
/* ASCII optimization: quick check to see if the string might be there */
592586
found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
593587

594588
/* if it isn't there the we are done */

ext/intl/grapheme/grapheme_util.c

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -107,23 +107,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
107107
}
108108
/* }}} */
109109

110-
#define STRPOS_CHECK_STATUS(status, error) \
111-
if ( U_FAILURE( (status) ) ) { \
112-
intl_error_set_code( NULL, (status) ); \
113-
intl_error_set_custom_msg( NULL, (error), 0 ); \
114-
if (uhaystack) { \
115-
efree( uhaystack ); \
116-
} \
117-
if (uneedle) { \
118-
efree( uneedle ); \
119-
} \
120-
if(bi) { \
121-
ubrk_close (bi); \
122-
} \
123-
if(src) { \
124-
usearch_close(src); \
125-
} \
126-
return -1; \
110+
#define STRPOS_CHECK_STATUS(status, error) \
111+
if ( U_FAILURE( (status) ) ) { \
112+
intl_error_set_code( NULL, (status) ); \
113+
intl_error_set_custom_msg( NULL, (error), 0 ); \
114+
ret_pos = -1; \
115+
goto finish; \
127116
}
128117

129118

@@ -172,9 +161,10 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
172161

173162
if(offset != 0) {
174163
offset_pos = grapheme_get_haystack_offset(bi, offset);
175-
if(offset_pos == -1) {
176-
status = U_ILLEGAL_ARGUMENT_ERROR;
177-
STRPOS_CHECK_STATUS(status, "Invalid search offset");
164+
if (offset_pos == -1) {
165+
zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
166+
ret_pos = -1;
167+
goto finish;
178168
}
179169
status = U_ZERO_ERROR;
180170
usearch_setOffset(src, offset_pos, &status);
@@ -201,14 +191,19 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
201191
ret_pos = -1;
202192
}
203193

194+
finish:
204195
if (uhaystack) {
205196
efree( uhaystack );
206197
}
207198
if (uneedle) {
208199
efree( uneedle );
209200
}
210-
ubrk_close (bi);
211-
usearch_close (src);
201+
if (bi) {
202+
ubrk_close (bi);
203+
}
204+
if (src) {
205+
usearch_close (src);
206+
}
212207

213208
return ret_pos;
214209
}

ext/intl/tests/grapheme_out_of_bounds.phpt

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,28 @@ var_dump(grapheme_strpos("foo", "bar", 3));
88
var_dump(grapheme_stripos("foo", "bar", 3));
99
var_dump(grapheme_strrpos("foo", "bar", 3));
1010
var_dump(grapheme_strripos("foo", "bar", 3));
11+
var_dump(grapheme_strpos("äöü", "bar", 3));
12+
var_dump(grapheme_stripos("äöü", "bar", 3));
13+
var_dump(grapheme_strrpos("äöü", "bar", 3));
14+
var_dump(grapheme_strripos("äöü", "bar", 3));
1115
echo "\n";
1216

1317
// Offset == -Length is legal.
1418
var_dump(grapheme_strpos("foo", "bar", -3));
1519
var_dump(grapheme_stripos("foo", "bar", -3));
1620
var_dump(grapheme_strrpos("foo", "bar", -3));
1721
var_dump(grapheme_strripos("foo", "bar", -3));
22+
var_dump(grapheme_strpos("äöü", "bar", -3));
23+
var_dump(grapheme_stripos("äöü", "bar", -3));
24+
var_dump(grapheme_strrpos("äöü", "bar", -3));
25+
var_dump(grapheme_strripos("äöü", "bar", -3));
26+
echo "\n";
27+
28+
// Offset == Length is legal.
29+
var_dump(grapheme_strpos("", "bar", 0));
30+
var_dump(grapheme_stripos("", "bar", 0));
31+
var_dump(grapheme_strrpos("", "bar", 0));
32+
var_dump(grapheme_strripos("", "bar", 0));
1833
echo "\n";
1934

2035
// Positive out of bounds.
@@ -38,6 +53,26 @@ try {
3853
} catch (ValueError $e) {
3954
echo $e->getMessage(), "\n";
4055
}
56+
try {
57+
var_dump(grapheme_strpos("äöü", "bar", 4));
58+
} catch (ValueError $e) {
59+
echo $e->getMessage(), "\n";
60+
}
61+
try {
62+
var_dump(grapheme_stripos("äöü", "bar", 4));
63+
} catch (ValueError $e) {
64+
echo $e->getMessage(), "\n";
65+
}
66+
try {
67+
var_dump(grapheme_strrpos("äöü", "bar", 4));
68+
} catch (ValueError $e) {
69+
echo $e->getMessage(), "\n";
70+
}
71+
try {
72+
var_dump(grapheme_strripos("äöü", "bar", 4));
73+
} catch (ValueError $e) {
74+
echo $e->getMessage(), "\n";
75+
}
4176
echo "\n";
4277

4378
// Negative out of bounds.
@@ -61,6 +96,26 @@ try {
6196
} catch (ValueError $e) {
6297
echo $e->getMessage(), "\n";
6398
}
99+
try {
100+
var_dump(grapheme_strpos("äöü", "bar", -4));
101+
} catch (ValueError $e) {
102+
echo $e->getMessage(), "\n";
103+
}
104+
try {
105+
var_dump(grapheme_stripos("äöü", "bar", -4));
106+
} catch (ValueError $e) {
107+
echo $e->getMessage(), "\n";
108+
}
109+
try {
110+
var_dump(grapheme_strrpos("äöü", "bar", -4));
111+
} catch (ValueError $e) {
112+
echo $e->getMessage(), "\n";
113+
}
114+
try {
115+
var_dump(grapheme_strripos("äöü", "bar", -4));
116+
} catch (ValueError $e) {
117+
echo $e->getMessage(), "\n";
118+
}
64119
echo "\n";
65120

66121
// TODO: substr is special.
@@ -75,17 +130,38 @@ bool(false)
75130
bool(false)
76131
bool(false)
77132
bool(false)
133+
bool(false)
134+
bool(false)
135+
bool(false)
136+
bool(false)
137+
138+
bool(false)
139+
bool(false)
140+
bool(false)
141+
bool(false)
142+
bool(false)
143+
bool(false)
144+
bool(false)
145+
bool(false)
78146

79147
bool(false)
80148
bool(false)
81149
bool(false)
82150
bool(false)
83151

152+
grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
153+
grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
154+
grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
155+
grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
84156
grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
85157
grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
86158
grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
87159
grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
88160

161+
grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
162+
grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
163+
grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
164+
grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
89165
grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
90166
grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
91167
grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)

0 commit comments

Comments
 (0)