Skip to content

Commit d3b2061

Browse files
committed
[RFC] Add a locale for grapheme case-insensitive functions
1 parent cce0efd commit d3b2061

8 files changed

+82
-24
lines changed

ext/intl/grapheme/grapheme_string.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ PHP_FUNCTION(grapheme_strpos)
8484
char *haystack, *needle;
8585
size_t haystack_len, needle_len;
8686
const char *found;
87+
char *locale = "";
8788
zend_long loffset = 0;
8889
int32_t offset = 0;
8990
size_t noffset = 0;
@@ -121,7 +122,7 @@ PHP_FUNCTION(grapheme_strpos)
121122
}
122123

123124
/* do utf16 part of the strpos */
124-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
125+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, /* last */ 0, locale );
125126

126127
if ( ret_pos >= 0 ) {
127128
RETURN_LONG(ret_pos);
@@ -134,19 +135,20 @@ PHP_FUNCTION(grapheme_strpos)
134135
/* {{{ Find position of first occurrence of a string within another, ignoring case differences */
135136
PHP_FUNCTION(grapheme_stripos)
136137
{
137-
char *haystack, *needle;
138-
size_t haystack_len, needle_len;
138+
char *haystack, *needle, *locale = "";
139+
size_t haystack_len, needle_len, locale_len = 0;
139140
const char *found;
140141
zend_long loffset = 0;
141142
int32_t offset = 0;
142143
zend_long ret_pos;
143144
int is_ascii;
144145

145-
ZEND_PARSE_PARAMETERS_START(2, 3)
146+
ZEND_PARSE_PARAMETERS_START(2, 4)
146147
Z_PARAM_STRING(haystack, haystack_len)
147148
Z_PARAM_STRING(needle, needle_len)
148149
Z_PARAM_OPTIONAL
149150
Z_PARAM_LONG(loffset)
151+
Z_PARAM_STRING_OR_NULL(locale, locale_len)
150152
ZEND_PARSE_PARAMETERS_END();
151153

152154
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -185,7 +187,7 @@ PHP_FUNCTION(grapheme_stripos)
185187
}
186188

187189
/* do utf16 part of the strpos */
188-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
190+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, /* last */ 0, locale );
189191

190192
if ( ret_pos >= 0 ) {
191193
RETURN_LONG(ret_pos);
@@ -200,6 +202,7 @@ PHP_FUNCTION(grapheme_stripos)
200202
PHP_FUNCTION(grapheme_strrpos)
201203
{
202204
char *haystack, *needle;
205+
char *locale = "";
203206
size_t haystack_len, needle_len;
204207
zend_long loffset = 0;
205208
int32_t offset = 0;
@@ -242,7 +245,7 @@ PHP_FUNCTION(grapheme_strrpos)
242245
/* else we need to continue via utf16 */
243246
}
244247

245-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
248+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, /* last */ 1, locale );
246249

247250
if ( ret_pos >= 0 ) {
248251
RETURN_LONG(ret_pos);
@@ -257,18 +260,19 @@ PHP_FUNCTION(grapheme_strrpos)
257260
/* {{{ Find position of last occurrence of a string within another, ignoring case */
258261
PHP_FUNCTION(grapheme_strripos)
259262
{
260-
char *haystack, *needle;
261-
size_t haystack_len, needle_len;
263+
char *haystack, *needle, *locale = "";
264+
size_t haystack_len, needle_len, locale_len = 0;
262265
zend_long loffset = 0;
263266
int32_t offset = 0;
264267
zend_long ret_pos;
265268
int is_ascii;
266269

267-
ZEND_PARSE_PARAMETERS_START(2, 3)
270+
ZEND_PARSE_PARAMETERS_START(2, 4)
268271
Z_PARAM_STRING(haystack, haystack_len)
269272
Z_PARAM_STRING(needle, needle_len)
270273
Z_PARAM_OPTIONAL
271274
Z_PARAM_LONG(loffset)
275+
Z_PARAM_STRING_OR_NULL(locale, locale_len)
272276
ZEND_PARSE_PARAMETERS_END();
273277

274278
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -309,7 +313,7 @@ PHP_FUNCTION(grapheme_strripos)
309313
/* else we need to continue via utf16 */
310314
}
311315

312-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1 /*last */);
316+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, /* last */ 1, locale );
313317

314318
if ( ret_pos >= 0 ) {
315319
RETURN_LONG(ret_pos);
@@ -537,17 +541,18 @@ PHP_FUNCTION(grapheme_substr)
537541
/* {{{ strstr_common_handler */
538542
static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
539543
{
540-
char *haystack, *needle;
544+
char *haystack, *needle, *locale = "";
541545
const char *found;
542-
size_t haystack_len, needle_len;
546+
size_t haystack_len, needle_len, locale_len = 0;
543547
int32_t ret_pos, uchar_pos;
544548
bool part = false;
545549

546-
ZEND_PARSE_PARAMETERS_START(2, 3)
550+
ZEND_PARSE_PARAMETERS_START(2, 4)
547551
Z_PARAM_STRING(haystack, haystack_len)
548552
Z_PARAM_STRING(needle, needle_len)
549553
Z_PARAM_OPTIONAL
550554
Z_PARAM_BOOL(part)
555+
Z_PARAM_STRING_OR_NULL(locale, locale_len)
551556
ZEND_PARSE_PARAMETERS_END();
552557

553558
if ( !f_ignore_case ) {
@@ -574,7 +579,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
574579
}
575580

576581
/* need to work in utf16 */
577-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
582+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, /* last */ 0, locale );
578583

579584
if ( ret_pos < 0 ) {
580585
RETURN_FALSE;

ext/intl/grapheme/grapheme_util.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
9494

9595

9696
/* {{{ grapheme_strpos_utf16 - strpos/strrpos using utf16 (first or last match, selected by `last`) */
97-
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
97+
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale)
9898
{
9999
UChar *uhaystack = NULL, *uneedle = NULL;
100100
int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
@@ -136,7 +136,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
136136
}
137137

138138
status = U_ZERO_ERROR;
139-
src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
139+
src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, locale, bi, &status);
140140
STRPOS_CHECK_STATUS(status, "Error creating search object");
141141

142142
if(f_ignore_case) {

ext/intl/grapheme/grapheme_util.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
2626
zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset);
2727

2828
int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case);
29-
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last);
29+
/*
 * Search for `needle` in `haystack` using the UTF-16 code path.
 *
 * offset        - search offset supplied by the caller
 * puchar_pos    - optional out-parameter; callers that do not need it pass NULL
 *                 (NOTE(review): appears to receive the match position in UChar units - confirm)
 * f_ignore_case - non-zero for a case-insensitive search
 * last          - non-zero to look for the last occurrence instead of the first
 * locale        - locale name handed to usearch_open(); "" selects the root locale
 *
 * Callers treat a return value >= 0 as the match position and a negative value as "not found".
 * The puchar_pos type is int32_t * to match the definition in grapheme_util.c.
 */
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char *locale);
3030

3131
int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len );
3232

ext/intl/php_intl.stub.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,17 +433,17 @@ function grapheme_strlen(string $string): int|false|null {}
433433

434434
function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {}
435435

436-
function grapheme_stripos(string $haystack, string $needle, int $offset = 0): int|false {}
436+
function grapheme_stripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {}
437437

438438
function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {}
439439

440-
function grapheme_strripos(string $haystack, string $needle, int $offset = 0): int|false {}
440+
function grapheme_strripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {}
441441

442442
function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {}
443443

444444
function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {}
445445

446-
function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {}
446+
function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, ?string $locale = null): string|false {}
447447

448448
function grapheme_str_split(string $string, int $length = 1): array|false {}
449449

ext/intl/php_intl_arginfo.h

Lines changed: 14 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
--TEST--
2+
grapheme_stripos() function locale dependency test
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
var_dump(grapheme_stripos("abc", "abc", 0));
8+
var_dump(grapheme_stripos("i", "\u{0130}", 0, "tr_TR"));
9+
var_dump(grapheme_stripos("i", "\u{0130}", 0, "en_US"));
10+
?>
11+
--EXPECT--
12+
int(0)
13+
int(0)
14+
bool(false)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--TEST--
2+
grapheme_stristr() function locale dependency test
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
var_dump(grapheme_stristr("abc", "abc", 0));
8+
var_dump(grapheme_stristr("i", "\u{0130}", 0, "tr_TR"));
9+
var_dump(grapheme_stristr("i", "\u{0130}", 0, "en_US"));
10+
?>
11+
--EXPECT--
12+
string(3) "abc"
13+
string(1) "i"
14+
bool(false)
15+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
--TEST--
2+
grapheme_strripos() function locale dependency test
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
var_dump(grapheme_strripos("abc", "abc", 0));
8+
var_dump(grapheme_strripos("i", "\u{0130}", 0, "tr_TR"));
9+
var_dump(grapheme_strripos("i", "\u{0130}", 0, "en_US"));
10+
?>
11+
--EXPECT--
12+
int(0)
13+
int(0)
14+
bool(false)

0 commit comments

Comments
 (0)