Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ PHP NEWS
. Fixed date_sunrise() and date_sunset() with partial-hour UTC offset.
(ilutov)

- Intl:
. Added $locale parameter to grapheme_strpos(), grapheme_stripos(),
grapheme_strrpos(), grapheme_strripos(), grapheme_strstr(),
grapheme_stristr() and grapheme_levenshtein() functions. (Yuya Hamada)

- EXIF:
. Added support to retrieve Exif from HEIF file. (Benstone Zhang)

Expand Down
4 changes: 4 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,10 @@ PHP 8.5 UPGRADE NOTES
TransLiterator::getErrorCode(), and TransLiterator::getErrorMessage()
have dropped the false from the return type union. Returning false
was actually never possible.
. grapheme_strpos(), grapheme_stripos(), grapheme_strrpos(),
grapheme_strripos(), grapheme_strstr(), grapheme_stristr() and
grapheme_levenshtein() now accept an optional $locale parameter.
RFC: https://wiki.php.net/rfc/grapheme_add_locale_for_case_insensitive

- LDAP:
. ldap_get_option() now accepts a NULL connection, as ldap_set_option(),
Expand Down
58 changes: 34 additions & 24 deletions ext/intl/grapheme/grapheme_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,20 @@ PHP_FUNCTION(grapheme_strlen)
/* {{{ Find position of first occurrence of a string within another */
PHP_FUNCTION(grapheme_strpos)
{
char *haystack, *needle;
size_t haystack_len, needle_len;
char *haystack, *needle, *locale = "";
size_t haystack_len, needle_len, locale_len = 0;
const char *found;
zend_long loffset = 0;
int32_t offset = 0;
size_t noffset = 0;
zend_long ret_pos;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack, haystack_len)
Z_PARAM_STRING(needle, needle_len)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(loffset)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if ( OUTSIDE_STRING(loffset, haystack_len) ) {
Expand Down Expand Up @@ -121,7 +122,7 @@ PHP_FUNCTION(grapheme_strpos)
}

/* do utf16 part of the strpos */
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, locale /* last */ );

if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
Expand All @@ -134,19 +135,20 @@ PHP_FUNCTION(grapheme_strpos)
/* {{{ Find position of first occurrence of a string within another, ignoring case differences */
PHP_FUNCTION(grapheme_stripos)
{
char *haystack, *needle;
size_t haystack_len, needle_len;
char *haystack, *needle, *locale = "";
size_t haystack_len, needle_len, locale_len = 0;
const char *found;
zend_long loffset = 0;
int32_t offset = 0;
zend_long ret_pos;
int is_ascii;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack, haystack_len)
Z_PARAM_STRING(needle, needle_len)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(loffset)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if ( OUTSIDE_STRING(loffset, haystack_len) ) {
Expand Down Expand Up @@ -185,7 +187,7 @@ PHP_FUNCTION(grapheme_stripos)
}

/* do utf16 part of the strpos */
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale /*last */ );

if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
Expand All @@ -200,17 +202,19 @@ PHP_FUNCTION(grapheme_stripos)
PHP_FUNCTION(grapheme_strrpos)
{
char *haystack, *needle;
size_t haystack_len, needle_len;
char *locale = "";
size_t haystack_len, needle_len, locale_len = 0;
zend_long loffset = 0;
int32_t offset = 0;
zend_long ret_pos;
int is_ascii;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack, haystack_len)
Z_PARAM_STRING(needle, needle_len)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(loffset)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if ( OUTSIDE_STRING(loffset, haystack_len) ) {
Expand Down Expand Up @@ -242,7 +246,7 @@ PHP_FUNCTION(grapheme_strrpos)
/* else we need to continue via utf16 */
}

ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale /* last */);

if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
Expand All @@ -257,18 +261,19 @@ PHP_FUNCTION(grapheme_strrpos)
/* {{{ Find position of last occurrence of a string within another, ignoring case */
PHP_FUNCTION(grapheme_strripos)
{
char *haystack, *needle;
size_t haystack_len, needle_len;
char *haystack, *needle, *locale = "";
size_t haystack_len, needle_len, locale_len = 0;
zend_long loffset = 0;
int32_t offset = 0;
zend_long ret_pos;
int is_ascii;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack, haystack_len)
Z_PARAM_STRING(needle, needle_len)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(loffset)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if ( OUTSIDE_STRING(loffset, haystack_len) ) {
Expand Down Expand Up @@ -309,7 +314,7 @@ PHP_FUNCTION(grapheme_strripos)
/* else we need to continue via utf16 */
}

ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1 /*last */);
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale /*last */);

if ( ret_pos >= 0 ) {
RETURN_LONG(ret_pos);
Expand All @@ -324,10 +329,10 @@ PHP_FUNCTION(grapheme_strripos)
/* {{{ Returns part of a string */
PHP_FUNCTION(grapheme_substr)
{
char *str;
char *str, *locale = "";
zend_string *u8_sub_str;
UChar *ustr;
size_t str_len;
size_t str_len, locale_len = 0;
int32_t ustr_len;
zend_long lstart = 0, length = 0;
int32_t start = 0;
Expand All @@ -339,11 +344,12 @@ PHP_FUNCTION(grapheme_substr)
int32_t (*iter_func)(UBreakIterator *);
bool no_length = true;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(str, str_len)
Z_PARAM_LONG(lstart)
Z_PARAM_OPTIONAL
Z_PARAM_LONG_OR_NULL(length, no_length)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if (lstart < INT32_MIN || lstart > INT32_MAX) {
Expand Down Expand Up @@ -537,17 +543,18 @@ PHP_FUNCTION(grapheme_substr)
/* {{{ strstr_common_handler */
static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
{
char *haystack, *needle;
char *haystack, *needle, *locale = "";
const char *found;
size_t haystack_len, needle_len;
size_t haystack_len, needle_len, locale_len = 0;
int32_t ret_pos, uchar_pos;
bool part = false;

ZEND_PARSE_PARAMETERS_START(2, 3)
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack, haystack_len)
Z_PARAM_STRING(needle, needle_len)
Z_PARAM_OPTIONAL
Z_PARAM_BOOL(part)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if ( !f_ignore_case ) {
Expand All @@ -574,7 +581,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
}

/* need to work in utf16 */
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale /*last */ );

if ( ret_pos < 0 ) {
RETURN_FALSE;
Expand Down Expand Up @@ -919,14 +926,17 @@ PHP_FUNCTION(grapheme_levenshtein)
zend_long cost_ins = 1;
zend_long cost_rep = 1;
zend_long cost_del = 1;
char *locale = "";
size_t locale_len = 0;

ZEND_PARSE_PARAMETERS_START(2, 5)
ZEND_PARSE_PARAMETERS_START(2, 6)
Z_PARAM_STR(string1)
Z_PARAM_STR(string2)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(cost_ins)
Z_PARAM_LONG(cost_rep)
Z_PARAM_LONG(cost_del)
Z_PARAM_PATH(locale, locale_len)
ZEND_PARSE_PARAMETERS_END();

if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) {
Expand Down Expand Up @@ -1043,7 +1053,7 @@ PHP_FUNCTION(grapheme_levenshtein)
RETVAL_FALSE;
goto out_bi2;
}
UCollator *collator = ucol_open("", &ustatus);
UCollator *collator = ucol_open(locale, &ustatus);
if (U_FAILURE(ustatus)) {
intl_error_set_code(NULL, ustatus);

Expand Down
4 changes: 2 additions & 2 deletions ext/intl/grapheme/grapheme_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char


/* {{{ grapheme_strpos_utf16 - strpos/strrpos using utf16 */
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, const char* locale)
{
UChar *uhaystack = NULL, *uneedle = NULL;
int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
Expand Down Expand Up @@ -136,7 +136,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
}

status = U_ZERO_ERROR;
src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, locale, bi, &status);
STRPOS_CHECK_STATUS(status, "Error creating search object");

if(f_ignore_case) {
Expand Down
4 changes: 2 additions & 2 deletions ext/intl/grapheme/grapheme_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ zend_long grapheme_ascii_check(const unsigned char *day, size_t len);
void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char **sub_str, int32_t *sub_str_len);
zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset);

int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case);
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last);
int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case, const char *locale);
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, const char *locale);

int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len );

Expand Down
16 changes: 8 additions & 8 deletions ext/intl/php_intl.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -427,23 +427,23 @@ function numfmt_get_error_message(NumberFormatter $formatter): string {}

function grapheme_strlen(string $string): int|false|null {}

function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {}
function grapheme_strpos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}

function grapheme_stripos(string $haystack, string $needle, int $offset = 0): int|false {}
function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}

function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {}
function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}

function grapheme_strripos(string $haystack, string $needle, int $offset = 0): int|false {}
function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}

function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {}
function grapheme_substr(string $string, int $offset, ?int $length = null, string $locale = ""): string|false {}

function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {}
function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = ""): string|false {}

function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {}
function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = ""): string|false {}

function grapheme_str_split(string $string, int $length = 1): array|false {}

function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1): int|false {}
function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = ""): int|false {}

/** @param int $next */
function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {}
Expand Down
6 changes: 5 additions & 1 deletion ext/intl/php_intl_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions ext/intl/tests/grapheme_levenshtein.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ $nabe = '邊';
$nabe_E0100 = "邊󠄀";
var_dump(grapheme_levenshtein($nabe, $nabe_E0100));

// $nabe and $nabe_E0101 differ because $nabe_E0101 is an IVS (Ideographic Variation Sequence):
// the base character U+908A followed by the variation selector U+E0101.
// grapheme_levenshtein() only detects this difference when the locale sets the collation strength to identical (u-ks-identic).
// So the expected result is 1.
$nabe = '邊';
$nabe_E0101 = "\u{908A}\u{E0101}";
var_dump(grapheme_levenshtein($nabe, $nabe_E0101, locale: "ja_JP-u-ks-identic"));

// combining character
var_dump(grapheme_levenshtein("\u{0065}\u{0301}", "\u{00e9}"));

Expand All @@ -80,6 +88,12 @@ try {
} catch (ValueError $e) {
echo $e->getMessage() . PHP_EOL;
}

echo "--- Invalid locales ---\n";
var_dump(grapheme_levenshtein("abc", "abc", locale: "defaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
var_dump(intl_get_error_code());
var_dump(intl_get_error_message());

?>
--EXPECTF--
--- Equal ---
Expand Down Expand Up @@ -121,8 +135,13 @@ int(2)
--- Variable selector ---
int(1)
int(0)
int(1)
int(0)
--- Corner case ---
grapheme_levenshtein(): Argument #3 ($insertion_cost) must be greater than 0 and less than or equal to %d
grapheme_levenshtein(): Argument #4 ($replacement_cost) must be greater than 0 and less than or equal to %d
grapheme_levenshtein(): Argument #5 ($deletion_cost) must be greater than 0 and less than or equal to %d
--- Invalid locales ---
bool(false)
int(%d)
string(68) "grapheme_levenshtein(): Error on ucol_open: U_ILLEGAL_ARGUMENT_ERROR"
Loading
Loading