2626#include <unicode/ucol.h>
2727#include <unicode/ustring.h>
2828#include <unicode/ubrk.h>
29+ #include <unicode/usearch.h>
2930
3031/* }}} */
3132
@@ -979,10 +980,8 @@ PHP_FUNCTION(grapheme_levenshtein)
979980 intl_convert_utf8_to_utf16 (& ustring1 , & ustring1_len , pstr1 , ZSTR_LEN (string1 ), & ustatus1 );
980981
981982 if (U_FAILURE (ustatus1 )) {
982- /* Set global error code. */
983983 intl_error_set_code (NULL , ustatus1 );
984984
985- /* Set error messages. */
986985 intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
987986 if (ustring1 ) {
988987 efree (ustring1 );
@@ -993,10 +992,8 @@ PHP_FUNCTION(grapheme_levenshtein)
993992 intl_convert_utf8_to_utf16 (& ustring2 , & ustring2_len , pstr2 , ZSTR_LEN (string2 ), & ustatus2 );
994993
995994 if (U_FAILURE (ustatus2 )) {
996- /* Set global error code. */
997995 intl_error_set_code (NULL , ustatus2 );
998996
999- /* Set error messages. */
1000997 intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
1001998 if (ustring2 ) {
1002999 efree (ustring2 );
@@ -1007,8 +1004,6 @@ PHP_FUNCTION(grapheme_levenshtein)
10071004 RETURN_FALSE ;
10081005 }
10091006
1010- UText * ut1 = NULL ;
1011- UText * ut2 = NULL ;
10121007 UBreakIterator * bi1 , * bi2 ;
10131008
10141009 int32_t strlen_1 , strlen_2 ;
@@ -1031,10 +1026,28 @@ PHP_FUNCTION(grapheme_levenshtein)
10311026 bi1 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer1 , & ustatus1 );
10321027 bi2 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer2 , & ustatus2 );
10331028
1034- ut1 = utext_openUTF8 (ut1 , pstr1 , ZSTR_LEN (string1 ), & ustatus1 );
1035- ubrk_setUText (bi1 , ut1 , & ustatus1 );
1036- ut2 = utext_openUTF8 (ut2 , pstr2 , ZSTR_LEN (string2 ), & ustatus2 );
1037- ubrk_setUText (bi2 , ut2 , & ustatus2 );
1029+ ubrk_setText (bi1 , ustring1 , ustring1_len , & ustatus1 );
1030+
1031+ if (U_FAILURE (ustatus1 )) {
1032+ intl_error_set_code (NULL , ustatus1 );
1033+
1034+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring1" , 0 );
1035+ if (ustring1 ) {
1036+ efree (ustring1 );
1037+ }
1038+ RETURN_FALSE ;
1039+ }
1040+
1041+ ubrk_setText (bi2 , ustring2 , ustring2_len , & ustatus2 );
1042+ if (U_FAILURE (ustatus2 )) {
1043+ intl_error_set_code (NULL , ustatus2 );
1044+
1045+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring2" , 0 );
1046+ if (ustring2 ) {
1047+ efree (ustring2 );
1048+ }
1049+ RETURN_FALSE ;
1050+ }
10381051
10391052 p1 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
10401053 p2 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
@@ -1048,6 +1061,7 @@ PHP_FUNCTION(grapheme_levenshtein)
10481061 int32_t pos1 = 0 ;
10491062 int32_t pos2 = 0 ;
10501063 int32_t usrch_pos = 0 ;
1064+
10511065 while (pos1 != UBRK_DONE ) {
10521066 current1 = ubrk_current (bi1 );
10531067 pos1 = ubrk_next (bi1 );
@@ -1061,8 +1075,19 @@ PHP_FUNCTION(grapheme_levenshtein)
10611075 if (pos2 == UBRK_DONE ) {
10621076 break ;
10631077 }
1064- usrch_pos = grapheme_strpos_utf16 (pstr1 + current1 , pos1 - current1 , pstr2 + current2 , pos2 - current2 , 0 , NULL , 0 , 0 );
1065- if (usrch_pos == 0 ) {
1078+ UStringSearch * srch = usearch_open (ustring1 + current1 , pos1 - current1 , ustring2 + current2 , pos2 - current2 , "" , NULL , & ustatus2 );
1079+ if (U_FAILURE (ustatus2 )) {
1080+ intl_error_set_code (NULL , ustatus2 );
1081+ intl_error_set_custom_msg (NULL , "Error usearch_open" , 0 );
1082+ }
1083+ usrch_pos = usearch_first (srch , & ustatus2 );
1084+ if (U_FAILURE (ustatus2 )) {
1085+ intl_error_set_code (NULL , ustatus2 );
1086+ intl_error_set_custom_msg (NULL , "Error usearch_first" , 0 );
1087+ }
1088+ usearch_close (srch );
1089+
1090+ if (usrch_pos != USEARCH_DONE ) {
10661091 c0 = p1 [i2 ];
10671092 } else {
10681093 c0 = p1 [i2 ] + cost_rep ;
@@ -1083,9 +1108,6 @@ PHP_FUNCTION(grapheme_levenshtein)
10831108 p2 = tmp ;
10841109 }
10851110
1086- utext_close (ut1 );
1087- utext_close (ut2 );
1088-
10891111 ubrk_close (bi1 );
10901112 ubrk_close (bi2 );
10911113
0 commit comments