@@ -918,4 +918,219 @@ PHP_FUNCTION(grapheme_str_split)
918918	ubrk_close (bi );
919919}
920920
921+ PHP_FUNCTION (grapheme_levenshtein )
922+ {
923+ 	zend_string  * string1 , * string2 ;
924+ 	zend_long  cost_ins  =  1 ;
925+ 	zend_long  cost_rep  =  1 ;
926+ 	zend_long  cost_del  =  1 ;
927+ 
928+ 	ZEND_PARSE_PARAMETERS_START (2 , 5 )
929+ 		Z_PARAM_STR (string1 )
930+ 		Z_PARAM_STR (string2 )
931+ 		Z_PARAM_OPTIONAL 
932+ 		Z_PARAM_LONG (cost_ins )
933+ 		Z_PARAM_LONG (cost_rep )
934+ 		Z_PARAM_LONG (cost_del )
935+ 	ZEND_PARSE_PARAMETERS_END ();
936+ 
937+ 	if  (cost_ins  <= 0  ||  cost_ins  >  UINT_MAX  / 4 ) {
938+ 		zend_argument_value_error (3 , "must be greater than 0 and less than or equal to %d" , UINT_MAX  / 4 );
939+ 		RETURN_THROWS ();
940+ 	}
941+ 
942+ 	if  (cost_rep  <= 0  ||  cost_rep  >  UINT_MAX  / 4 ) {
943+ 		zend_argument_value_error (4 , "must be greater than 0 and less than or equal to %d" , UINT_MAX  / 4 );
944+ 		RETURN_THROWS ();
945+ 	}
946+ 
947+ 	if  (cost_del  <= 0  ||  cost_del  >  UINT_MAX  / 4 ) {
948+ 		zend_argument_value_error (5 , "must be greater than 0 and less than or equal to %d" , UINT_MAX  / 4 );
949+ 		RETURN_THROWS ();
950+ 	}
951+ 
952+ 	zend_long  c0 , c1 , c2 ;
953+ 	zend_long  retval ;
954+ 	size_t  i2 ;
955+ 	char  * pstr1 , * pstr2 ;
956+ 
957+ 	UChar  * ustring1  =  NULL ;
958+ 	UChar  * ustring2  =  NULL ;
959+ 
960+ 	int32_t  ustring1_len  =  0 ;
961+ 	int32_t  ustring2_len  =  0 ;
962+ 
963+ 	UErrorCode  ustatus  =  U_ZERO_ERROR ;
964+ 
965+ 	/* When all costs are equal, levenshtein fulfills the requirements of a metric, which means 
966+ 	 * that the distance is symmetric. If string1 is shorter than string2 we can save memory (and CPU time) 
967+ 	 * by having shorter rows (p1 & p2). */ 
968+ 	if  (ZSTR_LEN (string1 ) <  ZSTR_LEN (string2 ) &&  cost_ins  ==  cost_rep  &&  cost_rep  ==  cost_del ) {
969+ 		zend_string  * tmp  =  string1 ;
970+ 		string1  =  string2 ;
971+ 		string2  =  tmp ;
972+ 	}
973+ 
974+ 	pstr1  =  ZSTR_VAL (string1 );
975+ 	pstr2  =  ZSTR_VAL (string2 );
976+ 
977+ 	intl_convert_utf8_to_utf16 (& ustring1 , & ustring1_len , pstr1 , ZSTR_LEN (string1 ), & ustatus );
978+ 
979+ 	if  (U_FAILURE (ustatus )) {
980+ 		intl_error_set_code (NULL , ustatus );
981+ 
982+ 		intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
983+ 		efree (ustring1 );
984+ 		RETURN_FALSE ;
985+ 	}
986+ 
987+ 	intl_convert_utf8_to_utf16 (& ustring2 , & ustring2_len , pstr2 , ZSTR_LEN (string2 ), & ustatus );
988+ 
989+ 	if  (U_FAILURE (ustatus )) {
990+ 		intl_error_set_code (NULL , ustatus );
991+ 
992+ 		intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
993+ 		efree (ustring2 );
994+ 		efree (ustring1 );
995+ 		RETURN_FALSE ;
996+ 	}
997+ 
998+ 	UBreakIterator  * bi1 , * bi2 ;
999+ 
1000+ 	int32_t  strlen_1 , strlen_2 ;
1001+ 	strlen_1  =  grapheme_split_string (ustring1 , ustring1_len , NULL , 0 );
1002+ 	strlen_2  =  grapheme_split_string (ustring2 , ustring2_len , NULL , 0 );
1003+ 
1004+ 	if  (strlen_1  ==  0 ) {
1005+ 		efree (ustring1 );
1006+ 		efree (ustring2 );
1007+ 		RETURN_LONG (strlen_2  *  cost_ins );
1008+ 	}
1009+ 	if  (strlen_2  ==  0 ) {
1010+ 		efree (ustring1 );
1011+ 		efree (ustring2 );
1012+ 		RETURN_LONG (strlen_1  *  cost_del );
1013+ 	}
1014+ 
1015+ 	unsigned char   u_break_iterator_buffer1 [U_BRK_SAFECLONE_BUFFERSIZE ];
1016+ 	unsigned char   u_break_iterator_buffer2 [U_BRK_SAFECLONE_BUFFERSIZE ];
1017+ 	bi1  =  grapheme_get_break_iterator (u_break_iterator_buffer1 , & ustatus );
1018+ 	if  (U_FAILURE (ustatus )) {
1019+ 		intl_error_set_code (NULL , ustatus );
1020+ 		intl_error_set_custom_msg (NULL , "Error on grapheme_get_break_iterator for argument #1 ($string1)" , 0 );
1021+ 		efree (ustring2 );
1022+ 		efree (ustring1 );
1023+ 		ubrk_close (bi1 );
1024+ 		RETURN_FALSE ;
1025+ 	}
1026+ 
1027+ 	bi2  =  grapheme_get_break_iterator (u_break_iterator_buffer2 , & ustatus );
1028+ 	if  (U_FAILURE (ustatus )) {
1029+ 		intl_error_set_code (NULL , ustatus );
1030+ 		intl_error_set_custom_msg (NULL , "Error on grapheme_get_break_iterator for argument #2 ($string2)" , 0 );
1031+ 		efree (ustring2 );
1032+ 		efree (ustring1 );
1033+ 		ubrk_close (bi2 );
1034+ 		ubrk_close (bi1 );
1035+ 		RETURN_FALSE ;
1036+ 	}
1037+ 	ubrk_setText (bi1 , ustring1 , ustring1_len , & ustatus );
1038+ 
1039+ 	if  (U_FAILURE (ustatus )) {
1040+ 		intl_error_set_code (NULL , ustatus );
1041+ 
1042+ 		intl_error_set_custom_msg (NULL , "Error on ubrk_setText for argument #1 ($string1)" , 0 );
1043+ 		efree (ustring2 );
1044+ 		efree (ustring1 );
1045+ 		ubrk_close (bi2 );
1046+ 		ubrk_close (bi1 );
1047+ 		RETURN_FALSE ;
1048+ 	}
1049+ 
1050+ 	ubrk_setText (bi2 , ustring2 , ustring2_len , & ustatus );
1051+ 	if  (U_FAILURE (ustatus )) {
1052+ 		intl_error_set_code (NULL , ustatus );
1053+ 
1054+ 		intl_error_set_custom_msg (NULL , "Error on ubrk_setText for argument #2 ($string2)" , 0 );
1055+ 		efree (ustring2 );
1056+ 		efree (ustring1 );
1057+ 		ubrk_close (bi2 );
1058+ 		ubrk_close (bi1 );
1059+ 		RETURN_FALSE ;
1060+ 	}
1061+ 	UCollator  * collator  =  ucol_open ("" , & ustatus );
1062+ 	if  (U_FAILURE (ustatus )) {
1063+ 		intl_error_set_code (NULL , ustatus );
1064+ 
1065+ 		intl_error_set_custom_msg (NULL , "Error on ucol_open" , 0 );
1066+ 		efree (ustring2 );
1067+ 		efree (ustring1 );
1068+ 		ubrk_close (bi2 );
1069+ 		ubrk_close (bi1 );
1070+ 		ucol_close (collator );
1071+ 		RETURN_FALSE ;
1072+ 	}
1073+ 
1074+ 	zend_long  * p1 , * p2 , * tmp ;
1075+ 	p1  =  safe_emalloc (strlen_2  +  1 , sizeof (zend_long ), 0 );
1076+ 	p2  =  safe_emalloc (strlen_2  +  1 , sizeof (zend_long ), 0 );
1077+ 
1078+ 	for  (i2  =  0 ; i2  <= strlen_2 ; i2 ++ ) {
1079+ 		p1 [i2 ] =  i2  *  cost_ins ;
1080+ 	}
1081+ 
1082+ 	int32_t  current1  =  0 ;
1083+ 	int32_t  current2  =  0 ;
1084+ 	int32_t  pos1  =  0 ;
1085+ 	int32_t  pos2  =  0 ;
1086+ 
1087+ 	while  (true) {
1088+ 		current1  =  ubrk_current (bi1 );
1089+ 		pos1  =  ubrk_next (bi1 );
1090+ 		if  (pos1  ==  UBRK_DONE ) {
1091+ 			break ;
1092+ 		}
1093+ 		p2 [0 ] =  p1 [0 ] +  cost_del ;
1094+ 		for  (i2  =  0 , pos2  =  0 ; pos2  !=  UBRK_DONE ; i2 ++ ) {
1095+ 			current2  =  ubrk_current (bi2 );
1096+ 			pos2  =  ubrk_next (bi2 );
1097+ 			if  (pos2  ==  UBRK_DONE ) {
1098+ 				break ;
1099+ 			}
1100+ 			if  (ucol_strcoll (collator , ustring1  +  current1 , pos1  -  current1 , ustring2  +  current2 , pos2  -  current2 ) ==  UCOL_EQUAL ) {
1101+ 				c0  =  p1 [i2 ];
1102+ 			} else  {
1103+ 				c0  =  p1 [i2 ] +  cost_rep ;
1104+ 			}
1105+ 			c1  =  p1 [i2  +  1 ] +  cost_del ;
1106+ 			if  (c1  <  c0 ) {
1107+ 				c0  =  c1 ;
1108+ 			}
1109+ 			c2  =  p2 [i2 ] +  cost_ins ;
1110+ 			if  (c2  <  c0 ) {
1111+ 				c0  =  c2 ;
1112+ 			}
1113+ 			p2 [i2  +  1 ] =  c0 ;
1114+ 		}
1115+ 		ubrk_first (bi2 );
1116+ 		tmp  =  p1 ;
1117+ 		p1  =  p2 ;
1118+ 		p2  =  tmp ;
1119+ 	}
1120+ 
1121+ 	ucol_close (collator );
1122+ 
1123+ 	ubrk_close (bi1 );
1124+ 	ubrk_close (bi2 );
1125+ 
1126+ 	efree (ustring1 );
1127+ 	efree (ustring2 );
1128+ 
1129+ 	retval  =  p1 [strlen_2 ];
1130+ 
1131+ 	efree (p1 );
1132+ 	efree (p2 );
1133+ 	RETURN_LONG (retval );
1134+ }
1135+ 
9211136/* }}} */ 
0 commit comments