Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 82 additions & 1 deletion src/Intl/Grapheme/Grapheme.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@

namespace Symfony\Polyfill\Intl\Grapheme;

// Pick the pattern used to match one grapheme cluster.
//
// PCRE's native \X is used only on releases the maintainers treat as having a
// working implementation (>= 8.32 on the 8.x line, >= 10.39 on PCRE2); every
// other release falls back to the hand-written Grapheme::GRAPHEME_CLUSTER_RX.
//
// NOTE(review): a runtime probe such as preg_match('/\X/u', 'a') must not replace
// this version gate. Per review, older PCRE releases accept \X but implement it
// incorrectly, so "the pattern compiles" says nothing about correct segmentation.
// The reviewer also mentions the lower bound may need to be raised further.
\define(
    'SYMFONY_GRAPHEME_CLUSTER_RX',
    ((float) \PCRE_VERSION < 10 ? (float) \PCRE_VERSION >= 8.32 : (float) \PCRE_VERSION >= 10.39)
        ? '\X'
        : Grapheme::GRAPHEME_CLUSTER_RX
);


/**
* Partial intl implementation in pure PHP.
Expand Down Expand Up @@ -222,6 +227,82 @@ public static function grapheme_str_split($s, $len = 1)

return $chunks;
}
/**
 * Computes the Levenshtein edit distance between two strings, counted in grapheme clusters.
 *
 * Both strings are split into clusters with SYMFONY_GRAPHEME_CLUSTER_RX and clusters
 * are compared with ===, i.e. byte for byte; no normalization or collation is applied.
 *
 * NOTE(review): a reviewer asked for native parameter types instead of the manual
 * casts below. The untyped signature is kept here so existing callers are unaffected.
 *
 * @param string $string1          String to transform
 * @param string $string2          String to reach
 * @param int    $insertion_cost   Cost of inserting one cluster
 * @param int    $replacement_cost Cost of replacing one cluster by another
 * @param int    $deletion_cost    Cost of deleting one cluster
 *
 * @return int|false The distance; false when an input is not valid UTF-8, when the
 *                   segmentation regex fails, or (PHP < 8 only) when a cost is negative
 *
 * @throws \ValueError On PHP >= 8 when a cost is negative
 *
 * @see https://wiki.php.net/rfc/grapheme_levenshtein
 */
public static function grapheme_levenshtein($string1, $string2, $insertion_cost = 1, $replacement_cost = 1, $deletion_cost = 1)
{
    // The signature is untyped, so coerce the arguments explicitly.
    $string1 = (string) $string1;
    $string2 = (string) $string2;
    $insertion_cost = (int) $insertion_cost;
    $replacement_cost = (int) $replacement_cost;
    $deletion_cost = (int) $deletion_cost;

    // An empty pattern with the /u modifier only matches valid UTF-8 subjects.
    if (1 !== preg_match('//u', $string1) || 1 !== preg_match('//u', $string2)) {
        return false;
    }

    if ($insertion_cost < 0 || $replacement_cost < 0 || $deletion_cost < 0) {
        // \ValueError only exists from PHP 8.0 on; older versions signal the error with false.
        if (\PHP_VERSION_ID < 80000) {
            return false;
        }
        throw new \ValueError('grapheme_levenshtein(): costs must be greater than or equal to 0');
    }

    // With non-negative costs, identical inputs always have distance 0.
    if ($string1 === $string2) {
        return 0;
    }

    // Only the full matches ($m[0]) are needed, so no capture group is used.
    $pattern = '/'.SYMFONY_GRAPHEME_CLUSTER_RX.'/u';
    if (false === preg_match_all($pattern, $string1, $m1) || false === preg_match_all($pattern, $string2, $m2)) {
        // A regex engine failure must not be mistaken for an empty string.
        return false;
    }

    $a = $m1[0];
    $b = $m2[0];
    $lenA = \count($a);
    $lenB = \count($b);

    if (0 === $lenA) {
        return $lenB * $insertion_cost;
    }
    if (0 === $lenB) {
        return $lenA * $deletion_cost;
    }

    // Rolling two-row dynamic programming: $previous[$j] holds the distance between
    // the first $i - 1 clusters of $a and the first $j clusters of $b.
    $previous = [0];
    for ($j = 1; $j <= $lenB; ++$j) {
        $previous[$j] = $previous[$j - 1] + $insertion_cost;
    }

    for ($i = 1; $i <= $lenA; ++$i) {
        $cluster = $a[$i - 1];
        $current = [$previous[0] + $deletion_cost];

        for ($j = 1; $j <= $lenB; ++$j) {
            $current[$j] = min(
                $previous[$j] + $deletion_cost, // delete $cluster
                $current[$j - 1] + $insertion_cost, // insert $b[$j - 1]
                $previous[$j - 1] + ($cluster === $b[$j - 1] ? 0 : $replacement_cost) // keep or replace
            );
        }

        $previous = $current;
    }

    return $previous[$lenB];
}


private static function grapheme_position($s, $needle, $offset, $mode)
{
Expand Down
8 changes: 8 additions & 0 deletions src/Intl/Grapheme/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,11 @@ function grapheme_substr($string, $offset, $length = null) { return p\Grapheme::
if (!function_exists('grapheme_str_split')) {
    /**
     * Global shim declared only when no grapheme_str_split() exists yet;
     * it forwards both arguments unchanged to the polyfill class.
     */
    function grapheme_str_split($string, $length = 1)
    {
        return p\Grapheme::grapheme_str_split($string, $length);
    }
}

if (!function_exists('grapheme_levenshtein')) {
    /**
     * Global shim declared only when no grapheme_levenshtein() exists yet;
     * it forwards all arguments unchanged to the polyfill class.
     *
     * The class is referenced through the `p` alias, like the sibling wrappers in
     * this file: a bare `Grapheme` would resolve to a global \Grapheme class here.
     */
    function grapheme_levenshtein($string1, $string2, $insertion_cost = 1, $replacement_cost = 1, $deletion_cost = 1)
    {
        return p\Grapheme::grapheme_levenshtein($string1, $string2, $insertion_cost, $replacement_cost, $deletion_cost);
    }
}

8 changes: 8 additions & 0 deletions src/Php85/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
*/

use Symfony\Polyfill\Php85 as p;
use Symfony\Polyfill\Intl\Grapheme\Grapheme;

if (\PHP_VERSION_ID >= 80500) {
return;
Expand All @@ -30,3 +31,10 @@ function array_first(array $array) { return p\Php85::array_first($array); }
if (!function_exists('array_last')) {
    /**
     * Global shim declared only when no array_last() exists yet;
     * it forwards the array unchanged to the polyfill class.
     */
    function array_last(array $array)
    {
        return p\Php85::array_last($array);
    }
}

// Declares grapheme_levenshtein() when ext-intl is loaded and the function does not exist yet.
// The wrapper forwards all arguments unchanged to the Grapheme class imported at the top of
// this file from Symfony\Polyfill\Intl\Grapheme.
// NOTE(review): the review comment embedded below asks for the implementation to be duplicated
// in the Php85 package instead of calling the other package - left unresolved here, TODO.
if (\extension_loaded('intl') && !function_exists('grapheme_levenshtein')) {
function grapheme_levenshtein($string1, $string2, $insertion_cost = 1, $replacement_cost = 1, $deletion_cost = 1)
{
return Grapheme::grapheme_levenshtein($string1, $string2, $insertion_cost, $replacement_cost, $deletion_cost);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The implementation must be duplicated in the Php85 package if we want to provide it here, not call the other package.

}
}
28 changes: 28 additions & 0 deletions tests/Intl/Grapheme/grapheme_levenshtein.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

use Symfony\Polyfill\Intl\Grapheme\Grapheme;

// Expected-distance table for grapheme_levenshtein().
// Rows read as [expected distance, string1, string2]; the values match the default costs of 1.
//
// NOTE(review): reviewers pointed out that this file is neither a .phpt test nor a
// PHPUnit test: it only returns data and asserts nothing on its own. Presumably it is
// meant to feed a PHPUnit data provider - TODO wire it up or move the rows into one.
//
// Emoji ZWJ sequences are written with \u{...} escapes so the invisible joiners
// (U+200D) are explicit.
return [
    // Basic
    [0, 'abc', 'abc'],
    [1, 'abc', 'abd'],
    [2, 'kitten', 'sitting'],

    // Multibyte
    [1, 'àbc', 'abc'],
    // All three clusters differ (à/a, b/B, ç/c), so the distance is 3.
    [3, 'àbç', 'aBc'],

    // Emoji
    [0, '😊', '😊'],
    [1, '😊', '😂'],
    // Man-woman-girl-boy versus man-woman-boy: one replacement.
    [1, "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}", "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F466}"],

    // Empty
    [0, '', ''],
    [3, '', 'abc'],
    // A ZWJ sequence is a single extended grapheme cluster, hence one insertion.
    [1, '', "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}"],

    // Different lengths
    [3, 'foo', 'foobar'],
    [1, '🙂🙂', '🙂'],
];
Loading