Skip to content

Commit 33d7f71

Browse files
authored
Strings::toAscii() fixed to correctly convert German umlauts (BC break) (#240)
This also applies to the Strings::webalize() method.
1 parent 2468dd3 commit 33d7f71

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

src/Utils/Strings.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ public static function toAscii(string $s): string
158158
$s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);
159159

160160
// transliteration (by Transliterator and iconv) is not optimal, replace some characters directly
161-
$s = strtr($s, ["\u{201E}" => '"', "\u{201C}" => '"', "\u{201D}" => '"', "\u{201A}" => "'", "\u{2018}" => "'", "\u{2019}" => "'", "\u{B0}" => '^', "\u{42F}" => 'Ya', "\u{44F}" => 'ya', "\u{42E}" => 'Yu', "\u{44E}" => 'yu']); // „ “ ” ‚ ‘ ’ ° Я я Ю ю
161+
$s = strtr($s, ["\u{201E}" => '"', "\u{201C}" => '"', "\u{201D}" => '"', "\u{201A}" => "'", "\u{2018}" => "'", "\u{2019}" => "'", "\u{B0}" => '^', "\u{42F}" => 'Ya', "\u{44F}" => 'ya', "\u{42E}" => 'Yu', "\u{44E}" => 'yu', "\u{c4}" => 'Ae', "\u{d6}" => 'Oe', "\u{dc}" => 'Ue', "\u{1e9e}" => 'Ss', "\u{e4}" => 'ae', "\u{f6}" => 'oe', "\u{fc}" => 'ue', "\u{df}" => 'ss']); // „ “ ” ‚ ‘ ’ ° Я я Ю ю Ä Ö Ü ẞ ä ö ü ß
162162
if ($iconv !== 'libiconv') {
163163
$s = strtr($s, ["\u{AE}" => '(R)', "\u{A9}" => '(c)', "\u{2026}" => '...', "\u{AB}" => '<<', "\u{BB}" => '>>', "\u{A3}" => 'lb', "\u{A5}" => 'yen', "\u{B2}" => '^2', "\u{B3}" => '^3', "\u{B5}" => 'u', "\u{B9}" => '^1', "\u{BA}" => 'o', "\u{BF}" => '?', "\u{2CA}" => "'", "\u{2CD}" => '_', "\u{2DD}" => '"', "\u{1FEF}" => '', "\u{20AC}" => 'EUR', "\u{2122}" => 'TM', "\u{212E}" => 'e', "\u{2190}" => '<-', "\u{2191}" => '^', "\u{2192}" => '->', "\u{2193}" => 'V', "\u{2194}" => '<->']); // ® © … « » £ ¥ ² ³ µ ¹ º ¿ ˊ ˍ ˝ ` € ™ ℮ ← ↑ → ↓ ↔
164164
}

tests/Utils/Strings.toAscii().phpt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use Tester\Assert;
1313
require __DIR__ . '/../bootstrap.php';
1414

1515

16-
Assert::same('ZLUTOUCKY KUN oooo--', Strings::toAscii("\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o\x2d\u{2013}")); // ŽLUŤOUČKÝ KŮŇ öőôo
16+
Assert::same('ZLUTOUCKY KUN oeooo--', Strings::toAscii("\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o\x2d\u{2013}")); // ŽLUŤOUČKÝ KŮŇ öőôo
1717
Assert::same('Zlutoucky kun', Strings::toAscii("Z\u{30C}lut\u{30C}ouc\u{30C}ky\u{301} ku\u{30A}n\u{30C}")); // Žluťoučký kůň with combining characters
1818
Assert::same('Z `\'"^~?', Strings::toAscii("\u{17D} `'\"^~?"));
1919
Assert::same('"""\'\'\'>><<^', Strings::toAscii("\u{201E}\u{201C}\u{201D}\u{201A}\u{2018}\u{2019}\u{BB}\u{AB}\u{B0}")); // „“”‚‘’»«°
@@ -31,3 +31,4 @@ if (class_exists('Transliterator') && \Transliterator::create('Any-Latin; Latin-
3131

3232

3333
Assert::same('Ya ya Yu yu', Strings::toAscii("\u{42F} \u{44F} \u{42E} \u{44E}")); // Я я Ю ю
34+
Assert::same('Ae Oe Ue Ss ae oe ue ss', Strings::toAscii("\u{c4} \u{d6} \u{dc} \u{1e9e} \u{e4} \u{f6} \u{fc} \u{df}")); // Ä Ö Ü ẞ ä ö ü ß

tests/Utils/Strings.webalize().phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ use Tester\Assert;
1313
require __DIR__ . '/../bootstrap.php';
1414

1515

16-
Assert::same('zlutoucky-kun-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!")); // &ŽLUŤOUČKÝ KŮŇ öőôo!
17-
Assert::same('ZLUTOUCKY-KUN-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!", null, false)); // &ŽLUŤOUČKÝ KŮŇ öőôo!
16+
Assert::same('zlutoucky-kun-oeooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!")); // &ŽLUŤOUČKÝ KŮŇ öőôo!
17+
Assert::same('ZLUTOUCKY-KUN-oeooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!", null, false)); // &ŽLUŤOUČKÝ KŮŇ öőôo!
1818
if (class_exists('Transliterator') && \Transliterator::create('Any-Latin; Latin-ASCII')) {
1919
Assert::same('1-4-!', Strings::webalize("\u{BC} !", '!'));
2020
}

0 commit comments

Comments
 (0)