Skip to content
This repository was archived by the owner on Jan 29, 2020. It is now read-only.

Commit ec0c147

Browse files
committed
Fix encoding invalid UTF-8 characters
1 parent d0d8a96 commit ec0c147

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

src/Uri.php

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111

1212
use Psr\Http\Message\UriInterface;
1313

14-
use function array_key_exists;
1514
use function array_keys;
16-
use function count;
1715
use function explode;
1816
use function get_class;
1917
use function gettype;
@@ -23,10 +21,12 @@
2321
use function is_string;
2422
use function ltrim;
2523
use function parse_url;
24+
use function preg_match;
2625
use function preg_replace;
2726
use function preg_replace_callback;
2827
use function rawurlencode;
2928
use function sprintf;
29+
use function str_split;
3030
use function strpos;
3131
use function strtolower;
3232
use function substr;
@@ -560,6 +560,8 @@ private function filterScheme(string $scheme) : string
560560
*/
561561
private function filterUserInfoPart(string $part) : string
562562
{
563+
$part = $this->filterInvalidUtf8($part);
564+
563565
// Note the addition of `%` to initial charset; this allows `|` portion
564566
// to match and thus prevent double-encoding.
565567
return preg_replace_callback(
@@ -574,6 +576,8 @@ private function filterUserInfoPart(string $part) : string
574576
*/
575577
private function filterPath(string $path) : string
576578
{
579+
$path = $this->filterInvalidUtf8($path);
580+
577581
$path = preg_replace_callback(
578582
'/(?:[^' . self::CHAR_UNRESERVED . ')(:@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/u',
579583
[$this, 'urlEncodeChar'],
@@ -594,6 +598,25 @@ private function filterPath(string $path) : string
594598
return '/' . ltrim($path, '/');
595599
}
596600

601+
/**
602+
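* Encode invalid UTF-8 characters in given string. A string that is already valid UTF-8 is returned unchanged; otherwise the string is split into single bytes and every byte that is not valid UTF-8 on its own is passed through urlEncodeChar(), while ASCII bytes are left as they are.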
603+
*/
604+
private function filterInvalidUtf8(string $string) : string
605+
{
606+
if (preg_match('//u', $string)) {
607+
return $string;
608+
}
609+
610+
$letters = str_split($string);
611+
foreach ($letters as $i => $letter) {
612+
if (! preg_match('//u', $letter)) {
613+
$letters[$i] = $this->urlEncodeChar([$letter]);
614+
}
615+
}
616+
617+
return implode('', $letters);
618+
}
619+
597620
/**
598621
* Filter a query string to ensure it is properly encoded.
599622
*
@@ -654,6 +677,8 @@ private function filterFragment(string $fragment) : string
654677
*/
655678
private function filterQueryOrFragment(string $value) : string
656679
{
680+
$value = $this->filterInvalidUtf8($value);
681+
657682
return preg_replace_callback(
658683
'/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/u',
659684
[$this, 'urlEncodeChar'],

test/UriTest.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public function userInfoProvider()
101101
'at' => ['user@example.com', 'cred@foo', 'user%40example.com:cred%40foo'],
102102
'percent' => ['%25', '%25', '%25:%25'],
103103
'invalid-enc' => ['%ZZ', '%GG', '%25ZZ:%25GG'],
104+
'invalid-utf' => ["\x21\x92", '!?', '!%92:!%3F'],
104105
];
105106
// @codingStandardsIgnoreEnd
106107
}
@@ -617,7 +618,8 @@ public function utf8PathsDataProvider()
617618
return [
618619
['http://example.com/тестовый_путь/', '/тестовый_путь/'],
619620
['http://example.com/ουτοπία/', '/ουτοπία/'],
620-
["http://example.com/\x21\x92", "\x21\x92"],
621+
["http://example.com/\x21\x92", '/%21%92'],
622+
['http://example.com/!?', '/%21'],
621623
];
622624
}
623625

@@ -636,7 +638,7 @@ public function utf8QueryStringsDataProvider()
636638
return [
637639
['http://example.com/?q=тестовый_путь', 'q=тестовый_путь'],
638640
['http://example.com/?q=ουτοπία', 'q=ουτοπία'],
639-
["http://example.com/?q=\x21\x92", "?q=\x21\x92"],
641+
["http://example.com/?q=\x21\x92", 'q=!%92'],
640642
];
641643
}
642644

0 commit comments

Comments
 (0)