Skip to content
This repository was archived by the owner on Jan 29, 2020. It is now read-only.

Commit 49a2b96

Browse files
committed
Merge branch 'hotfix/372-invalid-utf8-in-uri' into develop
Forward port #372 Conflicts: CHANGELOG.md
2 parents 4930fd0 + 6dcf9e7 commit 49a2b96

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ All notable changes to this project will be documented in this file, in reverse
2424

2525
- Nothing.
2626

27-
## 2.1.5 - TBD
27+
## 2.1.5 - 2019-10-10
2828

2929
### Added
3030

@@ -44,7 +44,7 @@ All notable changes to this project will be documented in this file, in reverse
4444

4545
### Fixed
4646

47-
- Nothing.
47+
- [#372](https://github.com/zendframework/zend-diactoros/pull/372) fixes issues that occur in the `Zend\Diactoros\Uri` class when invalid UTF-8 characters are present in the user-info, path, or query string, ensuring they are URL-encoded before being consumed. Previously, such characters could result in a fatal error, which was particularly problematic when marshaling the request URI for an application request cycle.
4848

4949
## 2.1.4 - 2019-10-08
5050

src/Uri.php

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111

1212
use Psr\Http\Message\UriInterface;
1313

14-
use function array_key_exists;
1514
use function array_keys;
16-
use function count;
1715
use function explode;
1816
use function get_class;
1917
use function gettype;
@@ -23,10 +21,12 @@
2321
use function is_string;
2422
use function ltrim;
2523
use function parse_url;
24+
use function preg_match;
2625
use function preg_replace;
2726
use function preg_replace_callback;
2827
use function rawurlencode;
2928
use function sprintf;
29+
use function str_split;
3030
use function strpos;
3131
use function strtolower;
3232
use function substr;
@@ -560,6 +560,8 @@ private function filterScheme(string $scheme) : string
560560
*/
561561
private function filterUserInfoPart(string $part) : string
562562
{
563+
$part = $this->filterInvalidUtf8($part);
564+
563565
// Note the addition of `%` to initial charset; this allows `|` portion
564566
// to match and thus prevent double-encoding.
565567
return preg_replace_callback(
@@ -574,6 +576,8 @@ private function filterUserInfoPart(string $part) : string
574576
*/
575577
private function filterPath(string $path) : string
576578
{
579+
$path = $this->filterInvalidUtf8($path);
580+
577581
$path = preg_replace_callback(
578582
'/(?:[^' . self::CHAR_UNRESERVED . ')(:@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/u',
579583
[$this, 'urlEncodeChar'],
@@ -594,6 +598,26 @@ private function filterPath(string $path) : string
594598
return '/' . ltrim($path, '/');
595599
}
596600

601+
/**
 * Percent-encode every byte of the given string that is not part of a
 * well-formed UTF-8 sequence. All other characters are unchanged.
 *
 * Well-formed multi-byte characters are passed through intact even when
 * the string also contains invalid bytes; only the stray bytes themselves
 * are handed to urlEncodeChar().
 *
 * @param string $string Raw URI component that may contain invalid UTF-8.
 * @return string The component with each invalid byte replaced by the
 *     result of urlEncodeChar().
 */
private function filterInvalidUtf8(string $string) : string
{
    // Fast path: the empty pattern with the `u` modifier only matches when
    // the whole subject is valid UTF-8.
    if (preg_match('//u', $string)) {
        return $string;
    }

    // Byte-mode pattern (deliberately without the `u` modifier, which would
    // reject the subject outright). The first alternative consumes runs of
    // well-formed UTF-8 sequences, excluding overlong forms, surrogates and
    // code points above U+10FFFF; the capturing group then picks up exactly
    // one byte that could not start or continue such a sequence.
    $pattern = '/(?:'
        . '[\x00-\x7F]'
        . '|[\xC2-\xDF][\x80-\xBF]'
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
        . '|\xED[\x80-\x9F][\x80-\xBF]'
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
        . '|[\xF1-\xF3][\x80-\xBF]{3}'
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
        . ')+|(.)/s';

    return preg_replace_callback(
        $pattern,
        function (array $matches) : string {
            // Group 1 is only present when the invalid-byte branch matched;
            // a valid run is returned untouched.
            return isset($matches[1])
                ? $this->urlEncodeChar([$matches[1]])
                : $matches[0];
        },
        $string
    );
}
620+
597621
/**
598622
* Filter a query string to ensure it is properly encoded.
599623
*
@@ -654,6 +678,8 @@ private function filterFragment(string $fragment) : string
654678
*/
655679
private function filterQueryOrFragment(string $value) : string
656680
{
681+
$value = $this->filterInvalidUtf8($value);
682+
657683
return preg_replace_callback(
658684
'/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/u',
659685
[$this, 'urlEncodeChar'],

test/UriTest.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public function userInfoProvider()
101101
'at' => ['user@example.com', 'cred@foo', 'user%40example.com:cred%40foo'],
102102
'percent' => ['%25', '%25', '%25:%25'],
103103
'invalid-enc' => ['%ZZ', '%GG', '%25ZZ:%25GG'],
104+
'invalid-utf' => ["\x21\x92", '!?', '!%92:!%3F'],
104105
];
105106
// @codingStandardsIgnoreEnd
106107
}
@@ -616,7 +617,9 @@ public function utf8PathsDataProvider()
616617
{
617618
return [
618619
['http://example.com/тестовый_путь/', '/тестовый_путь/'],
619-
['http://example.com/ουτοπία/', '/ουτοπία/']
620+
['http://example.com/ουτοπία/', '/ουτοπία/'],
621+
["http://example.com/\x21\x92", '/%21%92'],
622+
['http://example.com/!?', '/%21'],
620623
];
621624
}
622625

@@ -635,6 +638,7 @@ public function utf8QueryStringsDataProvider()
635638
return [
636639
['http://example.com/?q=тестовый_путь', 'q=тестовый_путь'],
637640
['http://example.com/?q=ουτοπία', 'q=ουτοπία'],
641+
["http://example.com/?q=\x21\x92", 'q=!%92'],
638642
];
639643
}
640644

0 commit comments

Comments
 (0)