11
11
12
12
use Psr \Http \Message \UriInterface ;
13
13
14
- use function array_key_exists ;
15
14
use function array_keys ;
16
- use function count ;
17
15
use function explode ;
18
16
use function get_class ;
19
17
use function gettype ;
23
21
use function is_string ;
24
22
use function ltrim ;
25
23
use function parse_url ;
24
+ use function preg_match ;
26
25
use function preg_replace ;
27
26
use function preg_replace_callback ;
28
27
use function rawurlencode ;
29
28
use function sprintf ;
29
+ use function str_split ;
30
30
use function strpos ;
31
31
use function strtolower ;
32
32
use function substr ;
@@ -560,6 +560,8 @@ private function filterScheme(string $scheme) : string
560
560
*/
561
561
private function filterUserInfoPart (string $ part ) : string
562
562
{
563
+ $ part = $ this ->filterInvalidUtf8 ($ part );
564
+
563
565
// Note the addition of `%` to initial charset; this allows `|` portion
564
566
// to match and thus prevent double-encoding.
565
567
return preg_replace_callback (
@@ -574,6 +576,8 @@ private function filterUserInfoPart(string $part) : string
574
576
*/
575
577
private function filterPath (string $ path ) : string
576
578
{
579
+ $ path = $ this ->filterInvalidUtf8 ($ path );
580
+
577
581
$ path = preg_replace_callback (
578
582
'/(?:[^ ' . self ::CHAR_UNRESERVED . ')(:@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/u ' ,
579
583
[$ this , 'urlEncodeChar ' ],
@@ -594,6 +598,25 @@ private function filterPath(string $path) : string
594
598
return '/ ' . ltrim ($ path , '/ ' );
595
599
}
596
600
601
/**
 * Make a string safe for the UTF-8 aware (`/u`) filters by neutralising malformed input.
 *
 * A string that is already well-formed UTF-8 is returned untouched. Otherwise the
 * string is processed byte by byte: a lone byte is only valid UTF-8 when it is
 * ASCII, so every non-ASCII byte (including the bytes of otherwise well-formed
 * multi-byte characters) is handed to urlEncodeChar(), while ASCII bytes are
 * copied through unchanged.
 *
 * @param string $string Raw URI component value.
 * @return string The value with each non-ASCII byte replaced by the result of urlEncodeChar().
 */
private function filterInvalidUtf8(string $string) : string
{
    // An empty pattern with the `u` modifier only matches when the subject is valid UTF-8.
    if (preg_match('//u', $string)) {
        return $string;
    }

    $filtered = '';
    foreach (str_split($string) as $byte) {
        // Same validity probe, applied to a single byte at a time.
        $filtered .= preg_match('//u', $byte)
            ? $byte
            : $this->urlEncodeChar([$byte]);
    }

    return $filtered;
}
619
+
597
620
/**
598
621
* Filter a query string to ensure it is properly encoded.
599
622
*
@@ -654,6 +677,8 @@ private function filterFragment(string $fragment) : string
654
677
*/
655
678
private function filterQueryOrFragment (string $ value ) : string
656
679
{
680
+ $ value = $ this ->filterInvalidUtf8 ($ value );
681
+
657
682
return preg_replace_callback (
658
683
'/(?:[^ ' . self ::CHAR_UNRESERVED . self ::CHAR_SUB_DELIMS . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/u ' ,
659
684
[$ this , 'urlEncodeChar ' ],
0 commit comments