1111
1212use Psr \Http \Message \UriInterface ;
1313
14- use function array_key_exists ;
1514use function array_keys ;
16- use function count ;
1715use function explode ;
1816use function get_class ;
1917use function gettype ;
2321use function is_string ;
2422use function ltrim ;
2523use function parse_url ;
24+ use function preg_match ;
2625use function preg_replace ;
2726use function preg_replace_callback ;
2827use function rawurlencode ;
2928use function sprintf ;
29+ use function str_split ;
3030use function strpos ;
3131use function strtolower ;
3232use function substr ;
@@ -560,6 +560,8 @@ private function filterScheme(string $scheme) : string
560560 */
561561 private function filterUserInfoPart (string $ part ) : string
562562 {
563+ $ part = $ this ->filterInvalidUtf8 ($ part );
564+
563565 // Note the addition of `%` to initial charset; this allows `|` portion
564566 // to match and thus prevent double-encoding.
565567 return preg_replace_callback (
@@ -574,6 +576,8 @@ private function filterUserInfoPart(string $part) : string
574576 */
575577 private function filterPath (string $ path ) : string
576578 {
579+ $ path = $ this ->filterInvalidUtf8 ($ path );
580+
577581 $ path = preg_replace_callback (
578582 '/(?:[^ ' . self ::CHAR_UNRESERVED . ')(:@&=\+\$,\/;%]+|%(?![A-Fa-f0-9]{2}))/u ' ,
579583 [$ this , 'urlEncodeChar ' ],
@@ -594,6 +598,25 @@ private function filterPath(string $path) : string
594598 return '/ ' . ltrim ($ path , '/ ' );
595599 }
596600
/**
 * Encode the bytes that make a string invalid UTF-8.
 *
 * A string that is already well-formed UTF-8 is returned unchanged.
 * Otherwise every byte in the range 0x80-0xFF is handed to urlEncodeChar()
 * on its own; this includes bytes that belong to well-formed multibyte
 * sequences elsewhere in the same string. ASCII bytes are never touched.
 *
 * @param string $string Raw URI component, possibly containing invalid UTF-8.
 * @return string The component with every non-ASCII byte encoded when the
 *     input was malformed, or the untouched input when it was valid.
 */
private function filterInvalidUtf8(string $string) : string
{
    // An empty pattern with the `u` modifier succeeds only on a well-formed
    // UTF-8 subject; preg_match() yields false (falsy) on a malformed one.
    if (preg_match('//u', $string)) {
        return $string;
    }

    // Taken in isolation, a byte is valid UTF-8 only when it is below 0x80,
    // so a single byte-oriented pass (deliberately without the `u` modifier)
    // over the high bytes replaces one preg_match() call per byte.
    return preg_replace_callback(
        '/[\x80-\xFF]/',
        [$this, 'urlEncodeChar'],
        $string
    );
}
619+
597620 /**
598621 * Filter a query string to ensure it is properly encoded.
599622 *
@@ -654,6 +677,8 @@ private function filterFragment(string $fragment) : string
654677 */
655678 private function filterQueryOrFragment (string $ value ) : string
656679 {
680+ $ value = $ this ->filterInvalidUtf8 ($ value );
681+
657682 return preg_replace_callback (
658683 '/(?:[^ ' . self ::CHAR_UNRESERVED . self ::CHAR_SUB_DELIMS . '%:@\/\?]+|%(?![A-Fa-f0-9]{2}))/u ' ,
659684 [$ this , 'urlEncodeChar ' ],
0 commit comments