@@ -719,6 +719,65 @@ def test_urlsplit_strip_url(self):
719719 self .assertEqual (p .scheme , "https" )
720720 self .assertEqual (p .geturl (), "https://www.python.org/" )
721721
722+ def test_urlsplit_strip_url (self ):
723+ noise = bytes (range (0 , 0x20 + 1 ))
724+ base_url = "http://User:[email protected] :080/doc/?query=yes#frag" 725+
726+ url = noise .decode ("utf-8" ) + base_url
727+ p = urllib .parse .urlsplit (url )
728+ self .assertEqual (p .scheme , "http" )
729+ self .
assertEqual (
p .
netloc ,
"User:[email protected] :080" )
730+ self .assertEqual (p .path , "/doc/" )
731+ self .assertEqual (p .query , "query=yes" )
732+ self .assertEqual (p .fragment , "frag" )
733+ self .assertEqual (p .username , "User" )
734+ self .assertEqual (p .password , "Pass" )
735+ self .assertEqual (p .hostname , "www.python.org" )
736+ self .assertEqual (p .port , 80 )
737+ self .assertEqual (p .geturl (), base_url )
738+
739+ url = noise + base_url .encode ("utf-8" )
740+ p = urllib .parse .urlsplit (url )
741+ self .assertEqual (p .scheme , b"http" )
742+ self .
assertEqual (
p .
netloc ,
b"User:[email protected] :080" )
743+ self .assertEqual (p .path , b"/doc/" )
744+ self .assertEqual (p .query , b"query=yes" )
745+ self .assertEqual (p .fragment , b"frag" )
746+ self .assertEqual (p .username , b"User" )
747+ self .assertEqual (p .password , b"Pass" )
748+ self .assertEqual (p .hostname , b"www.python.org" )
749+ self .assertEqual (p .port , 80 )
750+ self .assertEqual (p .geturl (), base_url .encode ("utf-8" ))
751+
752+ # Test that trailing space is preserved as some applications rely on
753+ # this within query strings.
754+ query_spaces_url = "https://www.python.org:88/doc/?query= "
755+ p = urllib .parse .urlsplit (noise .decode ("utf-8" ) + query_spaces_url )
756+ self .assertEqual (p .scheme , "https" )
757+ self .assertEqual (p .netloc , "www.python.org:88" )
758+ self .assertEqual (p .path , "/doc/" )
759+ self .assertEqual (p .query , "query= " )
760+ self .assertEqual (p .port , 88 )
761+ self .assertEqual (p .geturl (), query_spaces_url )
762+
763+ p = urllib .parse .urlsplit ("www.pypi.org " )
764+ # That "hostname" gets considered a "path" due to the
765+ # trailing space and our existing logic... YUCK...
766+ # and re-assembles via geturl aka unurlsplit into the original.
767+ # django.core.validators.URLValidator (at least through v3.2) relies on
768+ # this, for better or worse, to catch it in a ValidationError via its
769+ # regular expressions.
770+ # Here we test the basic round trip concept of such a trailing space.
771+ self .assertEqual (urllib .parse .urlunsplit (p ), "www.pypi.org " )
772+
773+ # with scheme as cache-key
774+ url = "//www.python.org/"
775+ scheme = noise .decode ("utf-8" ) + "https" + noise .decode ("utf-8" )
776+ for _ in range (2 ):
777+ p = urllib .parse .urlsplit (url , scheme = scheme )
778+ self .assertEqual (p .scheme , "https" )
779+ self .assertEqual (p .geturl (), "https://www.python.org/" )
780+
722781 def test_attributes_bad_port (self ):
723782 """Check handling of invalid ports."""
724783 for bytes in (False , True ):
0 commit comments