@@ -649,14 +649,73 @@ def test_urlsplit_remove_unsafe_bytes(self):
649649 self .assertEqual (p .scheme , "http" )
650650 self .assertEqual (p .geturl (), "http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
651651
652+ def test_urlsplit_strip_url (self ):
653+ noise = bytes (range (0 , 0x20 + 1 ))
654+ base_url = "http://User:[email protected] :080/doc/?query=yes#frag" 655+
656+ url = noise .decode ("utf-8" ) + base_url
657+ p = urllib .parse .urlsplit (url )
658+ self .assertEqual (p .scheme , "http" )
659+ self .
assertEqual (
p .
netloc ,
"User:[email protected] :080" )
660+ self .assertEqual (p .path , "/doc/" )
661+ self .assertEqual (p .query , "query=yes" )
662+ self .assertEqual (p .fragment , "frag" )
663+ self .assertEqual (p .username , "User" )
664+ self .assertEqual (p .password , "Pass" )
665+ self .assertEqual (p .hostname , "www.python.org" )
666+ self .assertEqual (p .port , 80 )
667+ self .assertEqual (p .geturl (), base_url )
668+
669+ url = noise + base_url .encode ("utf-8" )
670+ p = urllib .parse .urlsplit (url )
671+ self .assertEqual (p .scheme , b"http" )
672+ self .
assertEqual (
p .
netloc ,
b"User:[email protected] :080" )
673+ self .assertEqual (p .path , b"/doc/" )
674+ self .assertEqual (p .query , b"query=yes" )
675+ self .assertEqual (p .fragment , b"frag" )
676+ self .assertEqual (p .username , b"User" )
677+ self .assertEqual (p .password , b"Pass" )
678+ self .assertEqual (p .hostname , b"www.python.org" )
679+ self .assertEqual (p .port , 80 )
680+ self .assertEqual (p .geturl (), base_url .encode ("utf-8" ))
681+
682+ # Test that trailing space is preserved as some applications rely on
683+ # this within query strings.
684+ query_spaces_url = "https://www.python.org:88/doc/?query= "
685+ p = urllib .parse .urlsplit (noise .decode ("utf-8" ) + query_spaces_url )
686+ self .assertEqual (p .scheme , "https" )
687+ self .assertEqual (p .netloc , "www.python.org:88" )
688+ self .assertEqual (p .path , "/doc/" )
689+ self .assertEqual (p .query , "query= " )
690+ self .assertEqual (p .port , 88 )
691+ self .assertEqual (p .geturl (), query_spaces_url )
692+
693+ p = urllib .parse .urlsplit ("www.pypi.org " )
694+ # That "hostname" gets considered a "path" due to the
695+ # trailing space and our existing logic... YUCK...
696+ # and re-assembles via geturl aka unurlsplit into the original.
697+ # django.core.validators.URLValidator (at least through v3.2) relies on
698+ # this, for better or worse, to catch it in a ValidationError via its
699+ # regular expressions.
700+ # Here we test the basic round trip concept of such a trailing space.
701+ self .assertEqual (urllib .parse .urlunsplit (p ), "www.pypi.org " )
702+
703+ # with scheme as cache-key
704+ url = "//www.python.org/"
705+ scheme = noise .decode ("utf-8" ) + "https" + noise .decode ("utf-8" )
706+ for _ in range (2 ):
707+ p = urllib .parse .urlsplit (url , scheme = scheme )
708+ self .assertEqual (p .scheme , "https" )
709+ self .assertEqual (p .geturl (), "https://www.python.org/" )
710+
652711 def test_attributes_bad_port (self ):
653712 """Check handling of invalid ports."""
654713 for bytes in (False , True ):
655714 for parse in (urllib .parse .urlsplit , urllib .parse .urlparse ):
656715 for port in ("foo" , "1.5" , "-1" , "0x10" , "-0" , "1_1" , " 1" , "1 " , "६" ):
657716 with self .subTest (bytes = bytes , parse = parse , port = port ):
658717 netloc = "www.example.net:" + port
659- url = "http://" + netloc
718+ url = "http://" + netloc + "/"
660719 if bytes :
661720 if netloc .isascii () and port .isascii ():
662721 netloc = netloc .encode ("ascii" )
0 commit comments