@@ -349,18 +349,19 @@ def _encode(t):
349349 split = (scheme ,) + split
350350 self .checkRoundtrips (url , parsed , split )
351351
352- def checkJoin (self , base , relurl , expected ):
352+ def checkJoin (self , base , relurl , expected , * , relroundtrip = True ):
353353 with self .subTest (base = base , relurl = relurl ):
354354 self .assertEqual (urllib .parse .urljoin (base , relurl ), expected )
355355 baseb = base .encode ('ascii' )
356356 relurlb = relurl .encode ('ascii' )
357357 expectedb = expected .encode ('ascii' )
358358 self .assertEqual (urllib .parse .urljoin (baseb , relurlb ), expectedb )
359359
360- relurl = urllib .parse .urlunsplit (urllib .parse .urlsplit (relurl ))
361- self .assertEqual (urllib .parse .urljoin (base , relurl ), expected )
362- relurlb = urllib .parse .urlunsplit (urllib .parse .urlsplit (relurlb ))
363- self .assertEqual (urllib .parse .urljoin (baseb , relurlb ), expectedb )
360+ if relroundtrip :
361+ relurl = urllib .parse .urlunsplit (urllib .parse .urlsplit (relurl ))
362+ self .assertEqual (urllib .parse .urljoin (base , relurl ), expected )
363+ relurlb = urllib .parse .urlunsplit (urllib .parse .urlsplit (relurlb ))
364+ self .assertEqual (urllib .parse .urljoin (baseb , relurlb ), expectedb )
364365
365366 def test_unparse_parse (self ):
366367 str_cases = ['Python' , './Python' ,'x-newscheme://foo.com/stuff' ,'x://y' ,'x:/y' ,'x:/' ,'/' ,]
@@ -526,8 +527,6 @@ def test_RFC3986(self):
526527
527528 def test_urljoins (self ):
528529 self .checkJoin (SIMPLE_BASE , 'g:h' ,'g:h' )
529- self .checkJoin (SIMPLE_BASE , 'http:g' ,'http://a/b/c/g' )
530- self .checkJoin (SIMPLE_BASE , 'http:' ,'http://a/b/c/d' )
531530 self .checkJoin (SIMPLE_BASE , 'g' ,'http://a/b/c/g' )
532531 self .checkJoin (SIMPLE_BASE , './g' ,'http://a/b/c/g' )
533532 self .checkJoin (SIMPLE_BASE , 'g/' ,'http://a/b/c/g/' )
@@ -548,8 +547,6 @@ def test_urljoins(self):
548547 self .checkJoin (SIMPLE_BASE , 'g/./h' ,'http://a/b/c/g/h' )
549548 self .checkJoin (SIMPLE_BASE , 'g/../h' ,'http://a/b/c/h' )
550549 self .checkJoin (SIMPLE_BASE , 'http:g' ,'http://a/b/c/g' )
551- self .checkJoin (SIMPLE_BASE , 'http:' ,'http://a/b/c/d' )
552- self .checkJoin (SIMPLE_BASE , 'http:?y' ,'http://a/b/c/d?y' )
553550 self .checkJoin (SIMPLE_BASE , 'http:g?y' ,'http://a/b/c/g?y' )
554551 self .checkJoin (SIMPLE_BASE , 'http:g?y/./x' ,'http://a/b/c/g?y/./x' )
555552 self .checkJoin ('http:///' , '..' ,'http:///' )
@@ -579,6 +576,53 @@ def test_urljoins(self):
579576 # issue 23703: don't duplicate filename
580577 self .checkJoin ('a' , 'b' , 'b' )
581578
579+ # Test with empty (but defined) components.
580+ self .checkJoin (RFC1808_BASE , '' , 'http://a/b/c/d;p?q#f' )
581+ self .checkJoin (RFC1808_BASE , '#' , 'http://a/b/c/d;p?q#' , relroundtrip = False )
582+ self .checkJoin (RFC1808_BASE , '#z' , 'http://a/b/c/d;p?q#z' )
583+ self .checkJoin (RFC1808_BASE , '?' , 'http://a/b/c/d;p?' , relroundtrip = False )
584+ self .checkJoin (RFC1808_BASE , '?#z' , 'http://a/b/c/d;p?#z' , relroundtrip = False )
585+ self .checkJoin (RFC1808_BASE , '?y' , 'http://a/b/c/d;p?y' )
586+ self .checkJoin (RFC1808_BASE , ';' , 'http://a/b/c/;' )
587+ self .checkJoin (RFC1808_BASE , ';?y' , 'http://a/b/c/;?y' )
588+ self .checkJoin (RFC1808_BASE , ';#z' , 'http://a/b/c/;#z' )
589+ self .checkJoin (RFC1808_BASE , ';x' , 'http://a/b/c/;x' )
590+ self .checkJoin (RFC1808_BASE , '/w' , 'http://a/w' )
591+ self .checkJoin (RFC1808_BASE , '//' , 'http://a/b/c/d;p?q#f' )
592+ self .checkJoin (RFC1808_BASE , '//#z' , 'http://a/b/c/d;p?q#z' )
593+ self .checkJoin (RFC1808_BASE , '//?y' , 'http://a/b/c/d;p?y' )
594+ self .checkJoin (RFC1808_BASE , '//;x' , 'http://;x' )
595+ self .checkJoin (RFC1808_BASE , '///w' , 'http://a/w' )
596+ self .checkJoin (RFC1808_BASE , '//v' , 'http://v' )
597+ # For backward compatibility with RFC1630, the scheme name is allowed
598+ # to be present in a relative reference if it is the same as the base
599+ # URI scheme.
600+ self .checkJoin (RFC1808_BASE , 'http:' , 'http://a/b/c/d;p?q#f' )
601+ self .checkJoin (RFC1808_BASE , 'http:#' , 'http://a/b/c/d;p?q#' , relroundtrip = False )
602+ self .checkJoin (RFC1808_BASE , 'http:#z' , 'http://a/b/c/d;p?q#z' )
603+ self .checkJoin (RFC1808_BASE , 'http:?' , 'http://a/b/c/d;p?' , relroundtrip = False )
604+ self .checkJoin (RFC1808_BASE , 'http:?#z' , 'http://a/b/c/d;p?#z' , relroundtrip = False )
605+ self .checkJoin (RFC1808_BASE , 'http:?y' , 'http://a/b/c/d;p?y' )
606+ self .checkJoin (RFC1808_BASE , 'http:;' , 'http://a/b/c/;' )
607+ self .checkJoin (RFC1808_BASE , 'http:;?y' , 'http://a/b/c/;?y' )
608+ self .checkJoin (RFC1808_BASE , 'http:;#z' , 'http://a/b/c/;#z' )
609+ self .checkJoin (RFC1808_BASE , 'http:;x' , 'http://a/b/c/;x' )
610+ self .checkJoin (RFC1808_BASE , 'http:/w' , 'http://a/w' )
611+ self .checkJoin (RFC1808_BASE , 'http://' , 'http://a/b/c/d;p?q#f' )
612+ self .checkJoin (RFC1808_BASE , 'http://#z' , 'http://a/b/c/d;p?q#z' )
613+ self .checkJoin (RFC1808_BASE , 'http://?y' , 'http://a/b/c/d;p?y' )
614+ self .checkJoin (RFC1808_BASE , 'http://;x' , 'http://;x' )
615+ self .checkJoin (RFC1808_BASE , 'http:///w' , 'http://a/w' )
616+ self .checkJoin (RFC1808_BASE , 'http://v' , 'http://v' )
617+ # Different scheme is not ignored.
618+ self .checkJoin (RFC1808_BASE , 'https:' , 'https:' , relroundtrip = False )
619+ self .checkJoin (RFC1808_BASE , 'https:#' , 'https:#' , relroundtrip = False )
620+ self .checkJoin (RFC1808_BASE , 'https:#z' , 'https:#z' , relroundtrip = False )
621+ self .checkJoin (RFC1808_BASE , 'https:?' , 'https:?' , relroundtrip = False )
622+ self .checkJoin (RFC1808_BASE , 'https:?y' , 'https:?y' , relroundtrip = False )
623+ self .checkJoin (RFC1808_BASE , 'https:;' , 'https:;' )
624+ self .checkJoin (RFC1808_BASE , 'https:;x' , 'https:;x' )
625+
582626 def test_RFC2732 (self ):
583627 str_cases = [
584628 ('http://Test.python.org:5432/foo/' , 'test.python.org' , 5432 ),
@@ -641,16 +685,31 @@ def test_urldefrag(self):
641685 ('http://python.org/p?q' , 'http://python.org/p?q' , '' ),
642686 (RFC1808_BASE , 'http://a/b/c/d;p?q' , 'f' ),
643687 (RFC2396_BASE , 'http://a/b/c/d;p?q' , '' ),
688+ ('http://a/b/c;p?q#f' , 'http://a/b/c;p?q' , 'f' ),
689+ ('http://a/b/c;p?q#' , 'http://a/b/c;p?q' , '' ),
690+ ('http://a/b/c;p?q' , 'http://a/b/c;p?q' , '' ),
691+ ('http://a/b/c;p?#f' , 'http://a/b/c;p?' , 'f' ),
692+ ('http://a/b/c;p#f' , 'http://a/b/c;p' , 'f' ),
693+ ('http://a/b/c;?q#f' , 'http://a/b/c;?q' , 'f' ),
694+ ('http://a/b/c?q#f' , 'http://a/b/c?q' , 'f' ),
695+ ('http:///b/c;p?q#f' , 'http:///b/c;p?q' , 'f' ),
696+ ('http:b/c;p?q#f' , 'http:b/c;p?q' , 'f' ),
697+ ('http:;?q#f' , 'http:;?q' , 'f' ),
698+ ('http:?q#f' , 'http:?q' , 'f' ),
699+ ('//a/b/c;p?q#f' , '//a/b/c;p?q' , 'f' ),
700+ ('://a/b/c;p?q#f' , '://a/b/c;p?q' , 'f' ),
644701 ]
645702 def _encode (t ):
646703 return type (t )(x .encode ('ascii' ) for x in t )
647704 bytes_cases = [_encode (x ) for x in str_cases ]
648705 for url , defrag , frag in str_cases + bytes_cases :
649- result = urllib .parse .urldefrag (url )
650- self .assertEqual (result .geturl (), url )
651- self .assertEqual (result , (defrag , frag ))
652- self .assertEqual (result .url , defrag )
653- self .assertEqual (result .fragment , frag )
706+ with self .subTest (url ):
707+ result = urllib .parse .urldefrag (url )
708+ hash = '#' if isinstance (url , str ) else b'#'
709+ self .assertEqual (result .geturl (), url .rstrip (hash ))
710+ self .assertEqual (result , (defrag , frag ))
711+ self .assertEqual (result .url , defrag )
712+ self .assertEqual (result .fragment , frag )
654713
655714 def test_urlsplit_scoped_IPv6 (self ):
656715 p = urllib .parse .urlsplit ('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234' )
0 commit comments