diff --git a/SPECS/python3/CVE-2024-11168.patch b/SPECS/python3/CVE-2024-11168.patch index 25646db3822..0c3e178ef69 100644 --- a/SPECS/python3/CVE-2024-11168.patch +++ b/SPECS/python3/CVE-2024-11168.patch @@ -1,30 +1,26 @@ -From 9e35c846cce147f241b6857d5007905fb54e5806 Mon Sep 17 00:00:00 2001 -From: ankita -Date: Fri, 15 Nov 2024 18:38:46 +0530 -Subject: [PATCH] Adds checks to ensure that bracketed hosts found by urlsplit - are of IPv6 or IPvFuture format. Fixes CVE-2024-11168 +From a0d225a161732f7d67333105e012d7722c521f54 Mon Sep 17 00:00:00 2001 +From: JohnJamesUtley +Date: Tue, 25 Apr 2023 16:01:03 -0400 +Subject: [PATCH 1/4] Adds checks to ensure that bracketed hosts found by + urlsplit are of IPv6 or IPvFuture format Signed-off-by: ankita --- - Lib/test/test_urlparse.py | 26 + - Lib/test/test_urlparse.py.orig | 1385 +++++++++++++++++ - Lib/urllib/parse.py | 16 +- - Lib/urllib/parse.py.orig | 1209 ++++++++++++++ - ...-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 2 + - 5 files changed, 2637 insertions(+), 1 deletion(-) - create mode 100644 Lib/test/test_urlparse.py.orig - create mode 100644 Lib/urllib/parse.py.orig + Lib/test/test_urlparse.py | 23 +++++++++++++++++++ + Lib/urllib/parse.py | 20 +++++++++++++--- + ...-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 2 ++ + 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py -index 574da5b..c84df23 100644 +index 574da5b..5d82358 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py -@@ -1071,6 +1071,32 @@ class UrlParseTestCase(unittest.TestCase): +@@ -1071,6 +1071,29 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p2.scheme, 'tel') self.assertEqual(p2.path, '+31641044153') -+ def test_invalid_bracketed_hosts(self): ++ def test_splitting_bracketed_hosts(self): + self.assertRaises(ValueError, urllib.parse.urlsplit, 
'Scheme://user@[192.0.2.146]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') @@ -34,9 +30,6 @@ index 574da5b..c84df23 100644 + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') -+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') -+ -+ def test_splitting_bracketed_hosts(self): + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') + self.assertEqual(p1.hostname, 'v6a.ip') + self.assertEqual(p1.username, 'user') @@ -53,1399 +46,8 @@ index 574da5b..c84df23 100644 def test_port_casting_failure_message(self): message = "Port could not be cast to integer value as 'oracle'" p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle') -diff --git a/Lib/test/test_urlparse.py.orig b/Lib/test/test_urlparse.py.orig -new file mode 100644 -index 0000000..574da5b ---- /dev/null -+++ b/Lib/test/test_urlparse.py.orig -@@ -0,0 +1,1385 @@ -+import sys -+import unicodedata -+import unittest -+import urllib.parse -+ -+RFC1808_BASE = "http://a/b/c/d;p?q#f" -+RFC2396_BASE = "http://a/b/c/d;p?q" -+RFC3986_BASE = 'http://a/b/c/d;p?q' -+SIMPLE_BASE = 'http://a/b/c/d' -+ -+# Each parse_qsl testcase is a two-tuple that contains -+# a string with the query and a list with the expected result. 
-+ -+parse_qsl_test_cases = [ -+ ("", []), -+ ("&", []), -+ ("&&", []), -+ ("=", [('', '')]), -+ ("=a", [('', 'a')]), -+ ("a", [('a', '')]), -+ ("a=", [('a', '')]), -+ ("&a=b", [('a', 'b')]), -+ ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), -+ ("a=1&a=2", [('a', '1'), ('a', '2')]), -+ (b"", []), -+ (b"&", []), -+ (b"&&", []), -+ (b"=", [(b'', b'')]), -+ (b"=a", [(b'', b'a')]), -+ (b"a", [(b'a', b'')]), -+ (b"a=", [(b'a', b'')]), -+ (b"&a=b", [(b'a', b'b')]), -+ (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), -+ (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), -+ (";a=b", [(';a', 'b')]), -+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), -+ (b";a=b", [(b';a', b'b')]), -+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), -+] -+ -+# Each parse_qs testcase is a two-tuple that contains -+# a string with the query and a dictionary with the expected result. -+ -+parse_qs_test_cases = [ -+ ("", {}), -+ ("&", {}), -+ ("&&", {}), -+ ("=", {'': ['']}), -+ ("=a", {'': ['a']}), -+ ("a", {'a': ['']}), -+ ("a=", {'a': ['']}), -+ ("&a=b", {'a': ['b']}), -+ ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), -+ ("a=1&a=2", {'a': ['1', '2']}), -+ (b"", {}), -+ (b"&", {}), -+ (b"&&", {}), -+ (b"=", {b'': [b'']}), -+ (b"=a", {b'': [b'a']}), -+ (b"a", {b'a': [b'']}), -+ (b"a=", {b'a': [b'']}), -+ (b"&a=b", {b'a': [b'b']}), -+ (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), -+ (b"a=1&a=2", {b'a': [b'1', b'2']}), -+ (";a=b", {';a': ['b']}), -+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), -+ (b";a=b", {b';a': [b'b']}), -+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), -+] -+ -+class UrlParseTestCase(unittest.TestCase): -+ -+ def checkRoundtrips(self, url, parsed, split): -+ result = urllib.parse.urlparse(url) -+ self.assertEqual(result, parsed) -+ t = (result.scheme, result.netloc, result.path, -+ result.params, result.query, result.fragment) -+ self.assertEqual(t, parsed) -+ # put it back together and it should be the same -+ result2 = urllib.parse.urlunparse(result) -+ self.assertEqual(result2, url) -+ 
self.assertEqual(result2, result.geturl()) -+ -+ # the result of geturl() is a fixpoint; we can always parse it -+ # again to get the same result: -+ result3 = urllib.parse.urlparse(result.geturl()) -+ self.assertEqual(result3.geturl(), result.geturl()) -+ self.assertEqual(result3, result) -+ self.assertEqual(result3.scheme, result.scheme) -+ self.assertEqual(result3.netloc, result.netloc) -+ self.assertEqual(result3.path, result.path) -+ self.assertEqual(result3.params, result.params) -+ self.assertEqual(result3.query, result.query) -+ self.assertEqual(result3.fragment, result.fragment) -+ self.assertEqual(result3.username, result.username) -+ self.assertEqual(result3.password, result.password) -+ self.assertEqual(result3.hostname, result.hostname) -+ self.assertEqual(result3.port, result.port) -+ -+ # check the roundtrip using urlsplit() as well -+ result = urllib.parse.urlsplit(url) -+ self.assertEqual(result, split) -+ t = (result.scheme, result.netloc, result.path, -+ result.query, result.fragment) -+ self.assertEqual(t, split) -+ result2 = urllib.parse.urlunsplit(result) -+ self.assertEqual(result2, url) -+ self.assertEqual(result2, result.geturl()) -+ -+ # check the fixpoint property of re-parsing the result of geturl() -+ result3 = urllib.parse.urlsplit(result.geturl()) -+ self.assertEqual(result3.geturl(), result.geturl()) -+ self.assertEqual(result3, result) -+ self.assertEqual(result3.scheme, result.scheme) -+ self.assertEqual(result3.netloc, result.netloc) -+ self.assertEqual(result3.path, result.path) -+ self.assertEqual(result3.query, result.query) -+ self.assertEqual(result3.fragment, result.fragment) -+ self.assertEqual(result3.username, result.username) -+ self.assertEqual(result3.password, result.password) -+ self.assertEqual(result3.hostname, result.hostname) -+ self.assertEqual(result3.port, result.port) -+ -+ def test_qsl(self): -+ for orig, expect in parse_qsl_test_cases: -+ result = urllib.parse.parse_qsl(orig, keep_blank_values=True) -+ 
self.assertEqual(result, expect, "Error parsing %r" % orig) -+ expect_without_blanks = [v for v in expect if len(v[1])] -+ result = urllib.parse.parse_qsl(orig, keep_blank_values=False) -+ self.assertEqual(result, expect_without_blanks, -+ "Error parsing %r" % orig) -+ -+ def test_qs(self): -+ for orig, expect in parse_qs_test_cases: -+ result = urllib.parse.parse_qs(orig, keep_blank_values=True) -+ self.assertEqual(result, expect, "Error parsing %r" % orig) -+ expect_without_blanks = {v: expect[v] -+ for v in expect if len(expect[v][0])} -+ result = urllib.parse.parse_qs(orig, keep_blank_values=False) -+ self.assertEqual(result, expect_without_blanks, -+ "Error parsing %r" % orig) -+ -+ def test_roundtrips(self): -+ str_cases = [ -+ ('file:///tmp/junk.txt', -+ ('file', '', '/tmp/junk.txt', '', '', ''), -+ ('file', '', '/tmp/junk.txt', '', '')), -+ ('imap://mail.python.org/mbox1', -+ ('imap', 'mail.python.org', '/mbox1', '', '', ''), -+ ('imap', 'mail.python.org', '/mbox1', '', '')), -+ ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', -+ ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', -+ '', '', ''), -+ ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', -+ '', '')), -+ ('nfs://server/path/to/file.txt', -+ ('nfs', 'server', '/path/to/file.txt', '', '', ''), -+ ('nfs', 'server', '/path/to/file.txt', '', '')), -+ ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', -+ ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', -+ '', '', ''), -+ ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', -+ '', '')), -+ ('git+ssh://git@github.com/user/project.git', -+ ('git+ssh', 'git@github.com','/user/project.git', -+ '','',''), -+ ('git+ssh', 'git@github.com','/user/project.git', -+ '', '')), -+ ] -+ def _encode(t): -+ return (t[0].encode('ascii'), -+ tuple(x.encode('ascii') for x in t[1]), -+ tuple(x.encode('ascii') for x in t[2])) -+ bytes_cases = [_encode(x) for x in str_cases] -+ for url, parsed, split in str_cases + bytes_cases: -+ 
self.checkRoundtrips(url, parsed, split) -+ -+ def test_http_roundtrips(self): -+ # urllib.parse.urlsplit treats 'http:' as an optimized special case, -+ # so we test both 'http:' and 'https:' in all the following. -+ # Three cheers for white box knowledge! -+ str_cases = [ -+ ('://www.python.org', -+ ('www.python.org', '', '', '', ''), -+ ('www.python.org', '', '', '')), -+ ('://www.python.org#abc', -+ ('www.python.org', '', '', '', 'abc'), -+ ('www.python.org', '', '', 'abc')), -+ ('://www.python.org?q=abc', -+ ('www.python.org', '', '', 'q=abc', ''), -+ ('www.python.org', '', 'q=abc', '')), -+ ('://www.python.org/#abc', -+ ('www.python.org', '/', '', '', 'abc'), -+ ('www.python.org', '/', '', 'abc')), -+ ('://a/b/c/d;p?q#f', -+ ('a', '/b/c/d', 'p', 'q', 'f'), -+ ('a', '/b/c/d;p', 'q', 'f')), -+ ] -+ def _encode(t): -+ return (t[0].encode('ascii'), -+ tuple(x.encode('ascii') for x in t[1]), -+ tuple(x.encode('ascii') for x in t[2])) -+ bytes_cases = [_encode(x) for x in str_cases] -+ str_schemes = ('http', 'https') -+ bytes_schemes = (b'http', b'https') -+ str_tests = str_schemes, str_cases -+ bytes_tests = bytes_schemes, bytes_cases -+ for schemes, test_cases in (str_tests, bytes_tests): -+ for scheme in schemes: -+ for url, parsed, split in test_cases: -+ url = scheme + url -+ parsed = (scheme,) + parsed -+ split = (scheme,) + split -+ self.checkRoundtrips(url, parsed, split) -+ -+ def checkJoin(self, base, relurl, expected): -+ str_components = (base, relurl, expected) -+ self.assertEqual(urllib.parse.urljoin(base, relurl), expected) -+ bytes_components = baseb, relurlb, expectedb = [ -+ x.encode('ascii') for x in str_components] -+ self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) -+ -+ def test_unparse_parse(self): -+ str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] -+ bytes_cases = [x.encode('ascii') for x in str_cases] -+ for u in str_cases + bytes_cases: -+ 
self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) -+ self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) -+ -+ def test_RFC1808(self): -+ # "normal" cases from RFC 1808: -+ self.checkJoin(RFC1808_BASE, 'g:h', 'g:h') -+ self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g') -+ self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g') -+ self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/') -+ self.checkJoin(RFC1808_BASE, '/g', 'http://a/g') -+ self.checkJoin(RFC1808_BASE, '//g', 'http://g') -+ self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y') -+ self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') -+ self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s') -+ self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s') -+ self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') -+ self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s') -+ self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x') -+ self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') -+ self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/') -+ self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/') -+ self.checkJoin(RFC1808_BASE, '..', 'http://a/b/') -+ self.checkJoin(RFC1808_BASE, '../', 'http://a/b/') -+ self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g') -+ self.checkJoin(RFC1808_BASE, '../..', 'http://a/') -+ self.checkJoin(RFC1808_BASE, '../../', 'http://a/') -+ self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g') -+ -+ # "abnormal" cases from RFC 1808: -+ self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') -+ self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') -+ self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') -+ self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') -+ self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g') -+ self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g') -+ self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/') -+ self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h') -+ 
self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h') -+ -+ # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808), -+ # so we'll not actually run these tests (which expect 1808 behavior). -+ #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') -+ #self.checkJoin(RFC1808_BASE, 'http:', 'http:') -+ -+ # XXX: The following tests are no longer compatible with RFC3986 -+ # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') -+ # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') -+ # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') -+ # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') -+ -+ -+ def test_RFC2368(self): -+ # Issue 11467: path that starts with a number is not parsed correctly -+ self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'), -+ ('mailto', '', '1337@example.org', '', '', '')) -+ -+ def test_RFC2396(self): -+ # cases from RFC 2396 -+ -+ self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') -+ self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g') -+ self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g') -+ self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/') -+ self.checkJoin(RFC2396_BASE, '/g', 'http://a/g') -+ self.checkJoin(RFC2396_BASE, '//g', 'http://g') -+ self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y') -+ self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s') -+ self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s') -+ self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s') -+ self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x') -+ self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') -+ self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/') -+ self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/') -+ self.checkJoin(RFC2396_BASE, '..', 'http://a/b/') -+ self.checkJoin(RFC2396_BASE, '../', 'http://a/b/') -+ self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g') -+ self.checkJoin(RFC2396_BASE, '../..', 'http://a/') -+ self.checkJoin(RFC2396_BASE, '../../', 'http://a/') -+ 
self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') -+ self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) -+ self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') -+ self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') -+ self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') -+ self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g') -+ self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g') -+ self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') -+ self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') -+ self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') -+ self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') -+ self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') -+ self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') -+ self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') -+ self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') -+ self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') -+ -+ # XXX: The following tests are no longer compatible with RFC3986 -+ # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') -+ # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') -+ # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') -+ # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') -+ -+ def test_RFC3986(self): -+ self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') -+ self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x') -+ self.checkJoin(RFC3986_BASE, 'g:h','g:h') -+ self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g') -+ self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g') -+ self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/') -+ self.checkJoin(RFC3986_BASE, '/g','http://a/g') -+ self.checkJoin(RFC3986_BASE, '//g','http://g') -+ self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') -+ self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y') -+ self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s') -+ self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s') 
-+ self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s') -+ self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x') -+ self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x') -+ self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s') -+ self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q') -+ self.checkJoin(RFC3986_BASE, '.','http://a/b/c/') -+ self.checkJoin(RFC3986_BASE, './','http://a/b/c/') -+ self.checkJoin(RFC3986_BASE, '..','http://a/b/') -+ self.checkJoin(RFC3986_BASE, '../','http://a/b/') -+ self.checkJoin(RFC3986_BASE, '../g','http://a/b/g') -+ self.checkJoin(RFC3986_BASE, '../..','http://a/') -+ self.checkJoin(RFC3986_BASE, '../../','http://a/') -+ self.checkJoin(RFC3986_BASE, '../../g','http://a/g') -+ self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g') -+ -+ # Abnormal Examples -+ -+ # The 'abnormal scenarios' are incompatible with RFC2986 parsing -+ # Tests are here for reference. -+ -+ self.checkJoin(RFC3986_BASE, '../../../g','http://a/g') -+ self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g') -+ self.checkJoin(RFC3986_BASE, '/./g','http://a/g') -+ self.checkJoin(RFC3986_BASE, '/../g','http://a/g') -+ self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.') -+ self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g') -+ self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..') -+ self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g') -+ self.checkJoin(RFC3986_BASE, './../g','http://a/b/g') -+ self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/') -+ self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h') -+ self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h') -+ self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y') -+ self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y') -+ self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x') -+ self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x') -+ self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x') -+ self.checkJoin(RFC3986_BASE, 
'g#s/../x','http://a/b/c/g#s/../x') -+ #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser -+ self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser -+ -+ # Test for issue9721 -+ self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x') -+ -+ def test_urljoins(self): -+ self.checkJoin(SIMPLE_BASE, 'g:h','g:h') -+ self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') -+ self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') -+ self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g') -+ self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g') -+ self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/') -+ self.checkJoin(SIMPLE_BASE, '/g','http://a/g') -+ self.checkJoin(SIMPLE_BASE, '//g','http://g') -+ self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y') -+ self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y') -+ self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x') -+ self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/') -+ self.checkJoin(SIMPLE_BASE, './','http://a/b/c/') -+ self.checkJoin(SIMPLE_BASE, '..','http://a/b/') -+ self.checkJoin(SIMPLE_BASE, '../','http://a/b/') -+ self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g') -+ self.checkJoin(SIMPLE_BASE, '../..','http://a/') -+ self.checkJoin(SIMPLE_BASE, '../../g','http://a/g') -+ self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g') -+ self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/') -+ self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h') -+ self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h') -+ self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') -+ self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') -+ self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y') -+ self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y') -+ self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') -+ self.checkJoin('http:///', '..','http:///') -+ self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x') -+ self.checkJoin('', 'http://a/./g', 'http://a/./g') -+ 
self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2') -+ self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2') -+ self.checkJoin('ws://a/b','g','ws://a/g') -+ self.checkJoin('wss://a/b','g','wss://a/g') -+ -+ # XXX: The following tests are no longer compatible with RFC3986 -+ # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') -+ # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g') -+ -+ # test for issue22118 duplicate slashes -+ self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo') -+ -+ # Non-RFC-defined tests, covering variations of base and trailing -+ # slashes -+ self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/') -+ self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/') -+ self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/') -+ self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/') -+ self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g') -+ self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/') -+ -+ # issue 23703: don't duplicate filename -+ self.checkJoin('a', 'b', 'b') -+ -+ def test_RFC2732(self): -+ str_cases = [ -+ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), -+ ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), -+ ('http://[::1]:5432/foo/', '::1', 5432), -+ ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432), -+ ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432), -+ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/', -+ 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432), -+ ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432), -+ ('http://[::ffff:12.34.56.78]:5432/foo/', -+ '::ffff:12.34.56.78', 5432), -+ ('http://Test.python.org/foo/', 'test.python.org', None), -+ ('http://12.34.56.78/foo/', '12.34.56.78', None), -+ ('http://[::1]/foo/', '::1', None), -+ ('http://[dead:beef::1]/foo/', 'dead:beef::1', None), -+ ('http://[dead:beef::]/foo/', 'dead:beef::', None), 
-+ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/', -+ 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), -+ ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), -+ ('http://[::ffff:12.34.56.78]/foo/', -+ '::ffff:12.34.56.78', None), -+ ('http://Test.python.org:/foo/', 'test.python.org', None), -+ ('http://12.34.56.78:/foo/', '12.34.56.78', None), -+ ('http://[::1]:/foo/', '::1', None), -+ ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None), -+ ('http://[dead:beef::]:/foo/', 'dead:beef::', None), -+ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/', -+ 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), -+ ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), -+ ('http://[::ffff:12.34.56.78]:/foo/', -+ '::ffff:12.34.56.78', None), -+ ] -+ def _encode(t): -+ return t[0].encode('ascii'), t[1].encode('ascii'), t[2] -+ bytes_cases = [_encode(x) for x in str_cases] -+ for url, hostname, port in str_cases + bytes_cases: -+ urlparsed = urllib.parse.urlparse(url) -+ self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) -+ -+ str_cases = [ -+ 'http://::12.34.56.78]/', -+ 'http://[::1/foo/', -+ 'ftp://[::1/foo/bad]/bad', -+ 'http://[::1/foo/bad]/bad', -+ 'http://[::ffff:12.34.56.78'] -+ bytes_cases = [x.encode('ascii') for x in str_cases] -+ for invalid_url in str_cases + bytes_cases: -+ self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) -+ -+ def test_urldefrag(self): -+ str_cases = [ -+ ('http://python.org#frag', 'http://python.org', 'frag'), -+ ('http://python.org', 'http://python.org', ''), -+ ('http://python.org/#frag', 'http://python.org/', 'frag'), -+ ('http://python.org/', 'http://python.org/', ''), -+ ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), -+ ('http://python.org/?q', 'http://python.org/?q', ''), -+ ('http://python.org/p#frag', 'http://python.org/p', 'frag'), -+ ('http://python.org/p?q', 'http://python.org/p?q', ''), -+ (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), -+ (RFC2396_BASE, 
'http://a/b/c/d;p?q', ''), -+ ] -+ def _encode(t): -+ return type(t)(x.encode('ascii') for x in t) -+ bytes_cases = [_encode(x) for x in str_cases] -+ for url, defrag, frag in str_cases + bytes_cases: -+ result = urllib.parse.urldefrag(url) -+ self.assertEqual(result.geturl(), url) -+ self.assertEqual(result, (defrag, frag)) -+ self.assertEqual(result.url, defrag) -+ self.assertEqual(result.fragment, frag) -+ -+ def test_urlsplit_scoped_IPv6(self): -+ p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') -+ self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt") -+ self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234') -+ -+ p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') -+ self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt") -+ self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234') -+ -+ def test_urlsplit_attributes(self): -+ url = "HTTP://WWW.PYTHON.ORG/doc/#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.netloc, "WWW.PYTHON.ORG") -+ self.assertEqual(p.path, "/doc/") -+ self.assertEqual(p.query, "") -+ self.assertEqual(p.fragment, "frag") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, "www.python.org") -+ self.assertEqual(p.port, None) -+ # geturl() won't return exactly the original URL in this case -+ # since the scheme is always case-normalized -+ # We handle this by ignoring the first 4 characters of the URL -+ self.assertEqual(p.geturl()[4:], url[4:]) -+ -+ url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080") -+ self.assertEqual(p.path, "/doc/") -+ self.assertEqual(p.query, "query=yes") -+ self.assertEqual(p.fragment, "frag") -+ self.assertEqual(p.username, "User") -+ self.assertEqual(p.password, "Pass") -+ 
self.assertEqual(p.hostname, "www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), url) -+ -+ # Addressing issue1698, which suggests Username can contain -+ # "@" characters. Though not RFC compliant, many ftp sites allow -+ # and request email addresses as usernames. -+ -+ url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") -+ self.assertEqual(p.path, "/doc/") -+ self.assertEqual(p.query, "query=yes") -+ self.assertEqual(p.fragment, "frag") -+ self.assertEqual(p.username, "User@example.com") -+ self.assertEqual(p.password, "Pass") -+ self.assertEqual(p.hostname, "www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), url) -+ -+ # And check them all again, only with bytes this time -+ url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, b"http") -+ self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") -+ self.assertEqual(p.path, b"/doc/") -+ self.assertEqual(p.query, b"") -+ self.assertEqual(p.fragment, b"frag") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, b"www.python.org") -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl()[4:], url[4:]) -+ -+ url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, b"http") -+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") -+ self.assertEqual(p.path, b"/doc/") -+ self.assertEqual(p.query, b"query=yes") -+ self.assertEqual(p.fragment, b"frag") -+ self.assertEqual(p.username, b"User") -+ self.assertEqual(p.password, b"Pass") -+ self.assertEqual(p.hostname, b"www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), url) -+ -+ url = 
b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, b"http") -+ self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") -+ self.assertEqual(p.path, b"/doc/") -+ self.assertEqual(p.query, b"query=yes") -+ self.assertEqual(p.fragment, b"frag") -+ self.assertEqual(p.username, b"User@example.com") -+ self.assertEqual(p.password, b"Pass") -+ self.assertEqual(p.hostname, b"www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), url) -+ -+ # Verify an illegal port raises ValueError -+ url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag" -+ p = urllib.parse.urlsplit(url) -+ with self.assertRaisesRegex(ValueError, "out of range"): -+ p.port -+ -+ def test_urlsplit_remove_unsafe_bytes(self): -+ # Remove ASCII tabs and newlines from input -+ url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.netloc, "www.python.org") -+ self.assertEqual(p.path, "/javascript:alert('msg')/") -+ self.assertEqual(p.query, "query=something") -+ self.assertEqual(p.fragment, "fragment") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, "www.python.org") -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") -+ -+ # Remove ASCII tabs and newlines from input as bytes. 
-+ url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, b"http") -+ self.assertEqual(p.netloc, b"www.python.org") -+ self.assertEqual(p.path, b"/javascript:alert('msg')/") -+ self.assertEqual(p.query, b"query=something") -+ self.assertEqual(p.fragment, b"fragment") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, b"www.python.org") -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment") -+ -+ # with scheme as cache-key -+ url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" -+ scheme = "ht\ntp" -+ for _ in range(2): -+ p = urllib.parse.urlsplit(url, scheme=scheme) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") -+ -+ def test_urlsplit_strip_url(self): -+ noise = bytes(range(0, 0x20 + 1)) -+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" -+ -+ url = noise.decode("utf-8") + base_url -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, "http") -+ self.assertEqual(p.netloc, "User:Pass@www.python.org:080") -+ self.assertEqual(p.path, "/doc/") -+ self.assertEqual(p.query, "query=yes") -+ self.assertEqual(p.fragment, "frag") -+ self.assertEqual(p.username, "User") -+ self.assertEqual(p.password, "Pass") -+ self.assertEqual(p.hostname, "www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), base_url) -+ -+ url = noise + base_url.encode("utf-8") -+ p = urllib.parse.urlsplit(url) -+ self.assertEqual(p.scheme, b"http") -+ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") -+ self.assertEqual(p.path, b"/doc/") -+ self.assertEqual(p.query, b"query=yes") -+ self.assertEqual(p.fragment, b"frag") -+ 
self.assertEqual(p.username, b"User") -+ self.assertEqual(p.password, b"Pass") -+ self.assertEqual(p.hostname, b"www.python.org") -+ self.assertEqual(p.port, 80) -+ self.assertEqual(p.geturl(), base_url.encode("utf-8")) -+ -+ # Test that trailing space is preserved as some applications rely on -+ # this within query strings. -+ query_spaces_url = "https://www.python.org:88/doc/?query= " -+ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url) -+ self.assertEqual(p.scheme, "https") -+ self.assertEqual(p.netloc, "www.python.org:88") -+ self.assertEqual(p.path, "/doc/") -+ self.assertEqual(p.query, "query= ") -+ self.assertEqual(p.port, 88) -+ self.assertEqual(p.geturl(), query_spaces_url) -+ -+ p = urllib.parse.urlsplit("www.pypi.org ") -+ # That "hostname" gets considered a "path" due to the -+ # trailing space and our existing logic... YUCK... -+ # and re-assembles via geturl aka unurlsplit into the original. -+ # django.core.validators.URLValidator (at least through v3.2) relies on -+ # this, for better or worse, to catch it in a ValidationError via its -+ # regular expressions. -+ # Here we test the basic round trip concept of such a trailing space. 
-+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ") -+ -+ # with scheme as cache-key -+ url = "//www.python.org/" -+ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8") -+ for _ in range(2): -+ p = urllib.parse.urlsplit(url, scheme=scheme) -+ self.assertEqual(p.scheme, "https") -+ self.assertEqual(p.geturl(), "https://www.python.org/") -+ -+ def test_attributes_bad_port(self): -+ """Check handling of invalid ports.""" -+ for bytes in (False, True): -+ for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): -+ for port in ("foo", "1.5", "-1", "0x10"): -+ with self.subTest(bytes=bytes, parse=parse, port=port): -+ netloc = "www.example.net:" + port -+ url = "http://" + netloc + "/" -+ if bytes: -+ netloc = netloc.encode("ascii") -+ url = url.encode("ascii") -+ p = parse(url) -+ self.assertEqual(p.netloc, netloc) -+ with self.assertRaises(ValueError): -+ p.port -+ -+ def test_attributes_without_netloc(self): -+ # This example is straight from RFC 3261. It looks like it -+ # should allow the username, hostname, and port to be filled -+ # in, but doesn't. Since it's a URI and doesn't use the -+ # scheme://netloc syntax, the netloc and related attributes -+ # should be left empty. 
-+ uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" -+ p = urllib.parse.urlsplit(uri) -+ self.assertEqual(p.netloc, "") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, None) -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), uri) -+ -+ p = urllib.parse.urlparse(uri) -+ self.assertEqual(p.netloc, "") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, None) -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), uri) -+ -+ # You guessed it, repeating the test with bytes input -+ uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" -+ p = urllib.parse.urlsplit(uri) -+ self.assertEqual(p.netloc, b"") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, None) -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), uri) -+ -+ p = urllib.parse.urlparse(uri) -+ self.assertEqual(p.netloc, b"") -+ self.assertEqual(p.username, None) -+ self.assertEqual(p.password, None) -+ self.assertEqual(p.hostname, None) -+ self.assertEqual(p.port, None) -+ self.assertEqual(p.geturl(), uri) -+ -+ def test_noslash(self): -+ # Issue 1637: http://foo.com?query is legal -+ self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), -+ ('http', 'example.com', '', '', 'blahblah=/foo', '')) -+ self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), -+ (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) -+ -+ def test_withoutscheme(self): -+ # Test urlparse without scheme -+ # Issue 754016: urlparse goes wrong with IP:port without scheme -+ # RFC 1808 specifies that netloc should start with //, urlparse expects -+ # the same, otherwise it classifies the portion of url as path. 
-+ self.assertEqual(urllib.parse.urlparse("path"), -+ ('','','path','','','')) -+ self.assertEqual(urllib.parse.urlparse("//www.python.org:80"), -+ ('','www.python.org:80','','','','')) -+ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), -+ ('http','www.python.org:80','','','','')) -+ # Repeat for bytes input -+ self.assertEqual(urllib.parse.urlparse(b"path"), -+ (b'',b'',b'path',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), -+ (b'',b'www.python.org:80',b'',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), -+ (b'http',b'www.python.org:80',b'',b'',b'',b'')) -+ -+ def test_portseparator(self): -+ # Issue 754016 makes changes for port separator ':' from scheme separator -+ self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','','')) -+ self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','','')) -+ self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','','')) -+ self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) -+ self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) -+ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), -+ ('http','www.python.org:80','','','','')) -+ # As usual, need to check bytes input as well -+ self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) -+ self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), -+ (b'http',b'www.python.org:80',b'',b'',b'',b'')) -+ -+ def test_usingsys(self): -+ # Issue 3314: sys module is used in the error -+ self.assertRaises(TypeError, 
urllib.parse.urlencode, "foo") -+ -+ def test_anyscheme(self): -+ # Issue 7904: s3://foo.com/stuff has netloc "foo.com". -+ self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"), -+ ('s3', 'foo.com', '/stuff', '', '', '')) -+ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), -+ ('x-newscheme', 'foo.com', '/stuff', '', '', '')) -+ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), -+ ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) -+ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"), -+ ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) -+ -+ # And for bytes... -+ self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), -+ (b's3', b'foo.com', b'/stuff', b'', b'', b'')) -+ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), -+ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) -+ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), -+ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) -+ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), -+ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) -+ -+ def test_default_scheme(self): -+ # Exercise the scheme parameter of urlparse() and urlsplit() -+ for func in (urllib.parse.urlparse, urllib.parse.urlsplit): -+ with self.subTest(function=func): -+ result = func("http://example.net/", "ftp") -+ self.assertEqual(result.scheme, "http") -+ result = func(b"http://example.net/", b"ftp") -+ self.assertEqual(result.scheme, b"http") -+ self.assertEqual(func("path", "ftp").scheme, "ftp") -+ self.assertEqual(func("path", scheme="ftp").scheme, "ftp") -+ self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") -+ self.assertEqual(func("path").scheme, "") -+ self.assertEqual(func(b"path").scheme, b"") -+ self.assertEqual(func(b"path", "").scheme, b"") -+ -+ def test_parse_fragments(self): -+ # Exercise the 
allow_fragments parameter of urlparse() and urlsplit() -+ tests = ( -+ ("http:#frag", "path", "frag"), -+ ("//example.net#frag", "path", "frag"), -+ ("index.html#frag", "path", "frag"), -+ (";a=b#frag", "params", "frag"), -+ ("?a=b#frag", "query", "frag"), -+ ("#frag", "path", "frag"), -+ ("abc#@frag", "path", "@frag"), -+ ("//abc#@frag", "path", "@frag"), -+ ("//abc:80#@frag", "path", "@frag"), -+ ("//abc#@frag:80", "path", "@frag:80"), -+ ) -+ for url, attr, expected_frag in tests: -+ for func in (urllib.parse.urlparse, urllib.parse.urlsplit): -+ if attr == "params" and func is urllib.parse.urlsplit: -+ attr = "path" -+ with self.subTest(url=url, function=func): -+ result = func(url, allow_fragments=False) -+ self.assertEqual(result.fragment, "") -+ self.assertTrue( -+ getattr(result, attr).endswith("#" + expected_frag)) -+ self.assertEqual(func(url, "", False).fragment, "") -+ -+ result = func(url, allow_fragments=True) -+ self.assertEqual(result.fragment, expected_frag) -+ self.assertFalse( -+ getattr(result, attr).endswith(expected_frag)) -+ self.assertEqual(func(url, "", True).fragment, -+ expected_frag) -+ self.assertEqual(func(url).fragment, expected_frag) -+ -+ def test_mixed_types_rejected(self): -+ # Several functions that process either strings or ASCII encoded bytes -+ # accept multiple arguments. 
Check they reject mixed type input -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlparse("www.python.org", b"http") -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlparse(b"www.python.org", "http") -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlsplit("www.python.org", b"http") -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlsplit(b"www.python.org", "http") -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlunparse(( b"http", "www.python.org","","","","")) -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlunparse(("http", b"www.python.org","","","","")) -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlunsplit((b"http", "www.python.org","","","")) -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urlunsplit(("http", b"www.python.org","","","")) -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urljoin("http://python.org", b"http://python.org") -+ with self.assertRaisesRegex(TypeError, "Cannot mix str"): -+ urllib.parse.urljoin(b"http://python.org", "http://python.org") -+ -+ def _check_result_type(self, str_type): -+ num_args = len(str_type._fields) -+ bytes_type = str_type._encoded_counterpart -+ self.assertIs(bytes_type._decoded_counterpart, str_type) -+ str_args = ('',) * num_args -+ bytes_args = (b'',) * num_args -+ str_result = str_type(*str_args) -+ bytes_result = bytes_type(*bytes_args) -+ encoding = 'ascii' -+ errors = 'strict' -+ self.assertEqual(str_result, str_args) -+ self.assertEqual(bytes_result.decode(), str_args) -+ self.assertEqual(bytes_result.decode(), str_result) -+ self.assertEqual(bytes_result.decode(encoding), str_args) -+ self.assertEqual(bytes_result.decode(encoding), str_result) -+ self.assertEqual(bytes_result.decode(encoding, errors), str_args) -+ 
self.assertEqual(bytes_result.decode(encoding, errors), str_result) -+ self.assertEqual(bytes_result, bytes_args) -+ self.assertEqual(str_result.encode(), bytes_args) -+ self.assertEqual(str_result.encode(), bytes_result) -+ self.assertEqual(str_result.encode(encoding), bytes_args) -+ self.assertEqual(str_result.encode(encoding), bytes_result) -+ self.assertEqual(str_result.encode(encoding, errors), bytes_args) -+ self.assertEqual(str_result.encode(encoding, errors), bytes_result) -+ -+ def test_result_pairs(self): -+ # Check encoding and decoding between result pairs -+ result_types = [ -+ urllib.parse.DefragResult, -+ urllib.parse.SplitResult, -+ urllib.parse.ParseResult, -+ ] -+ for result_type in result_types: -+ self._check_result_type(result_type) -+ -+ def test_parse_qs_encoding(self): -+ result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") -+ self.assertEqual(result, {'key': ['\u0141\xE9']}) -+ result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8") -+ self.assertEqual(result, {'key': ['\u0141\xE9']}) -+ result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii") -+ self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']}) -+ result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii") -+ self.assertEqual(result, {'key': ['\u0141\ufffd-']}) -+ result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii", -+ errors="ignore") -+ self.assertEqual(result, {'key': ['\u0141-']}) -+ -+ def test_parse_qsl_encoding(self): -+ result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1") -+ self.assertEqual(result, [('key', '\u0141\xE9')]) -+ result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8") -+ self.assertEqual(result, [('key', '\u0141\xE9')]) -+ result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii") -+ self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')]) -+ result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii") -+ self.assertEqual(result, [('key', 
'\u0141\ufffd-')]) -+ result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii", -+ errors="ignore") -+ self.assertEqual(result, [('key', '\u0141-')]) -+ -+ def test_parse_qsl_max_num_fields(self): -+ with self.assertRaises(ValueError): -+ urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10) -+ urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10) -+ -+ def test_parse_qs_separator(self): -+ parse_qs_semicolon_cases = [ -+ (";", {}), -+ (";;", {}), -+ (";a=b", {'a': ['b']}), -+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), -+ ("a=1;a=2", {'a': ['1', '2']}), -+ (b";", {}), -+ (b";;", {}), -+ (b";a=b", {b'a': [b'b']}), -+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), -+ (b"a=1;a=2", {b'a': [b'1', b'2']}), -+ ] -+ for orig, expect in parse_qs_semicolon_cases: -+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): -+ result = urllib.parse.parse_qs(orig, separator=';') -+ self.assertEqual(result, expect, "Error parsing %r" % orig) -+ result_bytes = urllib.parse.parse_qs(orig, separator=b';') -+ self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) -+ -+ -+ def test_parse_qsl_separator(self): -+ parse_qsl_semicolon_cases = [ -+ (";", []), -+ (";;", []), -+ (";a=b", [('a', 'b')]), -+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), -+ ("a=1;a=2", [('a', '1'), ('a', '2')]), -+ (b";", []), -+ (b";;", []), -+ (b";a=b", [(b'a', b'b')]), -+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), -+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), -+ ] -+ for orig, expect in parse_qsl_semicolon_cases: -+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): -+ result = urllib.parse.parse_qsl(orig, separator=';') -+ self.assertEqual(result, expect, "Error parsing %r" % orig) -+ result_bytes = urllib.parse.parse_qsl(orig, separator=b';') -+ self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) -+ -+ -+ def test_urlencode_sequences(self): -+ # Other tests incidentally urlencode things; test non-covered cases: -+ 
# Sequence and object values. -+ result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) -+ # we cannot rely on ordering here -+ assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'} -+ -+ class Trivial: -+ def __str__(self): -+ return 'trivial' -+ -+ result = urllib.parse.urlencode({'a': Trivial()}, True) -+ self.assertEqual(result, 'a=trivial') -+ -+ def test_urlencode_quote_via(self): -+ result = urllib.parse.urlencode({'a': 'some value'}) -+ self.assertEqual(result, "a=some+value") -+ result = urllib.parse.urlencode({'a': 'some value/another'}, -+ quote_via=urllib.parse.quote) -+ self.assertEqual(result, "a=some%20value%2Fanother") -+ result = urllib.parse.urlencode({'a': 'some value/another'}, -+ safe='/', quote_via=urllib.parse.quote) -+ self.assertEqual(result, "a=some%20value/another") -+ -+ def test_quote_from_bytes(self): -+ self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo') -+ result = urllib.parse.quote_from_bytes(b'archaeological arcana') -+ self.assertEqual(result, 'archaeological%20arcana') -+ result = urllib.parse.quote_from_bytes(b'') -+ self.assertEqual(result, '') -+ -+ def test_unquote_to_bytes(self): -+ result = urllib.parse.unquote_to_bytes('abc%20def') -+ self.assertEqual(result, b'abc def') -+ result = urllib.parse.unquote_to_bytes('') -+ self.assertEqual(result, b'') -+ -+ def test_quote_errors(self): -+ self.assertRaises(TypeError, urllib.parse.quote, b'foo', -+ encoding='utf-8') -+ self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict') -+ -+ def test_issue14072(self): -+ p1 = urllib.parse.urlsplit('tel:+31-641044153') -+ self.assertEqual(p1.scheme, 'tel') -+ self.assertEqual(p1.path, '+31-641044153') -+ p2 = urllib.parse.urlsplit('tel:+31641044153') -+ self.assertEqual(p2.scheme, 'tel') -+ self.assertEqual(p2.path, '+31641044153') -+ # assert the behavior for urlparse -+ p1 = urllib.parse.urlparse('tel:+31-641044153') -+ self.assertEqual(p1.scheme, 'tel') -+ 
self.assertEqual(p1.path, '+31-641044153') -+ p2 = urllib.parse.urlparse('tel:+31641044153') -+ self.assertEqual(p2.scheme, 'tel') -+ self.assertEqual(p2.path, '+31641044153') -+ -+ def test_port_casting_failure_message(self): -+ message = "Port could not be cast to integer value as 'oracle'" -+ p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle') -+ with self.assertRaisesRegex(ValueError, message): -+ p1.port -+ -+ p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle') -+ with self.assertRaisesRegex(ValueError, message): -+ p2.port -+ -+ def test_telurl_params(self): -+ p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516') -+ self.assertEqual(p1.scheme, 'tel') -+ self.assertEqual(p1.path, '123-4') -+ self.assertEqual(p1.params, 'phone-context=+1-650-516') -+ -+ p1 = urllib.parse.urlparse('tel:+1-201-555-0123') -+ self.assertEqual(p1.scheme, 'tel') -+ self.assertEqual(p1.path, '+1-201-555-0123') -+ self.assertEqual(p1.params, '') -+ -+ p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com') -+ self.assertEqual(p1.scheme, 'tel') -+ self.assertEqual(p1.path, '7042') -+ self.assertEqual(p1.params, 'phone-context=example.com') -+ -+ p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555') -+ self.assertEqual(p1.scheme, 'tel') -+ self.assertEqual(p1.path, '863-1234') -+ self.assertEqual(p1.params, 'phone-context=+1-914-555') -+ -+ def test_Quoter_repr(self): -+ quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) -+ self.assertIn('Quoter', repr(quoter)) -+ -+ def test_all(self): -+ expected = [] -+ undocumented = { -+ 'splitattr', 'splithost', 'splitnport', 'splitpasswd', -+ 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser', -+ 'splitvalue', -+ 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap', -+ } -+ for name in dir(urllib.parse): -+ if name.startswith('_') or name in undocumented: -+ continue -+ object = getattr(urllib.parse, name) -+ if getattr(object, '__module__', None) == 
'urllib.parse': -+ expected.append(name) -+ self.assertCountEqual(urllib.parse.__all__, expected) -+ -+ def test_urlsplit_normalization(self): -+ # Certain characters should never occur in the netloc, -+ # including under normalization. -+ # Ensure that ALL of them are detected and cause an error -+ illegal_chars = '/:#?@' -+ hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars} -+ denorm_chars = [ -+ c for c in map(chr, range(128, sys.maxunicode)) -+ if (hex_chars & set(unicodedata.decomposition(c).split())) -+ and c not in illegal_chars -+ ] -+ # Sanity check that we found at least one such character -+ self.assertIn('\u2100', denorm_chars) -+ self.assertIn('\uFF03', denorm_chars) -+ -+ # bpo-36742: Verify port separators are ignored when they -+ # existed prior to decomposition -+ urllib.parse.urlsplit('http://\u30d5\u309a:80') -+ with self.assertRaises(ValueError): -+ urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380') -+ -+ for scheme in ["http", "https", "ftp"]: -+ for netloc in ["netloc{}false.netloc", "n{}user@netloc"]: -+ for c in denorm_chars: -+ url = "{}://{}/path".format(scheme, netloc.format(c)) -+ with self.subTest(url=url, char='{:04X}'.format(ord(c))): -+ with self.assertRaises(ValueError): -+ urllib.parse.urlsplit(url) -+ -+class Utility_Tests(unittest.TestCase): -+ """Testcase to test the various utility functions in the urllib.""" -+ # In Python 2 this test class was in test_urllib. 
-+ -+ def test_splittype(self): -+ splittype = urllib.parse._splittype -+ self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) -+ self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) -+ self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) -+ self.assertEqual(splittype('type:'), ('type', '')) -+ self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) -+ -+ def test_splithost(self): -+ splithost = urllib.parse._splithost -+ self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), -+ ('www.example.org:80', '/foo/bar/baz.html')) -+ self.assertEqual(splithost('//www.example.org:80'), -+ ('www.example.org:80', '')) -+ self.assertEqual(splithost('/foo/bar/baz.html'), -+ (None, '/foo/bar/baz.html')) -+ -+ # bpo-30500: # starts a fragment. -+ self.assertEqual(splithost('//127.0.0.1#@host.com'), -+ ('127.0.0.1', '/#@host.com')) -+ self.assertEqual(splithost('//127.0.0.1#@host.com:80'), -+ ('127.0.0.1', '/#@host.com:80')) -+ self.assertEqual(splithost('//127.0.0.1:80#@host.com'), -+ ('127.0.0.1:80', '/#@host.com')) -+ -+ # Empty host is returned as empty string. -+ self.assertEqual(splithost("///file"), -+ ('', '/file')) -+ -+ # Trailing semicolon, question mark and hash symbol are kept. 
-+ self.assertEqual(splithost("//example.net/file;"), -+ ('example.net', '/file;')) -+ self.assertEqual(splithost("//example.net/file?"), -+ ('example.net', '/file?')) -+ self.assertEqual(splithost("//example.net/file#"), -+ ('example.net', '/file#')) -+ -+ def test_splituser(self): -+ splituser = urllib.parse._splituser -+ self.assertEqual(splituser('User:Pass@www.python.org:080'), -+ ('User:Pass', 'www.python.org:080')) -+ self.assertEqual(splituser('@www.python.org:080'), -+ ('', 'www.python.org:080')) -+ self.assertEqual(splituser('www.python.org:080'), -+ (None, 'www.python.org:080')) -+ self.assertEqual(splituser('User:Pass@'), -+ ('User:Pass', '')) -+ self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'), -+ ('User@example.com:Pass', 'www.python.org:080')) -+ -+ def test_splitpasswd(self): -+ # Some of the password examples are not sensible, but it is added to -+ # confirming to RFC2617 and addressing issue4675. -+ splitpasswd = urllib.parse._splitpasswd -+ self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) -+ self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) -+ self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) -+ self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) -+ self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) -+ self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) -+ self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) -+ self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) -+ self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) -+ self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) -+ self.assertEqual(splitpasswd('user:'), ('user', '')) -+ self.assertEqual(splitpasswd('user'), ('user', None)) -+ self.assertEqual(splitpasswd(':ab'), ('', 'ab')) -+ -+ def test_splitport(self): -+ splitport = urllib.parse._splitport -+ self.assertEqual(splitport('parrot:88'), ('parrot', '88')) -+ self.assertEqual(splitport('parrot'), ('parrot', None)) -+ 
self.assertEqual(splitport('parrot:'), ('parrot', None)) -+ self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) -+ self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) -+ self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) -+ self.assertEqual(splitport('[::1]'), ('[::1]', None)) -+ self.assertEqual(splitport(':88'), ('', '88')) -+ -+ def test_splitnport(self): -+ splitnport = urllib.parse._splitnport -+ self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) -+ self.assertEqual(splitnport('parrot'), ('parrot', -1)) -+ self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) -+ self.assertEqual(splitnport('parrot:'), ('parrot', -1)) -+ self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) -+ self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) -+ self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) -+ self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) -+ self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) -+ -+ def test_splitquery(self): -+ # Normal cases are exercised by other tests; ensure that we also -+ # catch cases with no port specified (testcase ensuring coverage) -+ splitquery = urllib.parse._splitquery -+ self.assertEqual(splitquery('http://python.org/fake?foo=bar'), -+ ('http://python.org/fake', 'foo=bar')) -+ self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), -+ ('http://python.org/fake?foo=bar', '')) -+ self.assertEqual(splitquery('http://python.org/fake'), -+ ('http://python.org/fake', None)) -+ self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) -+ -+ def test_splittag(self): -+ splittag = urllib.parse._splittag -+ self.assertEqual(splittag('http://example.com?foo=bar#baz'), -+ ('http://example.com?foo=bar', 'baz')) -+ self.assertEqual(splittag('http://example.com?foo=bar#'), -+ ('http://example.com?foo=bar', '')) -+ self.assertEqual(splittag('#baz'), ('', 'baz')) -+ self.assertEqual(splittag('http://example.com?foo=bar'), -+ 
('http://example.com?foo=bar', None)) -+ self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), -+ ('http://example.com?foo=bar#baz', 'boo')) -+ -+ def test_splitattr(self): -+ splitattr = urllib.parse._splitattr -+ self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), -+ ('/path', ['attr1=value1', 'attr2=value2'])) -+ self.assertEqual(splitattr('/path;'), ('/path', [''])) -+ self.assertEqual(splitattr(';attr1=value1;attr2=value2'), -+ ('', ['attr1=value1', 'attr2=value2'])) -+ self.assertEqual(splitattr('/path'), ('/path', [])) -+ -+ def test_splitvalue(self): -+ # Normal cases are exercised by other tests; test pathological cases -+ # with no key/value pairs. (testcase ensuring coverage) -+ splitvalue = urllib.parse._splitvalue -+ self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) -+ self.assertEqual(splitvalue('foo='), ('foo', '')) -+ self.assertEqual(splitvalue('=bar'), ('', 'bar')) -+ self.assertEqual(splitvalue('foobar'), ('foobar', None)) -+ self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) -+ -+ def test_to_bytes(self): -+ result = urllib.parse._to_bytes('http://www.python.org') -+ self.assertEqual(result, 'http://www.python.org') -+ self.assertRaises(UnicodeError, urllib.parse._to_bytes, -+ 'http://www.python.org/medi\u00e6val') -+ -+ def test_unwrap(self): -+ for wrapped_url in ('', '', -+ 'URL:scheme://host/path', 'scheme://host/path'): -+ url = urllib.parse.unwrap(wrapped_url) -+ self.assertEqual(url, 'scheme://host/path') -+ -+ -+class DeprecationTest(unittest.TestCase): -+ -+ def test_splittype_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splittype('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splittype() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splithost_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splithost('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splithost() 
is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splituser_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splituser('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splituser() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitpasswd_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splitpasswd('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitpasswd() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitport_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splitport('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitport() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitnport_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splitnport('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitnport() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitquery_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splitquery('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitquery() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splittag_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splittag('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splittag() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitattr_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.splitattr('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitattr() is deprecated as of 3.8, ' -+ 'use urllib.parse.urlparse() instead') -+ -+ def test_splitvalue_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as 
cm: -+ urllib.parse.splitvalue('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.splitvalue() is deprecated as of 3.8, ' -+ 'use urllib.parse.parse_qsl() instead') -+ -+ def test_to_bytes_deprecation(self): -+ with self.assertWarns(DeprecationWarning) as cm: -+ urllib.parse.to_bytes('') -+ self.assertEqual(str(cm.warning), -+ 'urllib.parse.to_bytes() is deprecated as of 3.8') -+ -+ -+if __name__ == "__main__": -+ unittest.main() diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py -index 5b7193f..2eb3448 100644 +index 5b7193f..5ab115b 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -36,6 +36,7 @@ import sys @@ -1456,6 +58,24 @@ index 5b7193f..2eb3448 100644 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", +@@ -212,7 +213,7 @@ class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: +- hostname, _, port = bracketed.partition(']') ++ hostname, _, port = bracketed.rpartition(']') + _, _, port = port.partition(':') + else: + hostname, _, port = hostinfo.partition(':') +@@ -242,7 +243,7 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: +- hostname, _, port = bracketed.partition(b']') ++ hostname, _, port = bracketed.rpartition(b']') + _, _, port = port.partition(b':') + else: + hostname, _, port = hostinfo.partition(b':') @@ -442,6 +443,17 @@ def _checknetloc(netloc): raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") @@ -1485,1234 +105,139 @@ index 5b7193f..2eb3448 100644 (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") + if '[' in netloc and ']' in netloc: -+ bracketed_host = netloc.partition('[')[2].partition(']')[0] ++ bracketed_host 
= netloc.partition('[')[2].rpartition(']')[0] + _check_bracketed_host(bracketed_host) if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: -diff --git a/Lib/urllib/parse.py.orig b/Lib/urllib/parse.py.orig -new file mode 100644 -index 0000000..5b7193f ---- /dev/null -+++ b/Lib/urllib/parse.py.orig -@@ -0,0 +1,1209 @@ -+"""Parse (absolute and relative) URLs. -+ -+urlparse module is based upon the following RFC specifications. -+ -+RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -+and L. Masinter, January 2005. -+ -+RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -+and L.Masinter, December 1999. -+ -+RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. -+Berners-Lee, R. Fielding, and L. Masinter, August 1998. -+ -+RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. -+ -+RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June -+1995. -+ -+RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. -+McCahill, December 1994 -+ -+RFC 3986 is considered the current standard and any future changes to -+urlparse module should conform with it. The urlparse module is -+currently not entirely compliant with this RFC due to defacto -+scenarios for parsing, and for backward compatibility purposes, some -+parsing quirks from older RFCs are retained. The testcases in -+test_urlparse.py provides a good indicator of parsing behavior. -+ -+The WHATWG URL Parser spec should also be considered. We are not compliant with -+it either due to existing user code API behavior expectations (Hyrum's Law). -+It serves as a useful guide when making changes. 
-+""" -+ -+import re -+import sys -+import types -+import collections -+import warnings -+ -+__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", -+ "urlsplit", "urlunsplit", "urlencode", "parse_qs", -+ "parse_qsl", "quote", "quote_plus", "quote_from_bytes", -+ "unquote", "unquote_plus", "unquote_to_bytes", -+ "DefragResult", "ParseResult", "SplitResult", -+ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] -+ -+# A classification of schemes. -+# The empty string classifies URLs with no scheme specified, -+# being the default value returned by “urlsplit” and “urlparse”. -+ -+uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', -+ 'wais', 'file', 'https', 'shttp', 'mms', -+ 'prospero', 'rtsp', 'rtspu', 'sftp', -+ 'svn', 'svn+ssh', 'ws', 'wss'] -+ -+uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet', -+ 'imap', 'wais', 'file', 'mms', 'https', 'shttp', -+ 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', -+ 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh', -+ 'ws', 'wss'] -+ -+uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap', -+ 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', -+ 'mms', 'sftp', 'tel'] -+ -+# These are not actually used anymore, but should stay for backwards -+# compatibility. (They are undocumented, but have a public-looking name.) -+ -+non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', -+ 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] -+ -+uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms', -+ 'gopher', 'rtsp', 'rtspu', 'sip', 'sips'] -+ -+uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', -+ 'nntp', 'wais', 'https', 'shttp', 'snews', -+ 'file', 'prospero'] -+ -+# Characters valid in scheme names -+scheme_chars = ('abcdefghijklmnopqrstuvwxyz' -+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -+ '0123456789' -+ '+-.') -+ -+# Leading and trailing C0 control and space to be stripped per WHATWG spec. 
-+# == "".join([chr(i) for i in range(0, 0x20 + 1)]) -+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f ' -+ -+# Unsafe bytes to be removed per WHATWG spec -+_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] -+ -+# XXX: Consider replacing with functools.lru_cache -+MAX_CACHE_SIZE = 20 -+_parse_cache = {} -+ -+def clear_cache(): -+ """Clear the parse cache and the quoters cache.""" -+ _parse_cache.clear() -+ _safe_quoters.clear() -+ -+ -+# Helpers for bytes handling -+# For 3.2, we deliberately require applications that -+# handle improperly quoted URLs to do their own -+# decoding and encoding. If valid use cases are -+# presented, we may relax this by using latin-1 -+# decoding internally for 3.3 -+_implicit_encoding = 'ascii' -+_implicit_errors = 'strict' -+ -+def _noop(obj): -+ return obj -+ -+def _encode_result(obj, encoding=_implicit_encoding, -+ errors=_implicit_errors): -+ return obj.encode(encoding, errors) -+ -+def _decode_args(args, encoding=_implicit_encoding, -+ errors=_implicit_errors): -+ return tuple(x.decode(encoding, errors) if x else '' for x in args) -+ -+def _coerce_args(*args): -+ # Invokes decode if necessary to create str args -+ # and returns the coerced inputs along with -+ # an appropriate result coercion function -+ # - noop for str inputs -+ # - encoding function otherwise -+ str_input = isinstance(args[0], str) -+ for arg in args[1:]: -+ # We special-case the empty string to support the -+ # "scheme=''" default argument to some functions -+ if arg and isinstance(arg, str) != str_input: -+ raise TypeError("Cannot mix str and non-str arguments") -+ if str_input: -+ return args + (_noop,) -+ return _decode_args(args) + (_encode_result,) -+ -+# Result objects are more helpful than simple tuples -+class _ResultMixinStr(object): -+ """Standard approach to encoding parsed results from str to bytes""" -+ __slots__ = () -+ -+ def encode(self, 
encoding='ascii', errors='strict'): -+ return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) -+ -+ -+class _ResultMixinBytes(object): -+ """Standard approach to decoding parsed results from bytes to str""" -+ __slots__ = () -+ -+ def decode(self, encoding='ascii', errors='strict'): -+ return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) -+ -+ -+class _NetlocResultMixinBase(object): -+ """Shared methods for the parsed result objects containing a netloc element""" -+ __slots__ = () -+ -+ @property -+ def username(self): -+ return self._userinfo[0] -+ -+ @property -+ def password(self): -+ return self._userinfo[1] -+ -+ @property -+ def hostname(self): -+ hostname = self._hostinfo[0] -+ if not hostname: -+ return None -+ # Scoped IPv6 address may have zone info, which must not be lowercased -+ # like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys -+ separator = '%' if isinstance(hostname, str) else b'%' -+ hostname, percent, zone = hostname.partition(separator) -+ return hostname.lower() + percent + zone -+ -+ @property -+ def port(self): -+ port = self._hostinfo[1] -+ if port is not None: -+ try: -+ port = int(port, 10) -+ except ValueError: -+ message = f'Port could not be cast to integer value as {port!r}' -+ raise ValueError(message) from None -+ if not ( 0 <= port <= 65535): -+ raise ValueError("Port out of range 0-65535") -+ return port -+ -+ __class_getitem__ = classmethod(types.GenericAlias) -+ -+ -+class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): -+ __slots__ = () -+ -+ @property -+ def _userinfo(self): -+ netloc = self.netloc -+ userinfo, have_info, hostinfo = netloc.rpartition('@') -+ if have_info: -+ username, have_password, password = userinfo.partition(':') -+ if not have_password: -+ password = None -+ else: -+ username = password = None -+ return username, password -+ -+ @property -+ def _hostinfo(self): -+ netloc = self.netloc -+ _, _, hostinfo = netloc.rpartition('@') -+ _, 
have_open_br, bracketed = hostinfo.partition('[') -+ if have_open_br: -+ hostname, _, port = bracketed.partition(']') -+ _, _, port = port.partition(':') -+ else: -+ hostname, _, port = hostinfo.partition(':') -+ if not port: -+ port = None -+ return hostname, port -+ -+ -+class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): -+ __slots__ = () -+ -+ @property -+ def _userinfo(self): -+ netloc = self.netloc -+ userinfo, have_info, hostinfo = netloc.rpartition(b'@') -+ if have_info: -+ username, have_password, password = userinfo.partition(b':') -+ if not have_password: -+ password = None -+ else: -+ username = password = None -+ return username, password -+ -+ @property -+ def _hostinfo(self): -+ netloc = self.netloc -+ _, _, hostinfo = netloc.rpartition(b'@') -+ _, have_open_br, bracketed = hostinfo.partition(b'[') -+ if have_open_br: -+ hostname, _, port = bracketed.partition(b']') -+ _, _, port = port.partition(b':') -+ else: -+ hostname, _, port = hostinfo.partition(b':') -+ if not port: -+ port = None -+ return hostname, port -+ -+ -+from collections import namedtuple -+ -+_DefragResultBase = namedtuple('DefragResult', 'url fragment') -+_SplitResultBase = namedtuple( -+ 'SplitResult', 'scheme netloc path query fragment') -+_ParseResultBase = namedtuple( -+ 'ParseResult', 'scheme netloc path params query fragment') -+ -+_DefragResultBase.__doc__ = """ -+DefragResult(url, fragment) -+ -+A 2-tuple that contains the url without fragment identifier and the fragment -+identifier as a separate argument. -+""" -+ -+_DefragResultBase.url.__doc__ = """The URL with no fragment identifier.""" -+ -+_DefragResultBase.fragment.__doc__ = """ -+Fragment identifier separated from URL, that allows indirect identification of a -+secondary resource by reference to a primary resource and additional identifying -+information. 
-+""" -+ -+_SplitResultBase.__doc__ = """ -+SplitResult(scheme, netloc, path, query, fragment) -+ -+A 5-tuple that contains the different components of a URL. Similar to -+ParseResult, but does not split params. -+""" -+ -+_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request.""" -+ -+_SplitResultBase.netloc.__doc__ = """ -+Network location where the request is made to. -+""" -+ -+_SplitResultBase.path.__doc__ = """ -+The hierarchical path, such as the path to a file to download. -+""" -+ -+_SplitResultBase.query.__doc__ = """ -+The query component, that contains non-hierarchical data, that along with data -+in path component, identifies a resource in the scope of URI's scheme and -+network location. -+""" -+ -+_SplitResultBase.fragment.__doc__ = """ -+Fragment identifier, that allows indirect identification of a secondary resource -+by reference to a primary resource and additional identifying information. -+""" -+ -+_ParseResultBase.__doc__ = """ -+ParseResult(scheme, netloc, path, params, query, fragment) -+ -+A 6-tuple that contains components of a parsed URL. -+""" -+ -+_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__ -+_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__ -+_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__ -+_ParseResultBase.params.__doc__ = """ -+Parameters for last path element used to dereference the URI in order to provide -+access to perform some operation on the resource. 
-+""" -+ -+_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__ -+_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__ -+ -+ -+# For backwards compatibility, alias _NetlocResultMixinStr -+# ResultBase is no longer part of the documented API, but it is -+# retained since deprecating it isn't worth the hassle -+ResultBase = _NetlocResultMixinStr -+ -+# Structured result objects for string data -+class DefragResult(_DefragResultBase, _ResultMixinStr): -+ __slots__ = () -+ def geturl(self): -+ if self.fragment: -+ return self.url + '#' + self.fragment -+ else: -+ return self.url -+ -+class SplitResult(_SplitResultBase, _NetlocResultMixinStr): -+ __slots__ = () -+ def geturl(self): -+ return urlunsplit(self) -+ -+class ParseResult(_ParseResultBase, _NetlocResultMixinStr): -+ __slots__ = () -+ def geturl(self): -+ return urlunparse(self) -+ -+# Structured result objects for bytes data -+class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): -+ __slots__ = () -+ def geturl(self): -+ if self.fragment: -+ return self.url + b'#' + self.fragment -+ else: -+ return self.url -+ -+class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): -+ __slots__ = () -+ def geturl(self): -+ return urlunsplit(self) -+ -+class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): -+ __slots__ = () -+ def geturl(self): -+ return urlunparse(self) -+ -+# Set up the encode/decode result pairs -+def _fix_result_transcoding(): -+ _result_pairs = ( -+ (DefragResult, DefragResultBytes), -+ (SplitResult, SplitResultBytes), -+ (ParseResult, ParseResultBytes), -+ ) -+ for _decoded, _encoded in _result_pairs: -+ _decoded._encoded_counterpart = _encoded -+ _encoded._decoded_counterpart = _decoded -+ -+_fix_result_transcoding() -+del _fix_result_transcoding -+ -+def urlparse(url, scheme='', allow_fragments=True): -+ """Parse a URL into 6 components: -+ :///;?# -+ -+ The result is a named 6-tuple with fields corresponding to the -+ above. 
It is either a ParseResult or ParseResultBytes object, -+ depending on the type of the url parameter. -+ -+ The username, password, hostname, and port sub-components of netloc -+ can also be accessed as attributes of the returned object. -+ -+ The scheme argument provides the default value of the scheme -+ component when no scheme is found in url. -+ -+ If allow_fragments is False, no attempt is made to separate the -+ fragment component from the previous component, which can be either -+ path or query. -+ -+ Note that % escapes are not expanded. -+ """ -+ url, scheme, _coerce_result = _coerce_args(url, scheme) -+ splitresult = urlsplit(url, scheme, allow_fragments) -+ scheme, netloc, url, query, fragment = splitresult -+ if scheme in uses_params and ';' in url: -+ url, params = _splitparams(url) -+ else: -+ params = '' -+ result = ParseResult(scheme, netloc, url, params, query, fragment) -+ return _coerce_result(result) -+ -+def _splitparams(url): -+ if '/' in url: -+ i = url.find(';', url.rfind('/')) -+ if i < 0: -+ return url, '' -+ else: -+ i = url.find(';') -+ return url[:i], url[i+1:] -+ -+def _splitnetloc(url, start=0): -+ delim = len(url) # position of end of domain part of url, default is end -+ for c in '/?#': # look for delimiters; the order is NOT important -+ wdelim = url.find(c, start) # find first of this delim -+ if wdelim >= 0: # if found -+ delim = min(delim, wdelim) # use earliest delim position -+ return url[start:delim], url[delim:] # return (domain, rest) -+ -+def _checknetloc(netloc): -+ if not netloc or netloc.isascii(): -+ return -+ # looking for characters like \u2100 that expand to 'a/c' -+ # IDNA uses NFKC equivalence, so normalize for this check -+ import unicodedata -+ n = netloc.replace('@', '') # ignore characters already included -+ n = n.replace(':', '') # but not the surrounding text -+ n = n.replace('#', '') -+ n = n.replace('?', '') -+ netloc2 = unicodedata.normalize('NFKC', n) -+ if n == netloc2: -+ return -+ for c in '/?#@:': 
-+ if c in netloc2: -+ raise ValueError("netloc '" + netloc + "' contains invalid " + -+ "characters under NFKC normalization") -+ -+def urlsplit(url, scheme='', allow_fragments=True): -+ """Parse a URL into 5 components: -+ :///?# -+ -+ The result is a named 5-tuple with fields corresponding to the -+ above. It is either a SplitResult or SplitResultBytes object, -+ depending on the type of the url parameter. -+ -+ The username, password, hostname, and port sub-components of netloc -+ can also be accessed as attributes of the returned object. -+ -+ The scheme argument provides the default value of the scheme -+ component when no scheme is found in url. -+ -+ If allow_fragments is False, no attempt is made to separate the -+ fragment component from the previous component, which can be either -+ path or query. -+ -+ Note that % escapes are not expanded. -+ """ -+ -+ url, scheme, _coerce_result = _coerce_args(url, scheme) -+ # Only lstrip url as some applications rely on preserving trailing space. 
-+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) -+ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE) -+ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE) -+ -+ for b in _UNSAFE_URL_BYTES_TO_REMOVE: -+ url = url.replace(b, "") -+ scheme = scheme.replace(b, "") -+ -+ allow_fragments = bool(allow_fragments) -+ key = url, scheme, allow_fragments, type(url), type(scheme) -+ cached = _parse_cache.get(key, None) -+ if cached: -+ return _coerce_result(cached) -+ if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth -+ clear_cache() -+ netloc = query = fragment = '' -+ i = url.find(':') -+ if i > 0: -+ for c in url[:i]: -+ if c not in scheme_chars: -+ break -+ else: -+ scheme, url = url[:i].lower(), url[i+1:] -+ -+ if url[:2] == '//': -+ netloc, url = _splitnetloc(url, 2) -+ if (('[' in netloc and ']' not in netloc) or -+ (']' in netloc and '[' not in netloc)): -+ raise ValueError("Invalid IPv6 URL") -+ if allow_fragments and '#' in url: -+ url, fragment = url.split('#', 1) -+ if '?' in url: -+ url, query = url.split('?', 1) -+ _checknetloc(netloc) -+ v = SplitResult(scheme, netloc, url, query, fragment) -+ _parse_cache[key] = v -+ return _coerce_result(v) -+ -+def urlunparse(components): -+ """Put a parsed URL back together again. This may result in a -+ slightly different, but equivalent URL, if the URL that was parsed -+ originally had redundant delimiters, e.g. a ? with an empty query -+ (the draft states that these are equivalent).""" -+ scheme, netloc, url, params, query, fragment, _coerce_result = ( -+ _coerce_args(*components)) -+ if params: -+ url = "%s;%s" % (url, params) -+ return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) -+ -+def urlunsplit(components): -+ """Combine the elements of a tuple as returned by urlsplit() into a -+ complete URL as a string. The data argument can be any five-item iterable. 
-+ This may result in a slightly different, but equivalent URL, if the URL that -+ was parsed originally had unnecessary delimiters (for example, a ? with an -+ empty query; the RFC states that these are equivalent).""" -+ scheme, netloc, url, query, fragment, _coerce_result = ( -+ _coerce_args(*components)) -+ if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): -+ if url and url[:1] != '/': url = '/' + url -+ url = '//' + (netloc or '') + url -+ if scheme: -+ url = scheme + ':' + url -+ if query: -+ url = url + '?' + query -+ if fragment: -+ url = url + '#' + fragment -+ return _coerce_result(url) -+ -+def urljoin(base, url, allow_fragments=True): -+ """Join a base URL and a possibly relative URL to form an absolute -+ interpretation of the latter.""" -+ if not base: -+ return url -+ if not url: -+ return base -+ -+ base, url, _coerce_result = _coerce_args(base, url) -+ bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ -+ urlparse(base, '', allow_fragments) -+ scheme, netloc, path, params, query, fragment = \ -+ urlparse(url, bscheme, allow_fragments) -+ -+ if scheme != bscheme or scheme not in uses_relative: -+ return _coerce_result(url) -+ if scheme in uses_netloc: -+ if netloc: -+ return _coerce_result(urlunparse((scheme, netloc, path, -+ params, query, fragment))) -+ netloc = bnetloc -+ -+ if not path and not params: -+ path = bpath -+ params = bparams -+ if not query: -+ query = bquery -+ return _coerce_result(urlunparse((scheme, netloc, path, -+ params, query, fragment))) -+ -+ base_parts = bpath.split('/') -+ if base_parts[-1] != '': -+ # the last item is not a directory, so will not be taken into account -+ # in resolving the relative path -+ del base_parts[-1] -+ -+ # for rfc3986, ignore all base path should the first character be root. 
-+ if path[:1] == '/': -+ segments = path.split('/') -+ else: -+ segments = base_parts + path.split('/') -+ # filter out elements that would cause redundant slashes on re-joining -+ # the resolved_path -+ segments[1:-1] = filter(None, segments[1:-1]) -+ -+ resolved_path = [] -+ -+ for seg in segments: -+ if seg == '..': -+ try: -+ resolved_path.pop() -+ except IndexError: -+ # ignore any .. segments that would otherwise cause an IndexError -+ # when popped from resolved_path if resolving for rfc3986 -+ pass -+ elif seg == '.': -+ continue -+ else: -+ resolved_path.append(seg) -+ -+ if segments[-1] in ('.', '..'): -+ # do some post-processing here. if the last segment was a relative dir, -+ # then we need to append the trailing '/' -+ resolved_path.append('') -+ -+ return _coerce_result(urlunparse((scheme, netloc, '/'.join( -+ resolved_path) or '/', params, query, fragment))) -+ -+ -+def urldefrag(url): -+ """Removes any existing fragment from URL. -+ -+ Returns a tuple of the defragmented URL and the fragment. If -+ the URL contained no fragments, the second element is the -+ empty string. -+ """ -+ url, _coerce_result = _coerce_args(url) -+ if '#' in url: -+ s, n, p, a, q, frag = urlparse(url) -+ defrag = urlunparse((s, n, p, a, q, '')) -+ else: -+ frag = '' -+ defrag = url -+ return _coerce_result(DefragResult(defrag, frag)) -+ -+_hexdig = '0123456789ABCDEFabcdef' -+_hextobyte = None -+ -+def unquote_to_bytes(string): -+ """unquote_to_bytes('abc%20def') -> b'abc def'.""" -+ # Note: strings are encoded as UTF-8. This is only an issue if it contains -+ # unescaped non-ASCII characters, which URIs should not. -+ if not string: -+ # Is it a string-like object? 
-+ string.split -+ return b'' -+ if isinstance(string, str): -+ string = string.encode('utf-8') -+ bits = string.split(b'%') -+ if len(bits) == 1: -+ return string -+ res = [bits[0]] -+ append = res.append -+ # Delay the initialization of the table to not waste memory -+ # if the function is never called -+ global _hextobyte -+ if _hextobyte is None: -+ _hextobyte = {(a + b).encode(): bytes.fromhex(a + b) -+ for a in _hexdig for b in _hexdig} -+ for item in bits[1:]: -+ try: -+ append(_hextobyte[item[:2]]) -+ append(item[2:]) -+ except KeyError: -+ append(b'%') -+ append(item) -+ return b''.join(res) -+ -+_asciire = re.compile('([\x00-\x7f]+)') -+ -+def unquote(string, encoding='utf-8', errors='replace'): -+ """Replace %xx escapes by their single-character equivalent. The optional -+ encoding and errors parameters specify how to decode percent-encoded -+ sequences into Unicode characters, as accepted by the bytes.decode() -+ method. -+ By default, percent-encoded sequences are decoded with UTF-8, and invalid -+ sequences are replaced by a placeholder character. -+ -+ unquote('abc%20def') -> 'abc def'. -+ """ -+ if isinstance(string, bytes): -+ return unquote_to_bytes(string).decode(encoding, errors) -+ if '%' not in string: -+ string.split -+ return string -+ if encoding is None: -+ encoding = 'utf-8' -+ if errors is None: -+ errors = 'replace' -+ bits = _asciire.split(string) -+ res = [bits[0]] -+ append = res.append -+ for i in range(1, len(bits), 2): -+ append(unquote_to_bytes(bits[i]).decode(encoding, errors)) -+ append(bits[i + 1]) -+ return ''.join(res) -+ -+ -+def parse_qs(qs, keep_blank_values=False, strict_parsing=False, -+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): -+ """Parse a query given as a string argument. -+ -+ Arguments: -+ -+ qs: percent-encoded query string to be parsed -+ -+ keep_blank_values: flag indicating whether blank values in -+ percent-encoded queries should be treated as blank strings. 
-+ A true value indicates that blanks should be retained as -+ blank strings. The default false value indicates that -+ blank values are to be ignored and treated as if they were -+ not included. -+ -+ strict_parsing: flag indicating what to do with parsing errors. -+ If false (the default), errors are silently ignored. -+ If true, errors raise a ValueError exception. -+ -+ encoding and errors: specify how to decode percent-encoded sequences -+ into Unicode characters, as accepted by the bytes.decode() method. -+ -+ max_num_fields: int. If set, then throws a ValueError if there -+ are more than n fields read by parse_qsl(). -+ -+ separator: str. The symbol to use for separating the query arguments. -+ Defaults to &. -+ -+ Returns a dictionary. -+ """ -+ parsed_result = {} -+ pairs = parse_qsl(qs, keep_blank_values, strict_parsing, -+ encoding=encoding, errors=errors, -+ max_num_fields=max_num_fields, separator=separator) -+ for name, value in pairs: -+ if name in parsed_result: -+ parsed_result[name].append(value) -+ else: -+ parsed_result[name] = [value] -+ return parsed_result -+ -+ -+def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, -+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): -+ """Parse a query given as a string argument. -+ -+ Arguments: -+ -+ qs: percent-encoded query string to be parsed -+ -+ keep_blank_values: flag indicating whether blank values in -+ percent-encoded queries should be treated as blank strings. -+ A true value indicates that blanks should be retained as blank -+ strings. The default false value indicates that blank values -+ are to be ignored and treated as if they were not included. -+ -+ strict_parsing: flag indicating what to do with parsing errors. If -+ false (the default), errors are silently ignored. If true, -+ errors raise a ValueError exception. 
-+ -+ encoding and errors: specify how to decode percent-encoded sequences -+ into Unicode characters, as accepted by the bytes.decode() method. -+ -+ max_num_fields: int. If set, then throws a ValueError -+ if there are more than n fields read by parse_qsl(). -+ -+ separator: str. The symbol to use for separating the query arguments. -+ Defaults to &. -+ -+ Returns a list, as G-d intended. -+ """ -+ qs, _coerce_result = _coerce_args(qs) -+ separator, _ = _coerce_args(separator) -+ -+ if not separator or (not isinstance(separator, (str, bytes))): -+ raise ValueError("Separator must be of type string or bytes.") -+ -+ # If max_num_fields is defined then check that the number of fields -+ # is less than max_num_fields. This prevents a memory exhaustion DOS -+ # attack via post bodies with many fields. -+ if max_num_fields is not None: -+ num_fields = 1 + qs.count(separator) -+ if max_num_fields < num_fields: -+ raise ValueError('Max number of fields exceeded') -+ -+ pairs = [s1 for s1 in qs.split(separator)] -+ r = [] -+ for name_value in pairs: -+ if not name_value and not strict_parsing: -+ continue -+ nv = name_value.split('=', 1) -+ if len(nv) != 2: -+ if strict_parsing: -+ raise ValueError("bad query field: %r" % (name_value,)) -+ # Handle case of a control-name with no equal sign -+ if keep_blank_values: -+ nv.append('') -+ else: -+ continue -+ if len(nv[1]) or keep_blank_values: -+ name = nv[0].replace('+', ' ') -+ name = unquote(name, encoding=encoding, errors=errors) -+ name = _coerce_result(name) -+ value = nv[1].replace('+', ' ') -+ value = unquote(value, encoding=encoding, errors=errors) -+ value = _coerce_result(value) -+ r.append((name, value)) -+ return r -+ -+def unquote_plus(string, encoding='utf-8', errors='replace'): -+ """Like unquote(), but also replace plus signs by spaces, as required for -+ unquoting HTML form values. 
-+ -+ unquote_plus('%7e/abc+def') -> '~/abc def' -+ """ -+ string = string.replace('+', ' ') -+ return unquote(string, encoding, errors) -+ -+_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -+ b'abcdefghijklmnopqrstuvwxyz' -+ b'0123456789' -+ b'_.-~') -+_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -+_safe_quoters = {} -+ -+class Quoter(collections.defaultdict): -+ """A mapping from bytes (in range(0,256)) to strings. -+ -+ String values are percent-encoded byte values, unless the key < 128, and -+ in the "safe" set (either the specified safe set, or default set). -+ """ -+ # Keeps a cache internally, using defaultdict, for efficiency (lookups -+ # of cached keys don't call Python code at all). -+ def __init__(self, safe): -+ """safe: bytes object.""" -+ self.safe = _ALWAYS_SAFE.union(safe) -+ -+ def __repr__(self): -+ # Without this, will just display as a defaultdict -+ return "<%s %r>" % (self.__class__.__name__, dict(self)) -+ -+ def __missing__(self, b): -+ # Handle a cache miss. Store quoted string in cache and return. -+ res = chr(b) if b in self.safe else '%{:02X}'.format(b) -+ self[b] = res -+ return res -+ -+def quote(string, safe='/', encoding=None, errors=None): -+ """quote('abc def') -> 'abc%20def' -+ -+ Each part of a URL, e.g. the path info, the query, etc., has a -+ different set of reserved characters that must be quoted. The -+ quote function offers a cautious (not minimal) way to quote a -+ string for most of these parts. -+ -+ RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists -+ the following (un)reserved characters. -+ -+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -+ reserved = gen-delims / sub-delims -+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -+ sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -+ / "*" / "+" / "," / ";" / "=" -+ -+ Each of the reserved characters is reserved in some component of a URL, -+ but not necessarily in all of them. 
-+ -+ The quote function %-escapes all characters that are neither in the -+ unreserved chars ("always safe") nor the additional chars set via the -+ safe arg. -+ -+ The default for the safe arg is '/'. The character is reserved, but in -+ typical usage the quote function is being called on a path where the -+ existing slash characters are to be preserved. -+ -+ Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings. -+ Now, "~" is included in the set of unreserved characters. -+ -+ string and safe may be either str or bytes objects. encoding and errors -+ must not be specified if string is a bytes object. -+ -+ The optional encoding and errors parameters specify how to deal with -+ non-ASCII characters, as accepted by the str.encode method. -+ By default, encoding='utf-8' (characters are encoded with UTF-8), and -+ errors='strict' (unsupported characters raise a UnicodeEncodeError). -+ """ -+ if isinstance(string, str): -+ if not string: -+ return string -+ if encoding is None: -+ encoding = 'utf-8' -+ if errors is None: -+ errors = 'strict' -+ string = string.encode(encoding, errors) -+ else: -+ if encoding is not None: -+ raise TypeError("quote() doesn't support 'encoding' for bytes") -+ if errors is not None: -+ raise TypeError("quote() doesn't support 'errors' for bytes") -+ return quote_from_bytes(string, safe) -+ -+def quote_plus(string, safe='', encoding=None, errors=None): -+ """Like quote(), but also replace ' ' with '+', as required for quoting -+ HTML form values. Plus signs in the original string are escaped unless -+ they are included in safe. It also does not have safe default to '/'. -+ """ -+ # Check if ' ' in string, where string may either be a str or bytes. If -+ # there are no spaces, the regular quote will produce the right answer. 
-+ if ((isinstance(string, str) and ' ' not in string) or -+ (isinstance(string, bytes) and b' ' not in string)): -+ return quote(string, safe, encoding, errors) -+ if isinstance(safe, str): -+ space = ' ' -+ else: -+ space = b' ' -+ string = quote(string, safe + space, encoding, errors) -+ return string.replace(' ', '+') -+ -+def quote_from_bytes(bs, safe='/'): -+ """Like quote(), but accepts a bytes object rather than a str, and does -+ not perform string-to-bytes encoding. It always returns an ASCII string. -+ quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' -+ """ -+ if not isinstance(bs, (bytes, bytearray)): -+ raise TypeError("quote_from_bytes() expected bytes") -+ if not bs: -+ return '' -+ if isinstance(safe, str): -+ # Normalize 'safe' by converting to bytes and removing non-ASCII chars -+ safe = safe.encode('ascii', 'ignore') -+ else: -+ safe = bytes([c for c in safe if c < 128]) -+ if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): -+ return bs.decode() -+ try: -+ quoter = _safe_quoters[safe] -+ except KeyError: -+ _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ -+ return ''.join([quoter(char) for char in bs]) -+ -+def urlencode(query, doseq=False, safe='', encoding=None, errors=None, -+ quote_via=quote_plus): -+ """Encode a dict or sequence of two-element tuples into a URL query string. -+ -+ If any values in the query arg are sequences and doseq is true, each -+ sequence element is converted to a separate parameter. -+ -+ If the query arg is a sequence of two-element tuples, the order of the -+ parameters in the output will match the order of parameters in the -+ input. -+ -+ The components of a query arg may each be either a string or a bytes type. -+ -+ The safe, encoding, and errors parameters are passed down to the function -+ specified by quote_via (encoding and errors only if a component is a str). 
-+ """ -+ -+ if hasattr(query, "items"): -+ query = query.items() -+ else: -+ # It's a bother at times that strings and string-like objects are -+ # sequences. -+ try: -+ # non-sequence items should not work with len() -+ # non-empty strings will fail this -+ if len(query) and not isinstance(query[0], tuple): -+ raise TypeError -+ # Zero-length sequences of all types will get here and succeed, -+ # but that's a minor nit. Since the original implementation -+ # allowed empty dicts that type of behavior probably should be -+ # preserved for consistency -+ except TypeError: -+ ty, va, tb = sys.exc_info() -+ raise TypeError("not a valid non-string sequence " -+ "or mapping object").with_traceback(tb) -+ -+ l = [] -+ if not doseq: -+ for k, v in query: -+ if isinstance(k, bytes): -+ k = quote_via(k, safe) -+ else: -+ k = quote_via(str(k), safe, encoding, errors) -+ -+ if isinstance(v, bytes): -+ v = quote_via(v, safe) -+ else: -+ v = quote_via(str(v), safe, encoding, errors) -+ l.append(k + '=' + v) -+ else: -+ for k, v in query: -+ if isinstance(k, bytes): -+ k = quote_via(k, safe) -+ else: -+ k = quote_via(str(k), safe, encoding, errors) -+ -+ if isinstance(v, bytes): -+ v = quote_via(v, safe) -+ l.append(k + '=' + v) -+ elif isinstance(v, str): -+ v = quote_via(v, safe, encoding, errors) -+ l.append(k + '=' + v) -+ else: -+ try: -+ # Is this a sufficient test for sequence-ness? 
-+ x = len(v) -+ except TypeError: -+ # not a sequence -+ v = quote_via(str(v), safe, encoding, errors) -+ l.append(k + '=' + v) -+ else: -+ # loop over the sequence -+ for elt in v: -+ if isinstance(elt, bytes): -+ elt = quote_via(elt, safe) -+ else: -+ elt = quote_via(str(elt), safe, encoding, errors) -+ l.append(k + '=' + elt) -+ return '&'.join(l) -+ -+ -+def to_bytes(url): -+ warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", -+ DeprecationWarning, stacklevel=2) -+ return _to_bytes(url) -+ -+ -+def _to_bytes(url): -+ """to_bytes(u"URL") --> 'URL'.""" -+ # Most URL schemes require ASCII. If that changes, the conversion -+ # can be relaxed. -+ # XXX get rid of to_bytes() -+ if isinstance(url, str): -+ try: -+ url = url.encode("ASCII").decode() -+ except UnicodeError: -+ raise UnicodeError("URL " + repr(url) + -+ " contains non-ASCII characters") -+ return url -+ -+ -+def unwrap(url): -+ """Transform a string like '' into 'scheme://host/path'. -+ -+ The string is returned unchanged if it's not a wrapped URL. 
-+ """ -+ url = str(url).strip() -+ if url[:1] == '<' and url[-1:] == '>': -+ url = url[1:-1].strip() -+ if url[:4] == 'URL:': -+ url = url[4:].strip() -+ return url -+ -+ -+def splittype(url): -+ warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splittype(url) -+ -+ -+_typeprog = None -+def _splittype(url): -+ """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" -+ global _typeprog -+ if _typeprog is None: -+ _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL) -+ -+ match = _typeprog.match(url) -+ if match: -+ scheme, data = match.groups() -+ return scheme.lower(), data -+ return None, url -+ -+ -+def splithost(url): -+ warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splithost(url) -+ -+ -+_hostprog = None -+def _splithost(url): -+ """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" -+ global _hostprog -+ if _hostprog is None: -+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) -+ -+ match = _hostprog.match(url) -+ if match: -+ host_port, path = match.groups() -+ if path and path[0] != '/': -+ path = '/' + path -+ return host_port, path -+ return None, url -+ -+ -+def splituser(host): -+ warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splituser(host) -+ -+ -+def _splituser(host): -+ """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" -+ user, delim, host = host.rpartition('@') -+ return (user if delim else None), host -+ -+ -+def splitpasswd(user): -+ warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitpasswd(user) -+ -+ -+def _splitpasswd(user): -+ """splitpasswd('user:passwd') 
-> 'user', 'passwd'.""" -+ user, delim, passwd = user.partition(':') -+ return user, (passwd if delim else None) -+ -+ -+def splitport(host): -+ warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitport(host) -+ -+ -+# splittag('/path#tag') --> '/path', 'tag' -+_portprog = None -+def _splitport(host): -+ """splitport('host:port') --> 'host', 'port'.""" -+ global _portprog -+ if _portprog is None: -+ _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) -+ -+ match = _portprog.fullmatch(host) -+ if match: -+ host, port = match.groups() -+ if port: -+ return host, port -+ return host, None -+ -+ -+def splitnport(host, defport=-1): -+ warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitnport(host, defport) -+ -+ -+def _splitnport(host, defport=-1): -+ """Split host and port, returning numeric port. -+ Return given default port if no ':' found; defaults to -1. -+ Return numerical port if a valid number are found after ':'. 
-+ Return None if ':' but not a valid number.""" -+ host, delim, port = host.rpartition(':') -+ if not delim: -+ host = port -+ elif port: -+ try: -+ nport = int(port) -+ except ValueError: -+ nport = None -+ return host, nport -+ return host, defport -+ -+ -+def splitquery(url): -+ warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitquery(url) -+ -+ -+def _splitquery(url): -+ """splitquery('/path?query') --> '/path', 'query'.""" -+ path, delim, query = url.rpartition('?') -+ if delim: -+ return path, query -+ return url, None -+ -+ -+def splittag(url): -+ warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splittag(url) -+ -+ -+def _splittag(url): -+ """splittag('/path#tag') --> '/path', 'tag'.""" -+ path, delim, tag = url.rpartition('#') -+ if delim: -+ return path, tag -+ return url, None -+ -+ -+def splitattr(url): -+ warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " -+ "use urllib.parse.urlparse() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitattr(url) -+ -+ -+def _splitattr(url): -+ """splitattr('/path;attr1=value1;attr2=value2;...') -> -+ '/path', ['attr1=value1', 'attr2=value2', ...].""" -+ words = url.split(';') -+ return words[0], words[1:] -+ -+ -+def splitvalue(attr): -+ warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " -+ "use urllib.parse.parse_qsl() instead", -+ DeprecationWarning, stacklevel=2) -+ return _splitvalue(attr) -+ -+ -+def _splitvalue(attr): -+ """splitvalue('attr=value') --> 'attr', 'value'.""" -+ attr, delim, value = attr.partition('=') -+ return attr, (value if delim else None) diff --git a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst new file mode 100644 -index 
0000000..81e5904
+index 0000000..4ba1759
 --- /dev/null
 +++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
 @@ -0,0 +1,2 @@
++Add checks to ensure that bracketed hosts found by urlsplit are of IPv6 or
++IPvFuture format
+--
+2.34.1
+
+
+From eea60813b908105536e0c759909217b011ba226b Mon Sep 17 00:00:00 2001
+From: "Gregory P. Smith"
+Date: Tue, 9 May 2023 08:41:46 -0700
+Subject: [PATCH 2/4] ReSTify NEWS.
+
+Signed-off-by: ankita
+---
+ .../Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
+index 4ba1759..81e5904 100644
+--- a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
++++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
+@@ -1,2 +1,2 @@
+-Add checks to ensure that bracketed hosts found by urlsplit are of IPv6 or
+-IPvFuture format
++Add checks to ensure that ``[`` bracketed ``]`` hosts found by
++:func:`urllib.parse.urlsplit` are of IPv6 or IPvFuture format.
-- 2.34.1 + +From 3f8dcc1a85c173308d2a3ef2f0f52267304a59bc Mon Sep 17 00:00:00 2001 +From: JohnJamesUtley +Date: Tue, 9 May 2023 16:21:02 -0400 +Subject: [PATCH 3/4] Splits bracketed host tests in two, replaces rpartition + for host brackets, adds comments, and a new test + +Signed-off-by: ankita +--- + Lib/test/test_urlparse.py | 5 ++++- + Lib/urllib/parse.py | 6 +++--- + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py +index 5d82358..4488589 100644 +--- a/Lib/test/test_urlparse.py ++++ b/Lib/test/test_urlparse.py +@@ -1071,7 +1071,7 @@ class UrlParseTestCase(unittest.TestCase): + self.assertEqual(p2.scheme, 'tel') + self.assertEqual(p2.path, '+31641044153') + +- def test_splitting_bracketed_hosts(self): ++ def test_invalid_bracketed_hosts(self): + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query') +@@ -1081,6 +1081,9 @@ class UrlParseTestCase(unittest.TestCase): + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') ++ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') ++ ++ def test_splitting_bracketed_hosts(self): + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') + self.assertEqual(p1.hostname, 'v6a.ip') + self.assertEqual(p1.username, 'user') +diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py +index 5ab115b..2eb3448 100644 +--- a/Lib/urllib/parse.py ++++ b/Lib/urllib/parse.py +@@ -213,7 +213,7 @@ class 
_NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: +- hostname, _, port = bracketed.rpartition(']') ++ hostname, _, port = bracketed.partition(']') + _, _, port = port.partition(':') + else: + hostname, _, port = hostinfo.partition(':') +@@ -243,7 +243,7 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: +- hostname, _, port = bracketed.rpartition(b']') ++ hostname, _, port = bracketed.partition(b']') + _, _, port = port.partition(b':') + else: + hostname, _, port = hostinfo.partition(b':') +@@ -506,7 +506,7 @@ def urlsplit(url, scheme='', allow_fragments=True): + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if '[' in netloc and ']' in netloc: +- bracketed_host = netloc.partition('[')[2].rpartition(']')[0] ++ bracketed_host = netloc.partition('[')[2].partition(']')[0] + _check_bracketed_host(bracketed_host) + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) +-- +2.34.1 + + +From 307ac68e88e93789e82eb002b7ce52d46d415f9a Mon Sep 17 00:00:00 2001 +From: "Gregory P. 
Smith" +Date: Tue, 9 May 2023 16:53:54 -0700 +Subject: [PATCH 4/4] remove trailing spaces + +Signed-off-by: ankita +--- + Lib/test/test_urlparse.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py +index 4488589..c84df23 100644 +--- a/Lib/test/test_urlparse.py ++++ b/Lib/test/test_urlparse.py +@@ -1082,7 +1082,7 @@ class UrlParseTestCase(unittest.TestCase): + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') + self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') +- ++ + def test_splitting_bracketed_hosts(self): + p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') + self.assertEqual(p1.hostname, 'v6a.ip') +-- +2.34.1 +