|
| 1 | +# -*- coding: utf-8 -*- |
1 | 2 | # Copyright (C) 2022 ActiveState Software Inc. |
2 | 3 | # test_urlparse.py is licensed under the PSFLv2 License. |
3 | 4 | # See the file LICENSE for details. |
|
8 | 9 | import unittest |
9 | 10 | import urlparse |
10 | 11 |
|
| 12 | +# Add ability to run sub-tests |
def sub_test(param_list):
    """Decorate a test method so that it runs once per parameter set.

    Args:
        param_list: Iterable of dicts. Each dict is passed to the wrapped
            test as keyword arguments, and to ``self.subTest`` when the
            test case provides that method.

    Returns:
        A decorator that turns ``f(self, **params)`` into a test method
        ``wrapped(self)`` which calls ``f`` once for every entry of
        ``param_list``.
    """
    # Imported locally so this helper does not rely on a module-level
    # ``import functools`` being present elsewhere in the file.
    import functools

    def decorator(f):

        @functools.wraps(f)
        def wrapped(self):
            # TestCase.subTest was added in Python 3.4.  When the test case
            # does not provide it, run the body directly so the parameters
            # are still exercised (without per-parameter failure isolation).
            sub_test_cm = getattr(self, "subTest", None)
            for param in param_list:
                if sub_test_cm is None:
                    f(self, **param)
                else:
                    with sub_test_cm(**param):
                        f(self, **param)

        return wrapped

    return decorator
| 27 | + |
| 28 | + |
11 | 29 | RFC1808_BASE = "http://a/b/c/d;p?q#f" |
12 | 30 | RFC2396_BASE = "http://a/b/c/d;p?q" |
13 | 31 | RFC3986_BASE = 'http://a/b/c/d;p?q' |
@@ -602,16 +620,92 @@ def test_urlsplit_remove_unsafe_bytes(self): |
602 | 620 | self.assertEqual(p.port, None) |
603 | 621 | self.assertEqual(p.geturl(), u"http://www.python.org/javascript:alert('msg')/#frag") |
604 | 622 |
|
def test_urlsplit_strip_url(self):
    """Check urlsplit's handling of leading/trailing control bytes and spaces.

    Covers text and byte URLs with a noisy prefix, preservation of a
    trailing space, and a ``scheme`` argument surrounded by noise.
    """
    # Every byte value from 0x00 through 0x20 (the last one is a space).
    noise = bytes(bytearray(range(0, 0x20 + 1)))
    base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"

    # Text URL: the noisy prefix must not show up in any component, and
    # geturl() must give back the clean URL.
    url = noise.decode("utf-8") + base_url
    p = urlparse.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query=yes")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, "User")
    self.assertEqual(p.password, "Pass")
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), base_url)

    # Same checks with a byte-string URL.
    url = noise + base_url.encode("utf-8")
    p = urlparse.urlsplit(url)
    self.assertEqual(p.scheme, b"http")
    self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
    self.assertEqual(p.path, b"/doc/")
    self.assertEqual(p.query, b"query=yes")
    self.assertEqual(p.fragment, b"frag")
    self.assertEqual(p.username, b"User")
    self.assertEqual(p.password, b"Pass")
    self.assertEqual(p.hostname, b"www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), base_url.encode("utf-8"))

    # Test that trailing space is preserved as some applications rely on
    # this within query strings.
    query_spaces_url = "https://www.python.org:88/doc/?query= "
    p = urlparse.urlsplit(noise.decode("utf-8") + query_spaces_url)
    self.assertEqual(p.scheme, "https")
    self.assertEqual(p.netloc, "www.python.org:88")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query= ")
    self.assertEqual(p.port, 88)
    self.assertEqual(p.geturl(), query_spaces_url)

    p = urlparse.urlsplit("www.pypi.org ")
    # That "hostname" gets considered a "path" due to the
    # trailing space and our existing logic... YUCK...
    # and re-assembles via geturl aka urlunsplit into the original.
    # django.core.validators.URLValidator (at least through v3.2) relies on
    # this, for better or worse, to catch it in a ValidationError via its
    # regular expressions.
    # Here we test the basic round trip concept of such a trailing space.
    self.assertEqual(urlparse.urlunsplit(p), "www.pypi.org ")

    # with scheme as cache-key
    # NOTE(review): the loop runs twice, presumably so the second call is
    # served from urlsplit's cache -- confirm against the implementation.
    url = "//www.python.org/"
    scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
    for _ in range(2):
        p = urlparse.urlsplit(url, scheme=scheme)
        self.assertEqual(p.scheme, "https")
        self.assertEqual(p.geturl(), "https://www.python.org/")
| 681 | + |
def test_attributes_bad_port_a(self):
    """Check that malformed ASCII ports raise ValueError on ``.port``.

    Each port is tried with both urlsplit and urlparse, as a text URL and
    as an ASCII-encoded URL.  Parsing itself must succeed and keep the
    netloc intact; only reading ``port`` is expected to fail.
    """
    for as_bytes in (False, True):
        for parse in (urlparse.urlsplit, urlparse.urlparse):
            # The fuller list of bad ports ("-1", "-0", " 1", "1 ", "६") is
            # not exercised here: per the original note, spaces and invalid
            # characters are stripped now, so those values cannot cause
            # issues.
            for port in ("foo", "1.5", "0x10", "1_1"):
                netloc = "www.example.net:" + port
                url = "http://" + netloc + "/"
                if as_bytes:
                    netloc = netloc.encode("ascii")
                    url = url.encode("ascii")
                p = parse(url)
                self.assertEqual(p.netloc, netloc)
                with self.assertRaises(ValueError):
                    p.port
| 698 | + |
def test_attributes_bad_port_b(self):
    """Check that a non-ASCII digit used as a port raises ValueError.

    Parsing must succeed and keep the netloc intact; only reading
    ``port`` is expected to fail.
    """
    for parse in (urlparse.urlsplit, urlparse.urlparse):
        # One-element tuple: a bare ("६") is just a string, and looping
        # over it would test its individual elements instead of the port.
        for port in ("६",):
            netloc = "www.example.net:" + port
            url = "http://" + netloc + "/"
            p = parse(url)
            self.assertEqual(p.netloc, netloc)
            with self.assertRaises(ValueError):
                p.port
615 | 709 |
|
616 | 710 | def test_attributes_without_netloc(self): |
617 | 711 | # This example is straight from RFC 3261. It looks like it |
|
0 commit comments