Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/w3lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ w3lib Package

.. automodule:: w3lib.url
:members:

.. autoclass:: ParseDataURIResult
5 changes: 0 additions & 5 deletions run-mypy.sh

This file was deleted.

2 changes: 1 addition & 1 deletion tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_illegal_entities(self):
def test_browser_hack(self):
# check browser hack for numeric character references in the 80-9F range
self.assertEqual(replace_entities("x&#153;y", encoding="cp1252"), "x\u2122y")
self.assertEqual(replace_entities("x&#x99;y", encoding="cp1252"), u"x\u2122y")
self.assertEqual(replace_entities("x&#x99;y", encoding="cp1252"), "x\u2122y")

def test_missing_semicolon(self):
for entity, result in (
Expand Down
12 changes: 6 additions & 6 deletions w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,11 @@ def safe_url_string(
# IDNA encoding can fail for too long labels (>63 characters)
# or missing labels (e.g. http://.example.com)
try:
netloc = parts.netloc.encode("idna")
netloc_bytes = parts.netloc.encode("idna")
except UnicodeError:
netloc = parts.netloc.encode("utf-8")
netloc = parts.netloc
else:
netloc = netloc_bytes.decode()

# default encoding for path component SHOULD be UTF-8
if quote_path:
Expand All @@ -102,7 +104,7 @@ def safe_url_string(
return urlunsplit(
(
parts.scheme,
netloc.decode().rstrip(":"),
netloc.rstrip(":"),
path,
quote(parts.query.encode(encoding), _safe_chars),
quote(parts.fragment.encode(encoding), _safe_chars),
Expand Down Expand Up @@ -370,9 +372,7 @@ def any_to_uri(uri_or_path: str) -> str:
ParseDataURIResult.__doc__ = "The return value type of `w3lib.url.parse_data_uri`."


# If we add the return type hint sphinx would error:
# w3lib/url.py:docstring of w3lib.url.parse_data_uri::py:class reference target not found: w3lib.url.ParseDataURIResult
def parse_data_uri(uri: StrOrBytes): # type: ignore
def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult:
"""

Parse a data: URI, returning a 3-tuple of media type, dictionary of media
Expand Down