Skip to content

Commit 10d1faf

Browse files
committed
Handle IDNA encoding failures for netloc part
1 parent c25e8b4 commit 10d1faf

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

w3lib/url.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,18 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
9090
parts = urlsplit(to_unicode(url, encoding=encoding,
9191
errors='percentencode'))
9292

93+
# IDNA encoding can fail for labels that are too long (>63 characters)
94+
# or missing labels (e.g. http://.example.com)
95+
try:
96+
netloc = parts.netloc.encode('idna')
97+
except UnicodeError:
98+
netloc = parts.netloc
99+
93100
# in Python2, the return type of quote() follows the input type;
94101
# quote() in Python3 always returns Unicode (native str)
95102
return urlunsplit((
96103
to_native_str(parts.scheme),
97-
to_native_str(parts.netloc.encode('idna')),
104+
to_native_str(netloc),
98105

99106
# default encoding for path component SHOULD be UTF-8
100107
quote(to_bytes(parts.path, path_encoding), _safe_chars),

0 commit comments

Comments
 (0)