Skip to content

Commit 9bc38d2

Browse files
authored
Merge pull request #63 from redapple/safe-url-idna-error
Handle IDNA encoding failures when making URLs safe
2 parents bfcdde7 + 10d1faf commit 9bc38d2

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

tests/test_url.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,20 @@ def test_safe_url_idna(self):
165165
safeurl = safe_url_string(safe_result)
166166
self.assertEqual(safeurl, safe_result)
167167

168+
def test_safe_url_idna_encoding_failure(self):
169+
# missing DNS label
170+
self.assertEqual(
171+
safe_url_string(u"http://.example.com/résumé?q=résumé"),
172+
"http://.example.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9")
173+
174+
# DNS label too long
175+
self.assertEqual(
176+
safe_url_string(
177+
u"http://www.{label}.com/résumé?q=résumé".format(
178+
label=u"example"*11)),
179+
"http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
180+
label=u"example"*11))
181+
168182
def test_safe_download_url(self):
169183
self.assertEqual(safe_download_url('http://www.example.org'),
170184
'http://www.example.org/')

w3lib/url.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,18 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
9090
parts = urlsplit(to_unicode(url, encoding=encoding,
9191
errors='percentencode'))
9292

93+
# IDNA encoding can fail for labels that are too long (>63 characters)
94+
# or missing labels (e.g. http://.example.com)
95+
try:
96+
netloc = parts.netloc.encode('idna')
97+
except UnicodeError:
98+
netloc = parts.netloc
99+
93100
# quote() in Python 2 returns the same type as its input;
94101
# quote() in Python3 always returns Unicode (native str)
95102
return urlunsplit((
96103
to_native_str(parts.scheme),
97-
to_native_str(parts.netloc.encode('idna')),
104+
to_native_str(netloc),
98105

99106
# default encoding for path component SHOULD be UTF-8
100107
quote(to_bytes(parts.path, path_encoding), _safe_chars),

0 commit comments

Comments
 (0)