Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,18 @@ def test_scheme_case_insensitive(self):
result = parse_data_uri("DaTa:,A%20brief%20note")
self.assertEqual(result.data, b"A brief note")

def test_safe_url_string_encode_idna_domain_with_port(self):
    # A non-ASCII host followed by an explicit port: the host labels are
    # expected in punycode form while the ":80" suffix is kept as-is.
    url = 'http://新华网.中国:80'
    expected = 'http://xn--xkrr14bows.xn--fiqs8s:80'
    self.assertEqual(safe_url_string(url), expected)

def test_safe_url_string_encode_idna_domain_with_username_password_and_port_number(self):
    # Credentials and port surround a non-ASCII host: only the host labels
    # are expected to be punycode-encoded; "admin:admin@" and ":21" must
    # come through unchanged.
    # NOTE: the expected value is spelled out in full. It contains an "@"
    # and had been replaced by an "[email protected]" placeholder, which
    # could never match the function's output.
    url = 'ftp://admin:admin@新华网.中国:21'
    expected = 'ftp://admin:admin@xn--xkrr14bows.xn--fiqs8s:21'
    self.assertEqual(safe_url_string(url), expected)

def test_safe_url_string_encode_idna_domain_with_username_without_password_and_port_number(self):
    # Username with an empty password ("admin:@") plus a port: the
    # "admin:@" prefix and ":21" suffix are expected to be preserved
    # around the punycode-encoded host.
    url = 'ftp://admin:@新华网.中国:21'
    expected = 'ftp://admin:@xn--xkrr14bows.xn--fiqs8s:21'
    self.assertEqual(safe_url_string(url), expected)


# Run the test suite only when this module is executed directly as a script,
# not when it is imported.
if __name__ == "__main__":
    unittest.main()
18 changes: 17 additions & 1 deletion w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,26 @@ def safe_url_string(
decoded = to_unicode(url, encoding=encoding, errors="percentencode")
parts = urlsplit(_ascii_tab_newline_re.sub("", decoded))

username, password, hostname, port_number = (
parts.username,
parts.password,
parts.hostname,
parts.port
)
netloc_bytes = b''

# IDNA encoding can fail for too long labels (>63 characters)
# or missing labels (e.g. http://.example.com)
try:
netloc_bytes = parts.netloc.encode("idna")
# When a hostname is available, rebuild the netloc from its parts
# (credentials, hostname, port) instead of encoding the raw netloc directly
if hostname:
if isinstance(username, str) and isinstance(password, str):
netloc_bytes += f'{username}:{password}@'.encode("idna")
netloc_bytes += hostname.encode("idna")
if port_number:
netloc_bytes += f":{port_number}".encode("idna")
else:
netloc_bytes = parts.netloc.encode("idna")
except UnicodeError:
netloc = parts.netloc
else:
Expand Down