Skip to content

Commit 11b9602

Browse files
committed
GH-85168: Use filesystem encoding when converting to/from file URIs
Adjust `urllib.request.url2pathname()` and `urllib.request.pathname2url()` to use the filesystem encoding and error handler when quoting and unquoting file URIs, rather than forcing the use of UTF-8. No changes are needed in the `nturl2path` module because the filesystem encoding on Windows is UTF-8 by default, per PEP 529.
1 parent 3fecbe9 commit 11b9602

File tree

4 files changed

+20
-11
lines changed

4 files changed

+20
-11
lines changed

Lib/test/test_urllib.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -709,10 +709,6 @@ def tearDown(self):
709709

710710
def constructLocalFileUrl(self, filePath):
711711
filePath = os.path.abspath(filePath)
712-
try:
713-
filePath.encode("utf-8")
714-
except UnicodeEncodeError:
715-
raise unittest.SkipTest("filePath is not encodable to utf8")
716712
return "file://%s" % urllib.request.pathname2url(filePath)
717713

718714
def createNewTempFile(self, data=b""):
@@ -1561,6 +1557,13 @@ def test_pathname2url_posix(self):
15611557
self.assertEqual(fn('/'), '/')
15621558
self.assertEqual(fn('/a/b.c'), '/a/b.c')
15631559
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
1560+
try:
1561+
expect = os.fsencode('\xe9')
1562+
except UnicodeEncodeError:
1563+
pass
1564+
else:
1565+
expect = urllib.parse.quote_from_bytes(expect)
1566+
self.assertEqual(fn('\xe9'), expect)
15641567

15651568
@unittest.skipUnless(sys.platform == 'win32',
15661569
'test specific to Windows pathnames.')
@@ -1611,6 +1614,12 @@ def test_url2pathname_posix(self):
16111614
self.assertEqual(fn('///foo/bar'), '/foo/bar')
16121615
self.assertEqual(fn('////foo/bar'), '//foo/bar')
16131616
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
1617+
try:
1618+
expect = os.fsdecode(b'\xe9')
1619+
except UnicodeDecodeError:
1620+
pass
1621+
else:
1622+
self.assertEqual(fn('%e9'), expect)
16141623

16151624
class Utility_Tests(unittest.TestCase):
16161625
"""Testcase to test the various utility functions in the urllib."""

Lib/test/test_urllib2.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -718,10 +718,6 @@ def test_processors(self):
718718

719719

720720
def sanepathname2url(path):
721-
try:
722-
path.encode("utf-8")
723-
except UnicodeEncodeError:
724-
raise unittest.SkipTest("path is not encodable to utf8")
725721
urlpath = urllib.request.pathname2url(path)
726722
if os.name == "nt" and urlpath.startswith("///"):
727723
urlpath = urlpath[2:]

Lib/urllib/request.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
104104
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
105105
_splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
106-
unquote_to_bytes, urlunparse)
106+
quote_from_bytes, unquote_to_bytes, urlunparse)
107107
from urllib.response import addinfourl, addclosehook
108108

109109
# check for SSL
@@ -1660,12 +1660,12 @@ def url2pathname(pathname):
16601660
# URL has an empty authority section, so the path begins on the
16611661
# third character.
16621662
pathname = pathname[2:]
1663-
return unquote(pathname)
1663+
return os.fsdecode(unquote_to_bytes(pathname))
16641664

16651665
def pathname2url(pathname):
16661666
"""OS-specific conversion from a file system path to a relative URL
16671667
of the 'file' scheme; not recommended for general use."""
1668-
return quote(pathname)
1668+
return quote_from_bytes(os.fsencode(pathname))
16691669

16701670

16711671
ftpcache = {}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix issue where :func:`urllib.request.url2pathname` and
2+
:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
3+
unquoting file URIs. They now use the :term:`filesystem encoding and error
4+
handler`.

0 commit comments

Comments
 (0)