From 50231c5752206b5169bc261ba7c563db6231e048 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 14 Nov 2024 21:02:53 +0000 Subject: [PATCH 01/20] GH-126838: `url2pathname()`: handle non-empty authority section on POSIX Adjust `urllib.request.url2pathname()` to parse the URL authority and path with `urlsplit()` on POSIX. If the authority is empty or resolves to the current host, it is ignored and the URL path is used as the pathname. If not, we raise `URLError`. --- Lib/test/test_urllib.py | 9 ++-- Lib/urllib/request.py | 46 ++++++------------- ...-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst | 4 ++ 3 files changed, 25 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 2c53ce3f99e675..f41e37f2b490b6 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -13,6 +13,7 @@ from test.support import socket_helper from test.support import warnings_helper import os +import socket try: import ssl except ImportError: @@ -713,7 +714,7 @@ def constructLocalFileUrl(self, filePath): filePath.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filePath is not encodable to utf8") - return "file://%s" % urllib.request.pathname2url(filePath) + return "file:%s" % urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, @@ -1607,10 +1608,12 @@ def test_url2pathname_win(self): def test_url2pathname_posix(self): fn = urllib.request.url2pathname self.assertEqual(fn('/foo/bar'), '/foo/bar') - self.assertEqual(fn('//foo/bar'), '//foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//foo/bar') self.assertEqual(fn('///foo/bar'), '/foo/bar') self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar') + self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') + 
self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar') + self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar') class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 18a837dd57ed59..89c06daa7d96c2 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1448,16 +1448,6 @@ def parse_http_list(s): return [part.strip() for part in res] class FileHandler(BaseHandler): - # Use local file or FTP depending on form of URL - def file_open(self, req): - url = req.selector - if url[:2] == '//' and url[2:3] != '/' and (req.host and - req.host != 'localhost'): - if not req.host in self.get_names(): - raise URLError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(req) - # names for the localhost names = None def get_names(self): @@ -1474,9 +1464,8 @@ def get_names(self): def open_local_file(self, req): import email.utils import mimetypes - host = req.host - filename = req.selector - localfile = url2pathname(filename) + filename = req.full_url + localfile = url2pathname(filename.removeprefix('file:')) try: stats = os.stat(localfile) size = stats.st_size @@ -1485,24 +1474,20 @@ def open_local_file(self, req): headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) - if host: - host, port = _splitport(host) - if not host or \ - (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename - return addinfourl(open(localfile, 'rb'), headers, origurl) + return addinfourl(open(localfile, 'rb'), headers, filename) except OSError as exp: raise URLError(exp) - raise URLError('file not on local host') -def _safe_gethostbyname(host): + file_open = open_local_file + +def _is_local_host(host): + if not host or host == 'localhost': + return 
True try: - return socket.gethostbyname(host) + name = socket.gethostbyname(host) except socket.gaierror: - return None + return False + return name in FileHandler().get_names() class FTPHandler(BaseHandler): def ftp_open(self, req): @@ -1653,13 +1638,12 @@ def data_open(self, req): if os.name == 'nt': from nturl2path import url2pathname, pathname2url else: - def url2pathname(pathname): + def url2pathname(url): """OS-specific conversion from a relative URL of the 'file' scheme to a file system path; not recommended for general use.""" - if pathname[:3] == '///': - # URL has an empty authority section, so the path begins on the - # third character. - pathname = pathname[2:] + authority, pathname = urlsplit(f'file:{url}')[1:3] + if not _is_local_host(authority): + raise URLError(f'URL {url!r} uses non-local authority {authority!r}') return unquote(pathname) def pathname2url(pathname): diff --git a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst new file mode 100644 index 00000000000000..976b5a1f3ae9cb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst @@ -0,0 +1,4 @@ +Fix issue where :func:`urllib.request.url2pathname` included any URL +authority in the resulting path, except on Windows. It now discards a local +authority, and raises :exc:`urllib.error.URLError` for a non-local +authority. 
From 9032105dfa6e1cb7bd64cafb4e01f9d9804149eb Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 14 Nov 2024 21:34:20 +0000 Subject: [PATCH 02/20] Fix test --- Lib/test/test_urllib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index f41e37f2b490b6..e8325827a0060a 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -529,7 +529,7 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_fileurl = 'file://localhost' + tmp_file.replace(os.path.sep, '/') try: self.assertTrue(os.path.exists(tmp_file)) with urlopen(tmp_fileurl) as fobj: From b91afca015c41f826078718a51a44aad6451f1ec Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 14 Nov 2024 21:41:03 +0000 Subject: [PATCH 03/20] Undo unnecessary change --- Lib/test/test_urllib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index e8325827a0060a..5fc05ca3139313 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -714,7 +714,7 @@ def constructLocalFileUrl(self, filePath): filePath.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filePath is not encodable to utf8") - return "file:%s" % urllib.request.pathname2url(filePath) + return "file://%s" % urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, From 4b320b855025757dacd658cf553ce25bf42921ee Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 14 Nov 2024 21:55:49 +0000 Subject: [PATCH 04/20] Fix Windows tests --- Lib/test/test_urllib2.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index b90ccc2f125b93..d88af6ef2de853 100644 --- a/Lib/test/test_urllib2.py +++ 
b/Lib/test/test_urllib2.py @@ -827,14 +827,17 @@ def test_file(self): urls = [ "file://localhost%s" % urlpath, "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), ] - try: - localaddr = socket.gethostbyname(socket.gethostname()) - except socket.gaierror: - localaddr = '' - if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + if os.name != 'nt': + # On POSIX the local hostname may appear in a local file URL. + # On Windows this would be decoded as a UNC path. + urls.append("file://%s%s" % (socket.gethostbyname('localhost'), urlpath)) + try: + localaddr = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + localaddr = '' + if localaddr: + urls.append("file://%s%s" % (localaddr, urlpath)) for url in urls: f = open(TESTFN, "wb") From 0cc3a673fe7776630a8ccd6951a2085dd0f51137 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 14 Nov 2024 22:02:13 +0000 Subject: [PATCH 05/20] More test fixes --- Lib/test/test_urllib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 5fc05ca3139313..8b660e8f81abc4 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -529,7 +529,7 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost' + tmp_file.replace(os.path.sep, '/') + tmp_fileurl = 'file:' + urllib.request.pathname2url(tmp_file) try: self.assertTrue(os.path.exists(tmp_file)) with urlopen(tmp_fileurl) as fobj: From ab800ab256dca8f67485154c0a701542b4c9bd24 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Mar 2025 19:51:54 +0000 Subject: [PATCH 06/20] Updatez --- Doc/library/urllib.request.rst | 6 +++ Lib/urllib/request.py | 45 +++++++++---------- ...-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst | 10 +++-- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 
14785d21e74a11..06495cb38847b9 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -181,6 +181,12 @@ The :mod:`urllib.request` module defines the following functions: >>> url2pathname(url.removeprefix('file:')) 'C:\\Program Files' + .. versionchanged:: 3.14 + On non-Windows platforms, if a URL authority (e.g. a hostname) is + present, then it is discarded if it resolves to ``localhost``, otherwise + :exc:`~urllib.error.URLError` is raised. In previous versions the + authority is included in the returned path. + .. versionchanged:: 3.14 Windows drive letters are no longer converted to uppercase, and ``:`` characters not following a drive letter no longer cause an diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index de4cf12a2b12e9..52b8dbfd3a7fd9 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1450,29 +1450,17 @@ def parse_http_list(s): return [part.strip() for part in res] class FileHandler(BaseHandler): - # names for the localhost - names = None - def get_names(self): - if FileHandler.names is None: - try: - FileHandler.names = tuple( - socket.gethostbyname_ex('localhost')[2] + - socket.gethostbyname_ex(socket.gethostname())[2]) - except socket.gaierror: - FileHandler.names = (socket.gethostbyname('localhost'),) - return FileHandler.names - # not entirely sure what the rules are here def open_local_file(self, req): import email.utils import mimetypes - filename = req.full_url - localfile = url2pathname(filename.removeprefix('file:')) + filename = _splittype(req.full_url)[1] + localfile = url2pathname(filename) try: stats = os.stat(localfile) size = stats.st_size modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(filename)[0] + mtype = mimetypes.guess_file_type(localfile)[0] headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) @@ -1483,14 +1471,25 @@ def open_local_file(self, 
req): file_open = open_local_file -def _is_local_host(host): - if not host or host == 'localhost': +_local_addresses = None + +def _is_local_authority(authority): + global _local_addresses + + if not authority or authority == 'localhost': return True try: - name = socket.gethostbyname(host) + address = socket.gethostbyname(authority) except socket.gaierror: return False - return name in FileHandler().get_names() + if _local_addresses is None: + try: + _local_addresses = tuple( + socket.gethostbyname_ex('localhost')[2] + + socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + _local_addresses = (socket.gethostbyname('localhost'),) + return address in _local_addresses class FTPHandler(BaseHandler): def ftp_open(self, req): @@ -1639,12 +1638,12 @@ def url2pathname(url): """OS-specific conversion from a relative URL of the 'file' scheme to a file system path; not recommended for general use.""" authority, url = _splithost(url) - if os.name == 'nt': if authority and authority != 'localhost': + # e.g. file://server/share/file.txt url = '//' + authority + url elif url[:3] == '///': - # Skip past extra slash before UNC drive in URL path. + # e.g. 
file://///server/share/file.txt url = url[1:] else: if url[:1] == '/' and url[2:3] in (':', '|'): @@ -1654,8 +1653,8 @@ def url2pathname(url): # Older URLs use a pipe after a drive letter url = url[:1] + ':' + url[2:] url = url.replace('/', '\\') - elif not _is_local_host(authority): - raise URLError(f'URL {url!r} uses non-local authority {authority!r}') + elif not _is_local_authority(authority): + raise URLError("file:// scheme is supported only on localhost") encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() return unquote(url, encoding=encoding, errors=errors) diff --git a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst index 976b5a1f3ae9cb..b367a7adc257ea 100644 --- a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst +++ b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst @@ -1,4 +1,6 @@ -Fix issue where :func:`urllib.request.url2pathname` included any URL -authority in the resulting path, except on Windows. It now discards a local -authority, and raises :exc:`urllib.error.URLError` for a non-local -authority. +Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with +non-empty, non-``localhost`` authorities on non-Windows systems. Authorities +that resolve to ``localhost`` are now discarded; other authorities now cause +a :exc:`urllib.error.URLError` to be raised. Previously these authorities +were incorrectly included in the returned path. This change does not affect +Windows, where UNC paths are returned for non-local URLs. 
From 40d346a901952b37b8398931bdf653929de38927 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Mar 2025 20:40:10 +0000 Subject: [PATCH 07/20] Fix tests #1 --- Lib/test/test_urllib2.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 088ee4c4f90803..d47984fbe7ff32 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -816,14 +816,17 @@ def test_file(self): urls = [ canonurl, parsed._replace(netloc='localhost').geturl(), - parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] - try: - localaddr = socket.gethostbyname(socket.gethostname()) - except socket.gaierror: - localaddr = '' - if localaddr: - urls.append(parsed._replace(netloc=localaddr).geturl()) + if os.name != 'nt': + # On POSIX the local hostname may appear in a local file URL. + # On Windows this would be decoded as a UNC path. + urls.append(parsed._replace(netloc=socket.gethostbyname('localhost')).geturl()) + try: + localaddr = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + localaddr = '' + if localaddr: + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") From 79f5d05db3ca06e65a7e912fe4fadd60b006de22 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Mar 2025 21:30:06 +0000 Subject: [PATCH 08/20] Restore previous Windows behaviour --- Doc/library/pathlib.rst | 9 +++++---- Doc/library/urllib.request.rst | 9 +++++---- Lib/test/test_pathlib/test_pathlib.py | 4 +++- Lib/test/test_urllib2.py | 17 +++++++---------- Lib/urllib/request.py | 2 +- ...24-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst | 9 ++++----- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7d3d6d1b9aebf4..95d29faa49a771 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -872,10 +872,11 @@ conforming to :rfc:`8089`. .. versionadded:: 3.13 .. 
versionchanged:: 3.14 - On non-Windows platforms, if a URL authority (e.g. a hostname) is - present, then it is discarded if it resolves to ``localhost``, otherwise - :exc:`ValueError` is raised. In previous versions the authority is - included in the returned path. + If a URL authority (e.g. a hostname) is present and resolves to + ``localhost``, it is discarded. If an authority is present and + *doesn't* resolve to ``localhost``, then on Windows a UNC path is + returned (as before), and on other platforms a :exc:`ValueError` is + raised. .. method:: Path.as_uri() diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 06495cb38847b9..3b73d30a6ea6b3 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -182,10 +182,11 @@ The :mod:`urllib.request` module defines the following functions: 'C:\\Program Files' .. versionchanged:: 3.14 - On non-Windows platforms, if a URL authority (e.g. a hostname) is - present, then it is discarded if it resolves to ``localhost``, otherwise - :exc:`~urllib.error.URLError` is raised. In previous versions the - authority is included in the returned path. + If a URL authority (e.g. a hostname) is present and resolves to + ``localhost``, it is discarded. If an authority is present and + *doesn't* resolve to ``localhost``, then on Windows a UNC path is + returned (as before), and on other platforms a + :exc:`~urllib.error.URLError` is raised. .. 
versionchanged:: 3.14 Windows drive letters are no longer converted to uppercase, and ``:`` diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b0fd10909bd789..3ad4b19e5d47df 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -3284,7 +3284,9 @@ def test_from_uri_posix(self): self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar')) self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar')) - self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'), P('/foo/bar')) + if not is_wasi: + self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'), + P('/foo/bar')) self.assertRaises(ValueError, P.from_uri, 'foo/bar') self.assertRaises(ValueError, P.from_uri, '/foo/bar') self.assertRaises(ValueError, P.from_uri, '//foo/bar') diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index d47984fbe7ff32..088ee4c4f90803 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -816,17 +816,14 @@ def test_file(self): urls = [ canonurl, parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] - if os.name != 'nt': - # On POSIX the local hostname may appear in a local file URL. - # On Windows this would be decoded as a UNC path. 
- urls.append(parsed._replace(netloc=socket.gethostbyname('localhost')).geturl()) - try: - localaddr = socket.gethostbyname(socket.gethostname()) - except socket.gaierror: - localaddr = '' - if localaddr: - urls.append(parsed._replace(netloc=localaddr).geturl()) + try: + localaddr = socket.gethostbyname(socket.gethostname()) + except socket.gaierror: + localaddr = '' + if localaddr: + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bef560ec2cee59..fe8aa09ba11597 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1639,7 +1639,7 @@ def url2pathname(url): to a file system path; not recommended for general use.""" authority, url = _splithost(url) if os.name == 'nt': - if authority and authority != 'localhost': + if not _is_local_authority(authority): # e.g. file://server/share/file.txt url = '//' + authority + url elif url[:3] == '///': diff --git a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst index b367a7adc257ea..4c3c488d73ce30 100644 --- a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst +++ b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst @@ -1,6 +1,5 @@ Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with -non-empty, non-``localhost`` authorities on non-Windows systems. Authorities -that resolve to ``localhost`` are now discarded; other authorities now cause -a :exc:`urllib.error.URLError` to be raised. Previously these authorities -were incorrectly included in the returned path. This change does not affect -Windows, where UNC paths are returned for non-local URLs. +authorities. If an authority is present and resolves to ``localhost``, it is +now discarded. 
If an authority is present but *doesn't* resolve to +``localhost``, then on Windows a UNC path is returned (as before), and on +other platforms a :exc:`urllib.errors.URLError` is now raised. From bb69e493a5e943e97c98e7a590e73ba1ec1a5fa4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Mar 2025 22:10:10 +0000 Subject: [PATCH 09/20] Fix WASI, docs --- Lib/pathlib/__init__.py | 3 +-- Lib/urllib/request.py | 3 +-- .../Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 420bfa4a075a52..659a8771a2c02a 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -1274,10 +1274,9 @@ def from_uri(cls, uri): from urllib.error import URLError from urllib.request import url2pathname try: - pathname = url2pathname(uri.removeprefix('file:')) + path = cls(url2pathname(uri.removeprefix('file:'))) except URLError as exc: raise ValueError(exc.reason) from None - path = cls(pathname) if not path.is_absolute(): raise ValueError(f"URI is not absolute: {uri!r}") return path diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index fe8aa09ba11597..d4a1ef2bb042f2 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1450,7 +1450,6 @@ def parse_http_list(s): return [part.strip() for part in res] class FileHandler(BaseHandler): - # not entirely sure what the rules are here def open_local_file(self, req): import email.utils import mimetypes @@ -1480,7 +1479,7 @@ def _is_local_authority(authority): return True try: address = socket.gethostbyname(authority) - except socket.gaierror: + except (socket.gaierror, AttributeError): return False if _local_addresses is None: try: diff --git a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst index 4c3c488d73ce30..857cc359229daa 100644 --- 
a/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst +++ b/Misc/NEWS.d/next/Library/2024-11-14-21-17-48.gh-issue-126838.Yr5vKF.rst @@ -2,4 +2,4 @@ Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with authorities. If an authority is present and resolves to ``localhost``, it is now discarded. If an authority is present but *doesn't* resolve to ``localhost``, then on Windows a UNC path is returned (as before), and on -other platforms a :exc:`urllib.errors.URLError` is now raised. +other platforms a :exc:`urllib.error.URLError` is now raised. From 7adddbf4f369dade5b99af2c0313a6e2d0319f9f Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Mar 2025 22:29:08 +0000 Subject: [PATCH 10/20] wasi yousa problem? --- Lib/test/test_pathlib/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 3ad4b19e5d47df..5346fca330e23c 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -3283,8 +3283,8 @@ def test_from_uri_posix(self): self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar')) self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar')) - self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar')) if not is_wasi: + self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'), P('/foo/bar')) self.assertRaises(ValueError, P.from_uri, 'foo/bar') From b5e54604ac1dc44b17f88019ee0611c3da30323a Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 20 Mar 2025 01:58:56 +0000 Subject: [PATCH 11/20] Update whatsnew --- Doc/whatsnew/3.14.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 767cf9a1f08dc2..0901b5dec34f83 100644 --- 
a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -998,7 +998,24 @@ urllib * Upgrade HTTP digest authentication algorithm for :mod:`urllib.request` by supporting SHA-256 digest authentication as specified in :rfc:`7616`. (Contributed by Calvin Bui in :gh:`128193`.) +* Improve support for ``file:`` URLs. + In :func:`urllib.request.url2pathname`: + + - Discard URL authorities that resolve to ``localhost``. + - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve + to ``localhost``, except on Windows where we return a UNC path. + + In :func:`urllib.request.pathname2url`: + + - Include an empty URL authority when a path begins with a slash. For + example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``. + + On Windows, drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an :exc:`OSError` + exception to be raised. + + (Contributed by Barney Gale in :gh:`125866`.) uuid ---- From 67fccd53f6221bd1f6d0a684ce5732b7b866c640 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 20 Mar 2025 02:00:17 +0000 Subject: [PATCH 12/20] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/pathlib.rst | 2 +- Doc/library/urllib.request.rst | 2 +- Lib/urllib/request.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f86234ce765043..6f04995c222501 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -871,7 +871,7 @@ conforming to :rfc:`8089`. .. versionadded:: 3.13 - .. versionchanged:: 3.14 + .. versionchanged:: next If a URL authority (e.g. a hostname) is present and resolves to ``localhost``, it is discarded. 
If an authority is present and *doesn't* resolve to ``localhost``, then on Windows a UNC path is diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 3b73d30a6ea6b3..d475c5459316a7 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -181,7 +181,7 @@ The :mod:`urllib.request` module defines the following functions: >>> url2pathname(url.removeprefix('file:')) 'C:\\Program Files' - .. versionchanged:: 3.14 + .. versionchanged:: next If a URL authority (e.g. a hostname) is present and resolves to ``localhost``, it is discarded. If an authority is present and *doesn't* resolve to ``localhost``, then on Windows a UNC path is diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index d4a1ef2bb042f2..6a3271b6c376b6 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1463,7 +1463,7 @@ def open_local_file(self, req): headers = email.message_from_string( 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) - origurl = 'file:' + pathname2url(localfile) + origurl = f'file:{pathname2url(localfile)}' return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) From 89ddf7f3cf8d20ae21572b8bdcd7006bccdad2d7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 20 Mar 2025 02:03:53 +0000 Subject: [PATCH 13/20] Address some review feedback --- Doc/library/pathlib.rst | 9 ++++----- Doc/library/urllib.request.rst | 9 ++++----- Doc/whatsnew/3.14.rst | 2 +- Lib/urllib/request.py | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 6f04995c222501..8bc394c8e22e20 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -872,11 +872,10 @@ conforming to :rfc:`8089`. .. versionadded:: 3.13 .. versionchanged:: next - If a URL authority (e.g. a hostname) is present and resolves to - ``localhost``, it is discarded. 
If an authority is present and - *doesn't* resolve to ``localhost``, then on Windows a UNC path is - returned (as before), and on other platforms a :exc:`ValueError` is - raised. + If a URL authority (e.g. a hostname) is present and resolves to a local + address, it is discarded. If an authority is present and *doesn't* + resolve to a local address, then on Windows a UNC path is returned (as + before), and on other platforms a :exc:`ValueError` is raised. .. method:: Path.as_uri() diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index d475c5459316a7..38b07cab2e073f 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -182,11 +182,10 @@ The :mod:`urllib.request` module defines the following functions: 'C:\\Program Files' .. versionchanged:: next - If a URL authority (e.g. a hostname) is present and resolves to - ``localhost``, it is discarded. If an authority is present and - *doesn't* resolve to ``localhost``, then on Windows a UNC path is - returned (as before), and on other platforms a - :exc:`~urllib.error.URLError` is raised. + If a URL authority (e.g. a hostname) is present and resolves to a local + address, it is discarded. If an authority is present and *doesn't* + resolve to a local address, then on Windows a UNC path is returned (as + before), and on other platforms :exc:`~urllib.error.URLError` is raised. .. versionchanged:: 3.14 Windows drive letters are no longer converted to uppercase, and ``:`` diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 0901b5dec34f83..b9e040c94911f4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1002,7 +1002,7 @@ urllib In :func:`urllib.request.url2pathname`: - - Discard URL authorities that resolve to ``localhost``. + - Discard URL authorities that resolve to a local address. - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve to ``localhost``, except on Windows where we return a UNC path. 
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 6a3271b6c376b6..d3a300c9769c42 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1483,11 +1483,11 @@ def _is_local_authority(authority): return False if _local_addresses is None: try: - _local_addresses = tuple( + _local_addresses = frozenset( socket.gethostbyname_ex('localhost')[2] + socket.gethostbyname_ex(socket.gethostname())[2]) except socket.gaierror: - _local_addresses = (socket.gethostbyname('localhost'),) + _local_addresses = frozenset(socket.gethostbyname('localhost'),) return address in _local_addresses class FTPHandler(BaseHandler): From d327f34f7afc7578c8da912e74373e6541bb1832 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 29 Mar 2025 19:51:39 +0000 Subject: [PATCH 14/20] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/pathlib.rst | 1 + Doc/whatsnew/3.14.rst | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8bc394c8e22e20..b82986902861b2 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -877,6 +877,7 @@ conforming to :rfc:`8089`. resolve to a local address, then on Windows a UNC path is returned (as before), and on other platforms a :exc:`ValueError` is raised. + .. method:: Path.as_uri() Represent the path as a 'file' URI. :exc:`ValueError` is raised if diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b9e040c94911f4..6dd9f94d3f280b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -998,6 +998,7 @@ urllib * Upgrade HTTP digest authentication algorithm for :mod:`urllib.request` by supporting SHA-256 digest authentication as specified in :rfc:`7616`. (Contributed by Calvin Bui in :gh:`128193`.) + * Improve support for ``file:`` URLs. 
In :func:`urllib.request.url2pathname`: @@ -1017,6 +1018,7 @@ urllib (Contributed by Barney Gale in :gh:`125866`.) + uuid ---- From 313a68f23109d0b4fa9f3423a916583917f6ab1a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 29 Mar 2025 19:52:51 +0000 Subject: [PATCH 15/20] Swap .. versionchanged:: order --- Doc/library/urllib.request.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 38b07cab2e073f..dd2b9e11f758fc 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -158,16 +158,16 @@ The :mod:`urllib.request` module defines the following functions: >>> 'file:' + pathname2url(path) 'file:///C:/Program%20Files' - .. versionchanged:: 3.14 - Paths beginning with a slash are converted to URLs with authority - sections. For example, the path ``/etc/hosts`` is converted to - the URL ``///etc/hosts``. - .. versionchanged:: 3.14 Windows drive letters are no longer converted to uppercase, and ``:`` characters not following a drive letter no longer cause an :exc:`OSError` exception to be raised on Windows. + .. versionchanged:: 3.14 + Paths beginning with a slash are converted to URLs with authority + sections. For example, the path ``/etc/hosts`` is converted to + the URL ``///etc/hosts``. + .. function:: url2pathname(url) @@ -181,17 +181,17 @@ The :mod:`urllib.request` module defines the following functions: >>> url2pathname(url.removeprefix('file:')) 'C:\\Program Files' + .. versionchanged:: 3.14 + Windows drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an + :exc:`OSError` exception to be raised on Windows. + .. versionchanged:: next If a URL authority (e.g. a hostname) is present and resolves to a local address, it is discarded. 
If an authority is present and *doesn't* resolve to a local address, then on Windows a UNC path is returned (as before), and on other platforms :exc:`~urllib.error.URLError` is raised. - .. versionchanged:: 3.14 - Windows drive letters are no longer converted to uppercase, and ``:`` - characters not following a drive letter no longer cause an - :exc:`OSError` exception to be raised on Windows. - .. function:: getproxies() From e1f6d10db818d6fa8c25defc3df5bef78eff986c Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 29 Mar 2025 19:55:07 +0000 Subject: [PATCH 16/20] Add test cases involving ports --- Lib/test/test_urllib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index c3062f9e86eef7..ab51bc0f723f32 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1533,6 +1533,10 @@ def test_url2pathname_posix(self): fn = urllib.request.url2pathname self.assertEqual(fn('/foo/bar'), '/foo/bar') self.assertRaises(urllib.error.URLError, fn, '//foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//localhost:/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar') + self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar') self.assertEqual(fn('///foo/bar'), '/foo/bar') self.assertEqual(fn('////foo/bar'), '//foo/bar') self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') From c5f5e24715a6eae6cbbc8dec9873e43c45debc37 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 29 Mar 2025 20:07:58 +0000 Subject: [PATCH 17/20] Add test methods for common posix/windows results --- Lib/test/test_urllib.py | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index ab51bc0f723f32..c4e533abaec089 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1425,6 +1425,21 @@ def test_quoting(self): 
"url2pathname() failed; %s != %s" % (expect, result)) + def test_pathname2url(self): + # Test cases common to Windows and POSIX. + fn = urllib.request.pathname2url + sep = os.path.sep + self.assertEqual(fn(''), '') + self.assertEqual(fn(f'{sep}'), '///') + self.assertEqual(fn(f'{sep}{sep}'), '////') + self.assertEqual(fn('a'), 'a') + self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c') + self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c') + self.assertEqual(fn(f'{sep}{sep}a{sep}b.c'), '////a/b.c') + self.assertEqual(fn(f'{sep}{sep}{sep}a{sep}b.c'), '/////a/b.c') + self.assertEqual(fn(f'{sep}{sep}{sep}{sep}a{sep}b.c'), '//////a/b.c') + self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c') + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_pathname2url_win(self): @@ -1463,17 +1478,6 @@ def test_pathname2url_win(self): for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) - @unittest.skipIf(sys.platform == 'win32', - 'test specific to POSIX pathnames') - def test_pathname2url_posix(self): - fn = urllib.request.pathname2url - self.assertEqual(fn('/'), '///') - self.assertEqual(fn('/a/b.c'), '///a/b.c') - self.assertEqual(fn('//a/b.c'), '////a/b.c') - self.assertEqual(fn('///a/b.c'), '/////a/b.c') - self.assertEqual(fn('////a/b.c'), '//////a/b.c') - self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c') - @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_pathname2url_nonascii(self): encoding = sys.getfilesystemencoding() @@ -1481,11 +1485,25 @@ def test_pathname2url_nonascii(self): url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors) self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url) + def test_url2pathname(self): + # Test cases common to Windows and POSIX. 
+ fn = urllib.request.url2pathname + sep = os.path.sep + self.assertEqual(fn(''), '') + self.assertEqual(fn('/'), f'{sep}') + self.assertEqual(fn('///'), f'{sep}') + self.assertEqual(fn('////'), f'{sep}{sep}') + self.assertEqual(fn('foo'), 'foo') + self.assertEqual(fn('foo/bar'), f'foo{sep}bar') + self.assertEqual(fn('/foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar') + self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar') + @unittest.skipUnless(sys.platform == 'win32', 'test specific to Windows pathnames.') def test_url2pathname_win(self): fn = urllib.request.url2pathname - self.assertEqual(fn('/'), '\\') self.assertEqual(fn('/C:/'), 'C:\\') self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') @@ -1531,15 +1549,11 @@ def test_url2pathname_win(self): 'test specific to POSIX pathnames') def test_url2pathname_posix(self): fn = urllib.request.url2pathname - self.assertEqual(fn('/foo/bar'), '/foo/bar') self.assertRaises(urllib.error.URLError, fn, '//foo/bar') self.assertRaises(urllib.error.URLError, fn, '//localhost:/foo/bar') self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar') self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar') self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar') - self.assertEqual(fn('///foo/bar'), '/foo/bar') - self.assertEqual(fn('////foo/bar'), '//foo/bar') - self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar') self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar') self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar') From 25521c1775e9f64d0da2ad21cde73e496903d919 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 29 Mar 2025 20:19:30 +0000 Subject: [PATCH 18/20] Windows test fixes --- Lib/test/test_urllib.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 
c4e533abaec089..ecf429e17811a4 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1430,14 +1430,10 @@ def test_pathname2url(self): fn = urllib.request.pathname2url sep = os.path.sep self.assertEqual(fn(''), '') - self.assertEqual(fn(f'{sep}'), '///') - self.assertEqual(fn(f'{sep}{sep}'), '////') + self.assertEqual(fn(sep), '///') self.assertEqual(fn('a'), 'a') self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c') self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c') - self.assertEqual(fn(f'{sep}{sep}a{sep}b.c'), '////a/b.c') - self.assertEqual(fn(f'{sep}{sep}{sep}a{sep}b.c'), '/////a/b.c') - self.assertEqual(fn(f'{sep}{sep}{sep}{sep}a{sep}b.c'), '//////a/b.c') self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c') @unittest.skipUnless(sys.platform == 'win32', @@ -1478,6 +1474,14 @@ def test_pathname2url_win(self): for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) + @unittest.skipIf(sys.platform == 'win32', + 'test specific to POSIX pathnames') + def test_pathname2url_posix(self): + fn = urllib.request.pathname2url + self.assertEqual(fn('//a/b.c'), '////a/b.c') + self.assertEqual(fn('///a/b.c'), '/////a/b.c') + self.assertEqual(fn('////a/b.c'), '//////a/b.c') + @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_pathname2url_nonascii(self): encoding = sys.getfilesystemencoding() From fa7745691ff9f650e7d6ed07588cf85ea38a1570 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 29 Mar 2025 20:24:14 +0000 Subject: [PATCH 19/20] Restore FileHandler.names and get_names() --- Lib/urllib/request.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index d3a300c9769c42..84c075ec8b359f 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1450,6 +1450,19 @@ def parse_http_list(s): return [part.strip() for part in res] class FileHandler(BaseHandler): + # names for the localhost + names = None + def 
get_names(self): + if FileHandler.names is None: + try: + FileHandler.names = tuple( + socket.gethostbyname_ex('localhost')[2] + + socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + FileHandler.names = (socket.gethostbyname('localhost'),) + return FileHandler.names + + # not entirely sure what the rules are here def open_local_file(self, req): import email.utils import mimetypes @@ -1470,25 +1483,14 @@ def open_local_file(self, req): file_open = open_local_file -_local_addresses = None - def _is_local_authority(authority): - global _local_addresses - if not authority or authority == 'localhost': return True try: address = socket.gethostbyname(authority) except (socket.gaierror, AttributeError): return False - if _local_addresses is None: - try: - _local_addresses = frozenset( - socket.gethostbyname_ex('localhost')[2] + - socket.gethostbyname_ex(socket.gethostname())[2]) - except socket.gaierror: - _local_addresses = frozenset(socket.gethostbyname('localhost'),) - return address in _local_addresses + return address in FileHandler().get_names() class FTPHandler(BaseHandler): def ftp_open(self, req): From 3a506418f9aa6b3f05e5dafc11325709596d5ecb Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 10 Apr 2025 04:10:23 +0100 Subject: [PATCH 20/20] Docs improvements --- Doc/library/urllib.request.rst | 9 +++++---- Doc/whatsnew/3.14.rst | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index dd2b9e11f758fc..bc9e90de9e1918 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -187,10 +187,11 @@ The :mod:`urllib.request` module defines the following functions: :exc:`OSError` exception to be raised on Windows. .. versionchanged:: next - If a URL authority (e.g. a hostname) is present and resolves to a local - address, it is discarded. 
If an authority is present and *doesn't* - resolve to a local address, then on Windows a UNC path is returned (as - before), and on other platforms :exc:`~urllib.error.URLError` is raised. + This function calls :func:`socket.gethostbyname` if the URL authority + isn't empty or ``localhost``. If the authority resolves to a local IP + address then it is discarded; otherwise, on Windows a UNC path is + returned (as before), and on other platforms a + :exc:`~urllib.error.URLError` is raised. .. function:: getproxies() diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 6dd9f94d3f280b..ab7477ce468686 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -999,11 +999,11 @@ urllib supporting SHA-256 digest authentication as specified in :rfc:`7616`. (Contributed by Calvin Bui in :gh:`128193`.) -* Improve support for ``file:`` URLs. +* Improve standards compliance when parsing and emitting ``file:`` URLs. In :func:`urllib.request.url2pathname`: - - Discard URL authorities that resolve to a local address. + - Discard URL authorities that resolve to a local IP address. - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve - to ``localhost``, except on Windows where we return a UNC path. + to a local IP address, except on Windows where we return a UNC path.