Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions Doc/library/importlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -416,12 +416,16 @@ ABC hierarchy::
Loaders that have a file-like storage back-end
that allows storing arbitrary data
can implement this abstract method to give direct access
to the data stored. :exc:`OSError` is to be raised if the *path* cannot
be found. The *path* is expected to be constructed using a module's
:attr:`__file__` attribute or an item from a package's :attr:`__path__`.
to the data stored.

An :exc:`OSError` is to be raised if the *path* cannot be found, and
a :exc:`ValueError` is raised if the *path* cannot be handled (e.g.,
the *path* contains null characters or the *path* is too long). The
*path* is expected to be constructed using a module's :attr:`__file__`
attribute or an item from a package's :attr:`__path__`.

.. versionchanged:: 3.4
Raises :exc:`OSError` instead of :exc:`NotImplementedError`.
Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`.


.. class:: InspectLoader
Expand Down Expand Up @@ -551,6 +555,9 @@ ABC hierarchy::

Reads *path* as a binary file and returns the bytes from it.

An :exc:`OSError` is to be raised if the *path* cannot be found, and
a :exc:`ValueError` is raised if the *path* is invalid (e.g., *path*
contains null characters or is too long).

.. class:: SourceLoader

Expand Down Expand Up @@ -582,12 +589,13 @@ ABC hierarchy::
- ``'size'`` (optional): the size in bytes of the source code.

Any other keys in the dictionary are ignored, to allow for future
extensions. If the path cannot be handled, :exc:`OSError` is raised.
extensions. If the path cannot be handled, raises an :exc:`OSError`
or a :exc:`ValueError` depending on the reason.

.. versionadded:: 3.3

.. versionchanged:: 3.4
Raise :exc:`OSError` instead of :exc:`NotImplementedError`.
Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`.

.. method:: path_mtime(path)

Expand All @@ -597,20 +605,21 @@ ABC hierarchy::
.. deprecated:: 3.3
This method is deprecated in favour of :meth:`path_stats`. You don't
have to implement it, but it is still available for compatibility
purposes. Raise :exc:`OSError` if the path cannot be handled.
purposes. If the path cannot be handled, raises an :exc:`OSError`
or a :exc:`ValueError` depending on the reason.

.. versionchanged:: 3.4
Raise :exc:`OSError` instead of :exc:`NotImplementedError`.
Raise :exc:`OSError` by default instead of :exc:`NotImplementedError`.

.. method:: set_data(path, data)

Optional abstract method which writes the specified bytes to a file
path. Any intermediate directories which do not exist are to be created
automatically.

When writing to the path fails because the path is read-only
(:const:`errno.EACCES`/:exc:`PermissionError`), do not propagate the
exception.
When writing to the path fails by raising an :class:`OSError` (e.g.,
the path is read-only (:const:`errno.EACCES`/:exc:`PermissionError`)
or a :class:`ValueError`, the exception is not propagated.

.. versionchanged:: 3.4
No longer raises :exc:`NotImplementedError` when called.
Expand Down
36 changes: 22 additions & 14 deletions Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def _path_stat(path):
Made a separate function to make it easier to override in experiments
(e.g. cache stat results).

This function may raise OSError or ValueError.
"""
return _os.stat(path)

Expand All @@ -156,7 +157,7 @@ def _path_is_mode_type(path, mode):
"""Test whether the path is the specified mode type."""
try:
stat_info = _path_stat(path)
except OSError:
except (OSError, ValueError):
return False
return (stat_info.st_mode & 0o170000) == mode

Expand Down Expand Up @@ -211,7 +212,7 @@ def _write_atomic(path, data, mode=0o666):
with _io.FileIO(fd, 'wb') as file:
file.write(data)
_os.replace(path_tmp, path)
except OSError:
except:
try:
_os.unlink(path_tmp)
except OSError:
Expand Down Expand Up @@ -654,7 +655,7 @@ def _calc_mode(path):
"""Calculate the mode permissions for a bytecode file."""
try:
mode = _path_stat(path).st_mode
except OSError:
except (OSError, ValueError):
mode = 0o666
# We always ensure write access so we can update cached files
# later even when the source files are read-only on Windows (#6074)
Expand Down Expand Up @@ -990,7 +991,7 @@ def find_spec(cls, fullname, path=None, target=None):
return None
try:
_path_stat(filepath)
except OSError:
except (OSError, ValueError):
return None
for loader, suffixes in _get_supported_file_loaders():
if filepath.endswith(tuple(suffixes)):
Expand Down Expand Up @@ -1036,7 +1037,7 @@ def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path (a str).

Raises OSError when the path cannot be handled.
Raises OSError or ValueError when the path cannot be handled.
"""
raise OSError

Expand All @@ -1050,7 +1051,7 @@ def path_stats(self, path):
- 'size' (optional) is the size in bytes of the source code.

Implementing this method allows the loader to read bytecode files.
Raises OSError when the path cannot be handled.
Raises OSError or ValueError when the path cannot be handled.
"""
return {'mtime': self.path_mtime(path)}

Expand All @@ -1076,7 +1077,7 @@ def get_source(self, fullname):
path = self.get_filename(fullname)
try:
source_bytes = self.get_data(path)
except OSError as exc:
except (OSError, ValueError) as exc:
raise ImportError('source not available through get_data()',
name=fullname) from exc
return decode_source(source_bytes)
Expand Down Expand Up @@ -1109,13 +1110,13 @@ def get_code(self, fullname):
else:
try:
st = self.path_stats(source_path)
except OSError:
except (OSError, ValueError):
pass
else:
source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except OSError:
except (OSError, ValueError):
pass
else:
exc_details = {
Expand Down Expand Up @@ -1211,7 +1212,10 @@ def get_filename(self, fullname):
return self.path

def get_data(self, path):
"""Return the data from path as raw bytes."""
"""Return the data from path as raw bytes.

This may raise an OSError or a ValueError if the path is invalid.
"""
if isinstance(self, (SourceLoader, ExtensionFileLoader)):
with _io.open_code(str(path)) as file:
return file.read()
Expand Down Expand Up @@ -1255,19 +1259,20 @@ def set_data(self, path, data, *, _mode=0o666):
except FileExistsError:
# Probably another Python process already created the dir.
continue
except OSError as exc:
# Could be a permission error, read-only filesystem: just forget
# about writing the data.
except (OSError, ValueError) as exc:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be made narrower rather than broader (i.e. only ignoring PermissionError)

# Could be a permission error, read-only filesystem, or
# an invalid path name: just forget about writing the data.
_bootstrap._verbose_message('could not create {!r}: {!r}',
parent, exc)
return
try:
_write_atomic(path, data, _mode)
_bootstrap._verbose_message('created {!r}', path)
except OSError as exc:
# Same as above: just don't write the bytecode.
_bootstrap._verbose_message('could not create {!r}: {!r}', path,
exc)
else:
_bootstrap._verbose_message('created {!r}', path)


class SourcelessFileLoader(FileLoader, _LoaderBasics):
Expand Down Expand Up @@ -1631,6 +1636,9 @@ def find_spec(self, fullname, target=None):
mtime = _path_stat(self.path or _os.getcwd()).st_mtime
except OSError:
mtime = -1
except ValueError:
# Invalid paths will never be usable even if mtime = -1.
return None
if mtime != self._path_mtime:
self._fill_cache()
self._path_mtime = mtime
Expand Down
27 changes: 22 additions & 5 deletions Lib/test/test_importlib/import_/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,30 @@ def test_path_importer_cache_empty_string(self):
self.check_found(found, importer)
self.assertIn(os.getcwd(), sys.path_importer_cache)

def test_None_on_sys_path(self):
# Putting None in sys.path[0] caused an import regression from Python
# 3.2: http://bugs.python.org/issue16514
def test_invalid_names_in_sys_path(self):
for name, desc in [
# Putting None in sys.path[0] caused an import regression from
# Python 3.2: http://bugs.python.org/issue16514
(None, 'None in sys.path[0]'),
# embedded NUL characters raise ValueError in os.stat()
('\x00', 'NUL bytes path'),
(f'Top{os.sep}Mid\x00', 'path with embedded NUL bytes'),
# A filename with surrogate codes. A UnicodeEncodeError is raised
# by os.stat() upon querying, which is a subclass of ValueError.
("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'),
# For POSIX platforms, an OSError will be raised but for Windows
# platforms, a ValueError is raised due to the path_t converter.
# See: https://github.com/python/cpython/issues/122353
('a' * 1_000_000, 'very long path'),
]:
with self.subTest(desc):
self._test_invalid_name_in_sys_path(name)

def _test_invalid_name_in_sys_path(self, name):
new_path = sys.path[:]
new_path.insert(0, None)
new_path.insert(0, name)
new_path_importer_cache = sys.path_importer_cache.copy()
new_path_importer_cache.pop(None, None)
new_path_importer_cache.pop(name, None)
new_path_hooks = [zipimport.zipimporter,
self.machinery.FileFinder.path_hook(
*self.importlib._bootstrap_external._get_supported_file_loaders())]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Handle :exc:`ValueError`\s raised by OS-related functions during import if
``sys.path`` contains invalid path names. Patch by Bénédikt Tran.