From e13ed9cbb63ffef79245d0ab3aa9b4f27bef2307 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 19 Jan 2025 00:45:43 +0000 Subject: [PATCH 1/2] GH-128520: Make `pathlib._abc.WritablePath` a sibling of `ReadablePath` In the private pathlib ABCs, support write-only virtual filesystems by making `WritablePath` inherit directly from `JoinablePath`, rather than subclassing `ReadablePath`. There are two complications: - `ReadablePath.open()` applies to both reading and writing - `ReadablePath.copy` is secretly an object that supports the *read* side of copying, whereas `WritablePath.copy` is a different kind of object supporting the *write* side We untangle these as follow: - A new `pathlib._abc.magic_open()` function replaces the `open()` method, which is dropped from the ABCs but remains in `pathlib.Path`. The function works like `io.open()`, but additionally accepts objects with `__open_rb__()` or `__open_wb__()` methods as appropriate for the mode. These new dunders are made abstract methods of `ReadablePath` and `WritablePath` respectively. If the pathlib ABCs are made public, we could consider blessing an "openable" protocol and supporting it in `io.open()`, removing the need for `pathlib._abc.magic_open()`. - `ReadablePath.copy` becomes a true method, whereas `WritablePath.copy` is deleted. A new `ReadablePath._copy_reader` property provides a `CopyReader` object, and similarly `WritablePath._copy_writer` is a `CopyWriter` object. Once GH-125413 is resolved, we'll be able to move the `CopyReader` functionality into `ReadablePath.info` and eliminate `ReadablePath._copy_reader`. --- Lib/pathlib/_abc.py | 134 ++++++++++++++-------- Lib/pathlib/_local.py | 54 +++++++-- Lib/test/test_pathlib/test_pathlib.py | 11 +- Lib/test/test_pathlib/test_pathlib_abc.py | 88 ++++++-------- 4 files changed, 175 insertions(+), 112 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 38bc660e0aeb30..26b9f1d5935d0d 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,6 +12,7 @@ """ import functools +import io import operator import posixpath from errno import EINVAL @@ -41,6 +42,40 @@ def _explode_path(path): return path, names +def magic_open(path, mode='r', buffering=-1, encoding=None, errors=None, + newline=None): + """ + Open the file pointed to by this path and return a file object, as + the built-in open() function does. + """ + try: + return io.open(path, mode, buffering, encoding, errors, newline) + except TypeError: + pass + cls = type(path) + text = 'b' not in mode + mode = ''.join(sorted(c for c in mode if c not in 'bt')) + if text: + try: + attr = getattr(cls, f'__open_{mode}__') + except AttributeError: + pass + else: + return attr(path, buffering, encoding, errors, newline) + + try: + attr = getattr(cls, f'__open_{mode}b__') + except AttributeError: + pass + else: + stream = attr(path, buffering) + if text: + stream = io.TextIOWrapper(stream, encoding, errors, newline) + return stream + + raise TypeError(f"{cls.__name__} can't be opened with mode {mode!r}") + + class PathGlobber(_GlobberBase): """ Class providing shell-style globbing for path objects. @@ -58,35 +93,15 @@ def concat_path(path, text): class CopyReader: """ - Class that implements copying between path objects. An instance of this - class is available from the ReadablePath.copy property; it's made callable - so that ReadablePath.copy() can be treated as a method. - - The target path's CopyWriter drives the process from its _create() method. - Files and directories are exchanged by calling methods on the source and - target paths, and metadata is exchanged by calling - source.copy._read_metadata() and target.copy._write_metadata(). + Class that implements the "read" part of copying between path objects. + An instance of this class is available from the ReadablePath._copy_reader + property. """ __slots__ = ('_path',) def __init__(self, path): self._path = path - def __call__(self, target, follow_symlinks=True, dirs_exist_ok=False, - preserve_metadata=False): - """ - Recursively copy this file or directory tree to the given destination. - """ - if not isinstance(target, ReadablePath): - target = self._path.with_segments(target) - - # Delegate to the target path's CopyWriter object. - try: - create = target.copy._create - except AttributeError: - raise TypeError(f"Target is not writable: {target}") from None - return create(self._path, follow_symlinks, dirs_exist_ok, preserve_metadata) - _readable_metakeys = frozenset() def _read_metadata(self, metakeys, *, follow_symlinks=True): @@ -96,8 +111,16 @@ def _read_metadata(self, metakeys, *, follow_symlinks=True): raise NotImplementedError -class CopyWriter(CopyReader): - __slots__ = () +class CopyWriter: + """ + Class that implements the "write" part of copying between path objects. An + instance of this class is available from the WritablePath._copy_writer + property. + """ + __slots__ = ('_path',) + + def __init__(self, path): + self._path = path _writable_metakeys = frozenset() @@ -110,7 +133,7 @@ def _write_metadata(self, metadata, *, follow_symlinks=True): def _create(self, source, follow_symlinks, dirs_exist_ok, preserve_metadata): self._ensure_distinct_path(source) if preserve_metadata: - metakeys = self._writable_metakeys & source.copy._readable_metakeys + metakeys = self._writable_metakeys & source._copy_reader._readable_metakeys else: metakeys = None if not follow_symlinks and source.is_symlink(): @@ -128,22 +151,22 @@ def _create_dir(self, source, metakeys, follow_symlinks, dirs_exist_ok): for src in children: dst = self._path.joinpath(src.name) if not follow_symlinks and src.is_symlink(): - dst.copy._create_symlink(src, metakeys) + dst._copy_writer._create_symlink(src, metakeys) elif src.is_dir(): - dst.copy._create_dir(src, metakeys, follow_symlinks, dirs_exist_ok) + dst._copy_writer._create_dir(src, metakeys, follow_symlinks, dirs_exist_ok) else: - dst.copy._create_file(src, metakeys) + dst._copy_writer._create_file(src, metakeys) if metakeys: - metadata = source.copy._read_metadata(metakeys) + metadata = source._copy_reader._read_metadata(metakeys) if metadata: self._write_metadata(metadata) def _create_file(self, source, metakeys): """Copy the given file to our path.""" self._ensure_different_file(source) - with source.open('rb') as source_f: + with magic_open(source, 'rb') as source_f: try: - with self._path.open('wb') as target_f: + with magic_open(self._path, 'wb') as target_f: copyfileobj(source_f, target_f) except IsADirectoryError as e: if not self._path.exists(): @@ -152,7 +175,7 @@ def _create_file(self, source, metakeys): f'Directory does not exist: {self._path}') from e raise if metakeys: - metadata = source.copy._read_metadata(metakeys) + metadata = source._copy_reader._read_metadata(metakeys) if metadata: self._write_metadata(metadata) @@ -160,7 +183,7 @@ def _create_symlink(self, source, metakeys): """Copy the given symbolic link to our path.""" self._path.symlink_to(source.readlink()) if metakeys: - metadata = source.copy._read_metadata(metakeys, follow_symlinks=False) + metadata = source._copy_reader._read_metadata(metakeys, follow_symlinks=False) if metadata: self._write_metadata(metadata, follow_symlinks=False) @@ -420,11 +443,10 @@ def is_symlink(self): """ raise NotImplementedError - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): + def __open_rb__(self, buffering=-1): """ - Open the file pointed to by this path and return a file object, as - the built-in open() function does. + Open the file pointed to by this path for reading in binary mode and + return a file object, like open(mode='rb'). """ raise NotImplementedError @@ -432,14 +454,14 @@ def read_bytes(self): """ Open the file in bytes mode, read it, and close the file. """ - with self.open(mode='rb', buffering=0) as f: + with magic_open(self, mode='rb', buffering=0) as f: return f.read() def read_text(self, encoding=None, errors=None, newline=None): """ Open the file in text mode, read it, and close the file. """ - with self.open(mode='r', encoding=encoding, errors=errors, newline=newline) as f: + with magic_open(self, mode='r', encoding=encoding, errors=errors, newline=newline) as f: return f.read() def _scandir(self): @@ -532,7 +554,22 @@ def readlink(self): """ raise NotImplementedError - copy = property(CopyReader, doc=CopyReader.__call__.__doc__) + _copy_reader = property(CopyReader) + + def copy(self, target, follow_symlinks=True, dirs_exist_ok=False, + preserve_metadata=False): + """ + Recursively copy this file or directory tree to the given destination. + """ + if not isinstance(target, ReadablePath): + target = self.with_segments(target) + + # Delegate to the target path's CopyWriter object. + try: + create = target._copy_writer._create + except AttributeError: + raise TypeError(f"Target is not writable: {target}") from None + return create(self, follow_symlinks, dirs_exist_ok, preserve_metadata) def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=False, preserve_metadata=False): @@ -551,7 +588,7 @@ def copy_into(self, target_dir, *, follow_symlinks=True, preserve_metadata=preserve_metadata) -class WritablePath(ReadablePath): +class WritablePath(JoinablePath): __slots__ = () def symlink_to(self, target, target_is_directory=False): @@ -567,13 +604,20 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ raise NotImplementedError + def __open_wb__(self, buffering=-1): + """ + Open the file pointed to by this path for writing in binary mode and + return a file object, like open(mode='wb'). + """ + raise NotImplementedError + def write_bytes(self, data): """ Open the file in bytes mode, write to it, and close the file. """ # type-check for the buffer interface before truncating the file view = memoryview(data) - with self.open(mode='wb') as f: + with magic_open(self, mode='wb') as f: return f.write(view) def write_text(self, data, encoding=None, errors=None, newline=None): @@ -583,7 +627,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None): if not isinstance(data, str): raise TypeError('data must be str, not %s' % data.__class__.__name__) - with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: + with magic_open(self, mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - copy = property(CopyWriter, doc=CopyWriter.__call__.__doc__) + _copy_writer = property(CopyWriter) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index d6afb31424265c..5e8e6af7ad3970 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -20,7 +20,7 @@ grp = None from pathlib._os import copyfile -from pathlib._abc import CopyWriter, JoinablePath, WritablePath +from pathlib._abc import CopyReader, CopyWriter, JoinablePath, ReadablePath, WritablePath __all__ = [ @@ -65,9 +65,10 @@ def __repr__(self): return "<{}.parents>".format(type(self._path).__name__) -class _LocalCopyWriter(CopyWriter): - """This object implements the Path.copy callable. Don't try to construct - it yourself.""" +class _LocalCopyReader(CopyReader): + """This object implements the "read" part of copying local paths. Don't + try to construct it yourself. + """ __slots__ = () _readable_metakeys = {'mode', 'times_ns'} @@ -75,7 +76,7 @@ class _LocalCopyWriter(CopyWriter): _readable_metakeys.add('flags') if hasattr(os, 'listxattr'): _readable_metakeys.add('xattrs') - _readable_metakeys = _writable_metakeys = frozenset(_readable_metakeys) + _readable_metakeys = frozenset(_readable_metakeys) def _read_metadata(self, metakeys, *, follow_symlinks=True): metadata = {} @@ -97,6 +98,15 @@ def _read_metadata(self, metakeys, *, follow_symlinks=True): raise return metadata + +class _LocalCopyWriter(CopyWriter): + """This object implements the "write" part of copying local paths. Don't + try to construct it yourself. + """ + __slots__ = () + + _writable_metakeys = _LocalCopyReader._readable_metakeys + def _write_metadata(self, metadata, *, follow_symlinks=True): def _nop(*args, ns=None, follow_symlinks=None): pass @@ -171,7 +181,7 @@ def _create_symlink(self, source, metakeys): """Copy the given symlink to the given target.""" self._path.symlink_to(source.readlink(), source.is_dir()) if metakeys: - metadata = source.copy._read_metadata(metakeys, follow_symlinks=False) + metadata = source._copy_reader._read_metadata(metakeys, follow_symlinks=False) if metadata: self._write_metadata(metadata, follow_symlinks=False) @@ -683,7 +693,7 @@ class PureWindowsPath(PurePath): __slots__ = () -class Path(WritablePath, PurePath): +class Path(WritablePath, ReadablePath, PurePath): """PurePath subclass that can make system calls. Path represents a filesystem path but unlike PurePath, also offers @@ -823,6 +833,13 @@ def open(self, mode='r', buffering=-1, encoding=None, encoding = io.text_encoding(encoding) return io.open(self, mode, buffering, encoding, errors, newline) + def read_bytes(self): + """ + Open the file in bytes mode, read it, and close the file. + """ + with self.open(mode='rb', buffering=0) as f: + return f.read() + def read_text(self, encoding=None, errors=None, newline=None): """ Open the file in text mode, read it, and close the file. @@ -830,7 +847,17 @@ def read_text(self, encoding=None, errors=None, newline=None): # Call io.text_encoding() here to ensure any warning is raised at an # appropriate stack level. encoding = io.text_encoding(encoding) - return super().read_text(encoding, errors, newline) + with self.open(mode='r', encoding=encoding, errors=errors, newline=newline) as f: + return f.read() + + def write_bytes(self, data): + """ + Open the file in bytes mode, write to it, and close the file. + """ + # type-check for the buffer interface before truncating the file + view = memoryview(data) + with self.open(mode='wb') as f: + return f.write(view) def write_text(self, data, encoding=None, errors=None, newline=None): """ @@ -839,7 +866,11 @@ def write_text(self, data, encoding=None, errors=None, newline=None): # Call io.text_encoding() here to ensure any warning is raised at an # appropriate stack level. encoding = io.text_encoding(encoding) - return super().write_text(data, encoding, errors, newline) + if not isinstance(data, str): + raise TypeError('data must be str, not %s' % + data.__class__.__name__) + with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: + return f.write(data) _remove_leading_dot = operator.itemgetter(slice(2, None)) _remove_trailing_slash = operator.itemgetter(slice(-1)) @@ -1122,7 +1153,8 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) - copy = property(_LocalCopyWriter, doc=_LocalCopyWriter.__call__.__doc__) + _copy_reader = property(_LocalCopyReader) + _copy_writer = property(_LocalCopyWriter) def move(self, target): """ @@ -1136,7 +1168,7 @@ def move(self, target): else: if not isinstance(target, WritablePath): target = self.with_segments(target_str) - target.copy._ensure_different_file(self) + target._copy_writer._ensure_different_file(self) try: os.replace(self, target_str) return target diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ad5a9f9c8de9d6..866a2d07dd692a 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -924,7 +924,7 @@ class cls(pathlib.PurePath): # Tests for the concrete classes. # -class PathTest(test_pathlib_abc.DummyWritablePathTest, PurePathTest): +class PathTest(test_pathlib_abc.DummyRWPathTest, PurePathTest): """Tests for the FS-accessing functionalities of the Path classes.""" cls = pathlib.Path can_symlink = os_helper.can_symlink() @@ -1102,6 +1102,15 @@ def with_segments(self, *pathsegments): for dirpath, dirnames, filenames in p.walk(): self.assertEqual(42, dirpath.session_id) + def test_open_common(self): + p = self.cls(self.base) + with (p / 'fileA').open('r') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), "this is file A\n") + with (p / 'fileA').open('rb') as f: + self.assertIsInstance(f, io.BufferedIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + def test_open_unbuffered(self): p = self.cls(self.base) with (p / 'fileA').open('rb', buffering=0) as f: diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 6ba012e0208a53..d60bb147b72971 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -4,7 +4,7 @@ import errno import unittest -from pathlib._abc import JoinablePath, ReadablePath, WritablePath +from pathlib._abc import JoinablePath, ReadablePath, WritablePath, magic_open from pathlib._types import Parser import posixpath @@ -918,7 +918,7 @@ def test_with_suffix_invalid(self): class DummyWritablePathIO(io.BytesIO): """ - Used by DummyWritablePath to implement `open('w')` + Used by DummyWritablePath to implement `__open_wb__()` """ def __init__(self, files, path): @@ -931,38 +931,16 @@ def close(self): super().close() -class DummyReadablePath(ReadablePath): +class DummyReadablePath(ReadablePath, DummyJoinablePath): """ Simple implementation of DummyReadablePath that keeps files and directories in memory. """ - __slots__ = ('_segments') + __slots__ = () _files = {} _directories = {} - def __init__(self, *segments): - self._segments = segments - - def __str__(self): - if self._segments: - return self.parser.join(*self._segments) - return '' - - def __eq__(self, other): - if not isinstance(other, DummyReadablePath): - return NotImplemented - return str(self) == str(other) - - def __hash__(self): - return hash(str(self)) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, str(self)) - - def with_segments(self, *pathsegments): - return type(self)(*pathsegments) - def exists(self, *, follow_symlinks=True): return self.is_dir() or self.is_file() @@ -975,33 +953,13 @@ def is_file(self, *, follow_symlinks=True): def is_symlink(self): return False - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): - if buffering != -1 and not (buffering == 0 and 'b' in mode): - raise NotImplementedError + def __open_rb__(self, buffering=-1): path = str(self) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) - - text = 'b' not in mode - mode = ''.join(c for c in mode if c not in 'btU') - if mode == 'r': - if path not in self._files: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - stream = io.BytesIO(self._files[path]) - elif mode == 'w': - # FIXME: move to DummyWritablePath - parent, name = posixpath.split(path) - if parent not in self._directories: - raise FileNotFoundError(errno.ENOENT, "File not found", parent) - stream = DummyWritablePathIO(self._files, path) - self._files[path] = b'' - self._directories[parent].add(name) - else: - raise NotImplementedError - if text: - stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) - return stream + elif path not in self._files: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + return io.BytesIO(self._files[path]) def iterdir(self): path = str(self).rstrip('/') @@ -1013,9 +971,20 @@ def iterdir(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) -class DummyWritablePath(DummyReadablePath, WritablePath): +class DummyWritablePath(WritablePath, DummyJoinablePath): __slots__ = () + def __open_wb__(self, buffering=-1): + path = str(self) + if path in self._directories: + raise IsADirectoryError(errno.EISDIR, "Is a directory", path) + parent, name = posixpath.split(path) + if parent not in self._directories: + raise FileNotFoundError(errno.ENOENT, "File not found", parent) + self._files[path] = b'' + self._directories[parent].add(name) + return DummyWritablePathIO(self._files, path) + def mkdir(self, mode=0o777, parents=False, exist_ok=False): path = str(self) parent = str(self.parent) @@ -1121,12 +1090,12 @@ def test_exists(self): self.assertIs(False, P(self.base + '\udfff').exists()) self.assertIs(False, P(self.base + '\x00').exists()) - def test_open_common(self): + def test_magic_open(self): p = self.cls(self.base) - with (p / 'fileA').open('r') as f: + with magic_open(p / 'fileA', 'r') as f: self.assertIsInstance(f, io.TextIOBase) self.assertEqual(f.read(), "this is file A\n") - with (p / 'fileA').open('rb') as f: + with magic_open(p / 'fileA', 'rb') as f: self.assertIsInstance(f, io.BufferedIOBase) self.assertEqual(f.read().strip(), b"this is file A") @@ -1359,9 +1328,18 @@ def test_is_symlink(self): self.assertIs((P / 'linkA\x00').is_file(), False) -class DummyWritablePathTest(DummyReadablePathTest): +class DummyWritablePathTest(DummyJoinablePathTest): cls = DummyWritablePath + +class DummyRWPath(DummyWritablePath, DummyReadablePath): + __slots__ = () + + +class DummyRWPathTest(DummyWritablePathTest, DummyReadablePathTest): + cls = DummyRWPath + can_symlink = False + def test_read_write_bytes(self): p = self.cls(self.base) (p / 'fileA').write_bytes(b'abcdefg') From 5934ba1b3d88b120675b35ee0e6af6279bebaf0e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 19 Jan 2025 03:02:04 +0000 Subject: [PATCH 2/2] Duck-type the _copy_writer --- Lib/pathlib/_abc.py | 4 ++-- Lib/pathlib/_local.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 26b9f1d5935d0d..d55cc6f243cf2b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -561,7 +561,7 @@ def copy(self, target, follow_symlinks=True, dirs_exist_ok=False, """ Recursively copy this file or directory tree to the given destination. """ - if not isinstance(target, ReadablePath): + if not hasattr(target, '_copy_writer'): target = self.with_segments(target) # Delegate to the target path's CopyWriter object. @@ -579,7 +579,7 @@ def copy_into(self, target_dir, *, follow_symlinks=True, name = self.name if not name: raise ValueError(f"{self!r} has an empty name") - elif isinstance(target_dir, ReadablePath): + elif hasattr(target_dir, '_copy_writer'): target = target_dir / name else: target = self.with_segments(target_dir, name) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 5e8e6af7ad3970..2b42f3c22254b8 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -1166,7 +1166,7 @@ def move(self, target): except TypeError: pass else: - if not isinstance(target, WritablePath): + if not hasattr(target, '_copy_writer'): target = self.with_segments(target_str) target._copy_writer._ensure_different_file(self) try: @@ -1187,7 +1187,7 @@ def move_into(self, target_dir): name = self.name if not name: raise ValueError(f"{self!r} has an empty name") - elif isinstance(target_dir, WritablePath): + elif hasattr(target_dir, '_copy_writer'): target = target_dir / name else: target = self.with_segments(target_dir, name)