-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
GH-125413: pathlib: use scandir() to speed up copy()
#126263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9b223ca
fc40a79
e688be8
6f5ebb8
09aff17
00abf8a
3db0e18
c4bd4a8
69eb3c8
7dd35b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,6 +87,110 @@ def isabs(self, path): | |
| raise UnsupportedOperation(self._unsupported_msg('isabs()')) | ||
|
|
||
|
|
||
| class CopierBase: | ||
| """Base class for path copiers, which transfer files and directories from | ||
| one path object to another. | ||
|
|
||
| A reference to this class is available as PathBase._copier. When | ||
| PathBase.copy() is called, it uses the copier type of the *target* path to | ||
| perform the copy; this allows writing of data and metadata to occur | ||
| together (or in a particular order) where supported or required by the | ||
| path type. | ||
| """ | ||
| __slots__ = ('follow_symlinks', 'dirs_exist_ok', 'preserve_metadata') | ||
|
|
||
| def __init__(self, follow_symlinks=True, dirs_exist_ok=False, | ||
| preserve_metadata=False): | ||
| self.follow_symlinks = follow_symlinks | ||
| self.dirs_exist_ok = dirs_exist_ok | ||
| self.preserve_metadata = preserve_metadata | ||
|
|
||
| @classmethod | ||
| def ensure_different_files(cls, source, target): | ||
| """Raise OSError(EINVAL) if both paths refer to the same file.""" | ||
| try: | ||
| if not target.samefile(source): | ||
| return | ||
| except (OSError, ValueError): | ||
| return | ||
| err = OSError(EINVAL, "Source and target are the same file") | ||
| err.filename = str(source) | ||
| err.filename2 = str(target) | ||
| raise err | ||
|
|
||
| @classmethod | ||
| def ensure_distinct_paths(cls, source, target): | ||
| """Raise OSError(EINVAL) if the target is within the source path.""" | ||
| # Note: there is no straightforward, foolproof algorithm to determine | ||
| # if one directory is within another (a particularly perverse example | ||
| # would be a single network share mounted in one location via NFS, and | ||
| # in another location via CIFS), so we simply checks whether the | ||
| # other path is lexically equal to, or within, this path. | ||
| if source == target: | ||
| err = OSError(EINVAL, "Source and target are the same path") | ||
| elif source in target.parents: | ||
| err = OSError(EINVAL, "Source path is a parent of target path") | ||
| else: | ||
| return | ||
| err.filename = str(source) | ||
| err.filename2 = str(target) | ||
| raise err | ||
|
|
||
| def copy(self, source, target): | ||
| """Copy the given file or directory tree to the given target.""" | ||
| self.ensure_distinct_paths(source, target) | ||
| if self.preserve_metadata: | ||
| metadata_keys = source._readable_metadata & target._writable_metadata | ||
| else: | ||
| metadata_keys = frozenset() | ||
| if not self.follow_symlinks and source.is_symlink(): | ||
| self.copy_symlink(source, target, metadata_keys) | ||
| elif source.is_dir(): | ||
| self.copy_dir(source, target, metadata_keys) | ||
| else: | ||
| self.copy_file(source, target, metadata_keys) | ||
|
|
||
| def copy_dir(self, source, target, metadata_keys, dir_entry=None): | ||
| """Copy the given directory to the given target.""" | ||
| metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry) | ||
| with source.scandir() as entries: | ||
| target.mkdir(exist_ok=self.dirs_exist_ok) | ||
| for entry in entries: | ||
| src = source.joinpath(entry.name) | ||
| dst = target.joinpath(entry.name) | ||
| if not self.follow_symlinks and entry.is_symlink(): | ||
| self.copy_symlink(src, dst, metadata_keys, entry) | ||
| elif entry.is_dir(): | ||
| self.copy_dir(src, dst, metadata_keys, entry) | ||
| else: | ||
| self.copy_file(src, dst, metadata_keys, entry) | ||
| target._write_metadata(metadata) | ||
|
|
||
| def copy_file(self, source, target, metadata_keys, dir_entry=None): | ||
| """Copy the given file to the given target.""" | ||
| self.ensure_different_files(source, target) | ||
| metadata = source._read_metadata(metadata_keys, dir_entry=dir_entry) | ||
| with source.open('rb') as source_f: | ||
| try: | ||
| with target.open('wb') as target_f: | ||
| copyfileobj(source_f, target_f) | ||
| except IsADirectoryError as e: | ||
| if not target.exists(): | ||
| # Raise a less confusing exception. | ||
| raise FileNotFoundError( | ||
| f'Directory does not exist: {target}') from e | ||
| else: | ||
| raise | ||
barneygale marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| target._write_metadata(metadata) | ||
|
|
||
| def copy_symlink(self, source, target, metadata_keys, dir_entry=None): | ||
| """Copy the given symlink to the given target.""" | ||
| metadata = source._read_metadata( | ||
| metadata_keys, follow_symlinks=False, dir_entry=dir_entry) | ||
| target.symlink_to(source.readlink()) | ||
| target._write_metadata(metadata, follow_symlinks=False) | ||
|
|
||
|
|
||
| class PathGlobber(_GlobberBase): | ||
| """ | ||
| Class providing shell-style globbing for path objects. | ||
|
|
@@ -413,6 +517,9 @@ class PathBase(PurePathBase): | |
|
|
||
| # Maximum number of symlinks to follow in resolve() | ||
| _max_symlinks = 40 | ||
| _copier = CopierBase | ||
| _readable_metadata = frozenset() | ||
| _writable_metadata = frozenset() | ||
|
|
||
| @classmethod | ||
| def _unsupported_msg(cls, attribute): | ||
|
|
@@ -553,39 +660,6 @@ def samefile(self, other_path): | |
| return (st.st_ino == other_st.st_ino and | ||
| st.st_dev == other_st.st_dev) | ||
|
|
||
| def _ensure_different_file(self, other_path): | ||
| """ | ||
| Raise OSError(EINVAL) if both paths refer to the same file. | ||
| """ | ||
| try: | ||
| if not self.samefile(other_path): | ||
| return | ||
| except (OSError, ValueError): | ||
| return | ||
| err = OSError(EINVAL, "Source and target are the same file") | ||
| err.filename = str(self) | ||
| err.filename2 = str(other_path) | ||
| raise err | ||
|
|
||
| def _ensure_distinct_path(self, other_path): | ||
| """ | ||
| Raise OSError(EINVAL) if the other path is within this path. | ||
| """ | ||
| # Note: there is no straightforward, foolproof algorithm to determine | ||
| # if one directory is within another (a particularly perverse example | ||
| # would be a single network share mounted in one location via NFS, and | ||
| # in another location via CIFS), so we simply checks whether the | ||
| # other path is lexically equal to, or within, this path. | ||
| if self == other_path: | ||
| err = OSError(EINVAL, "Source and target are the same path") | ||
| elif self in other_path.parents: | ||
| err = OSError(EINVAL, "Source path is a parent of target path") | ||
| else: | ||
| return | ||
| err.filename = str(self) | ||
| err.filename2 = str(other_path) | ||
| raise err | ||
|
|
||
| def open(self, mode='r', buffering=-1, encoding=None, | ||
| errors=None, newline=None): | ||
| """ | ||
|
|
@@ -795,13 +869,6 @@ def symlink_to(self, target, target_is_directory=False): | |
| """ | ||
| raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) | ||
|
|
||
| def _symlink_to_target_of(self, link): | ||
| """ | ||
| Make this path a symlink with the same target as the given link. This | ||
| is used by copy(). | ||
| """ | ||
| self.symlink_to(link.readlink()) | ||
|
|
||
| def hardlink_to(self, target): | ||
| """ | ||
| Make this path a hard link pointing to the same file as *target*. | ||
|
|
@@ -822,74 +889,31 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): | |
| """ | ||
| raise UnsupportedOperation(self._unsupported_msg('mkdir()')) | ||
|
|
||
| # Metadata keys supported by this path type. | ||
| _readable_metadata = _writable_metadata = frozenset() | ||
|
|
||
| def _read_metadata(self, keys=None, *, follow_symlinks=True): | ||
| def _read_metadata(self, metadata_keys, *, follow_symlinks=True, dir_entry=None): | ||
| """ | ||
| Returns path metadata as a dict with string keys. | ||
| """ | ||
| if not metadata_keys: | ||
| return {} | ||
| raise UnsupportedOperation(self._unsupported_msg('_read_metadata()')) | ||
|
|
||
| def _write_metadata(self, metadata, *, follow_symlinks=True): | ||
| """ | ||
| Sets path metadata from the given dict with string keys. | ||
| """ | ||
| if not metadata: | ||
| return | ||
| raise UnsupportedOperation(self._unsupported_msg('_write_metadata()')) | ||
|
|
||
| def _copy_metadata(self, target, *, follow_symlinks=True): | ||
| """ | ||
| Copies metadata (permissions, timestamps, etc) from this path to target. | ||
| """ | ||
| # Metadata types supported by both source and target. | ||
| keys = self._readable_metadata & target._writable_metadata | ||
| if keys: | ||
| metadata = self._read_metadata(keys, follow_symlinks=follow_symlinks) | ||
| target._write_metadata(metadata, follow_symlinks=follow_symlinks) | ||
|
|
||
| def _copy_file(self, target): | ||
| """ | ||
| Copy the contents of this file to the given target. | ||
| """ | ||
| self._ensure_different_file(target) | ||
| with self.open('rb') as source_f: | ||
| try: | ||
| with target.open('wb') as target_f: | ||
| copyfileobj(source_f, target_f) | ||
| except IsADirectoryError as e: | ||
| if not target.exists(): | ||
| # Raise a less confusing exception. | ||
| raise FileNotFoundError( | ||
| f'Directory does not exist: {target}') from e | ||
| else: | ||
| raise | ||
|
|
||
| def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False, | ||
| preserve_metadata=False): | ||
| """ | ||
| Recursively copy this file or directory tree to the given destination. | ||
| """ | ||
| if not isinstance(target, PathBase): | ||
| target = self.with_segments(target) | ||
| self._ensure_distinct_path(target) | ||
| stack = [(self, target)] | ||
| while stack: | ||
| src, dst = stack.pop() | ||
| if not follow_symlinks and src.is_symlink(): | ||
| dst._symlink_to_target_of(src) | ||
| if preserve_metadata: | ||
| src._copy_metadata(dst, follow_symlinks=False) | ||
| elif src.is_dir(): | ||
| children = src.iterdir() | ||
| dst.mkdir(exist_ok=dirs_exist_ok) | ||
| stack.extend((child, dst.joinpath(child.name)) | ||
| for child in children) | ||
| if preserve_metadata: | ||
| src._copy_metadata(dst) | ||
| else: | ||
| src._copy_file(dst) | ||
| if preserve_metadata: | ||
| src._copy_metadata(dst) | ||
| copier = target._copier(follow_symlinks, dirs_exist_ok, preserve_metadata) | ||
| copier.copy(self, target) | ||
| return target | ||
|
|
||
| def copy_into(self, target_dir, *, follow_symlinks=True, | ||
|
|
@@ -936,7 +960,7 @@ def move(self, target): | |
| """ | ||
| Recursively move this file or directory tree to the given destination. | ||
| """ | ||
| self._ensure_different_file(target) | ||
| target._copier.ensure_different_files(self, target) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is wrong - There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, or a |
||
| try: | ||
| return self.replace(target) | ||
| except UnsupportedOperation: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
|
|
||
| from pathlib._os import (copyfile, file_metadata_keys, read_file_metadata, | ||
| write_file_metadata) | ||
| from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase | ||
| from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase, CopierBase | ||
|
|
||
|
|
||
| __all__ = [ | ||
|
|
@@ -57,6 +57,33 @@ def __repr__(self): | |
| return "<{}.parents>".format(type(self._path).__name__) | ||
|
|
||
|
|
||
| class _Copier(CopierBase): | ||
| """Copier class that uses fast OS copy routine where possible, and ensures | ||
| symlinks' target_is_directory argument is properly set on Windows. | ||
| """ | ||
| __slots__ = () | ||
|
|
||
| if copyfile: | ||
| def copy_file(self, source, target, metadata_keys, dir_entry=None): | ||
| """Copy the given file to the given target.""" | ||
| try: | ||
| source = os.fspath(source) | ||
| except TypeError: | ||
| if not isinstance(source, PathBase): | ||
| raise | ||
| CopierBase.copy_file(self, source, target, metadata_keys, dir_entry) | ||
picnixz marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
+69
to
+74
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we permit types inheriting from |
||
| else: | ||
| copyfile(source, os.fspath(target)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given the above exception handling for |
||
|
|
||
| if os.name == 'nt': | ||
| def copy_symlink(self, source, target, metadata_keys, dir_entry=None): | ||
| """Copy the given symlink to the given target.""" | ||
| metadata = source._read_metadata( | ||
| metadata_keys, follow_symlinks=False, dir_entry=dir_entry) | ||
| target.symlink_to(source.readlink(), (dir_entry or source).is_dir()) | ||
| target._write_metadata(metadata, follow_symlinks=False) | ||
|
|
||
|
|
||
| class PurePath(PurePathBase): | ||
| """Base class for manipulating paths without I/O. | ||
|
|
||
|
|
@@ -512,6 +539,11 @@ class Path(PathBase, PurePath): | |
| but cannot instantiate a WindowsPath on a POSIX system or vice versa. | ||
| """ | ||
| __slots__ = () | ||
| _copier = _Copier | ||
| _readable_metadata = file_metadata_keys | ||
| _writable_metadata = file_metadata_keys | ||
| _read_metadata = read_file_metadata | ||
| _write_metadata = write_file_metadata | ||
| as_uri = PurePath.as_uri | ||
|
|
||
| @classmethod | ||
|
|
@@ -789,24 +821,6 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): | |
| if not exist_ok or not self.is_dir(): | ||
| raise | ||
|
|
||
| _readable_metadata = _writable_metadata = file_metadata_keys | ||
| _read_metadata = read_file_metadata | ||
| _write_metadata = write_file_metadata | ||
|
|
||
| if copyfile: | ||
| def _copy_file(self, target): | ||
| """ | ||
| Copy the contents of this file to the given target. | ||
| """ | ||
| try: | ||
| target = os.fspath(target) | ||
| except TypeError: | ||
| if not isinstance(target, PathBase): | ||
| raise | ||
| PathBase._copy_file(self, target) | ||
| else: | ||
| copyfile(os.fspath(self), target) | ||
|
|
||
| def chmod(self, mode, *, follow_symlinks=True): | ||
| """ | ||
| Change the permissions of the path, like os.chmod(). | ||
|
|
@@ -869,14 +883,6 @@ def symlink_to(self, target, target_is_directory=False): | |
| """ | ||
| os.symlink(target, self, target_is_directory) | ||
|
|
||
| if os.name == 'nt': | ||
| def _symlink_to_target_of(self, link): | ||
| """ | ||
| Make this path a symlink with the same target as the given link. | ||
| This is used by copy(). | ||
| """ | ||
| self.symlink_to(link.readlink(), link.is_dir()) | ||
|
|
||
| if hasattr(os, "link"): | ||
| def hardlink_to(self, target): | ||
| """ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.