Skip to content

Commit bb1f1e4

Browse files
authored
WIP: Patch open, os and os.path builtins (#322)
* WIP * Working implementation * more WIP * Implement glob * WIP docs and tests * tests and docs * tests, docs, compatibility * copilot review * update history * code review changes * pin pytest-rerunfailures
1 parent 1200550 commit bb1f1e4

File tree

14 files changed

+1823
-9
lines changed

14 files changed

+1823
-9
lines changed

HISTORY.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# cloudpathlib Changelog
22

3-
## UNRELEASED
3+
## v0.22.0 (2025-08-29)
44

55
- Fixed issue with GS credentials: using default auth enables a wider set of authentication methods in GS. (Issue [#390](https://github.com/drivendataorg/cloudpathlib/issues/390), PR [#514](https://github.com/drivendataorg/cloudpathlib/pull/514), thanks @ljyanesm)
66
- Added support for http(s) urls with `HttpClient`, `HttpPath`, `HttpsClient`, and `HttpsPath`. (Issue [#455](https://github.com/drivendataorg/cloudpathlib/issues/455), PR [#468](https://github.com/drivendataorg/cloudpathlib/pull/468))
7+
- Added experimental support for patching the builtins `open`, `os`, `os.path`, and `glob` to work with `CloudPath` objects. It is off by default; see the new "Compatibility" section in the docs for more information. (Issue [#128](https://github.com/drivendataorg/cloudpathlib/issues/128), PR [#322](https://github.com/drivendataorg/cloudpathlib/pull/322))
8+
- Added support for `CloudPath(*parts)` to create a `CloudPath` object from multiple path parts passed as positional arguments (to match `pathlib.Path`). **This is a potentially breaking change for users who relied on the second positional argument being the `client`; `client` must now be passed as an explicit keyword argument.** (PR [#322](https://github.com/drivendataorg/cloudpathlib/pull/322))
79

810
## v0.21.1 (2025-05-14)
911

cloudpathlib/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import os
12
import sys
23

34
from .anypath import AnyPath
45
from .azure.azblobclient import AzureBlobClient
56
from .azure.azblobpath import AzureBlobPath
67
from .cloudpath import CloudPath, implementation_registry
8+
from .patches import patch_open, patch_os_functions, patch_glob, patch_all_builtins
79
from .gs.gsclient import GSClient
810
from .gs.gspath import GSPath
911
from .http.httpclient import HttpClient, HttpsClient
@@ -33,6 +35,23 @@
3335
"HttpsClient",
3436
"HttpPath",
3537
"HttpsPath",
38+
"patch_open",
39+
"patch_glob",
40+
"patch_os_functions",
41+
"patch_all_builtins",
3642
"S3Client",
3743
"S3Path",
3844
]
45+
46+
47+
# Values of a CLOUDPATHLIB_PATCH_* variable that explicitly mean "disabled".
_DISABLED_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag_enabled(name: str) -> bool:
    """Return True when the environment variable `name` holds an enabling value.

    Unset, empty, and explicit opt-out values ("0", "false", "no", "off"; compared
    case-insensitively with surrounding whitespace ignored) count as disabled. Every other
    value enables the flag, so settings such as "1" or "true" keep working.

    Args:
        name: Name of the environment variable to inspect.

    Returns:
        Whether the flag should be treated as switched on.
    """
    return os.environ.get(name, "").strip().lower() not in _DISABLED_ENV_VALUES


# Opt-in patching, decided once at import time from the environment. A plain `bool()` on
# the raw value would treat "0" or "false" as enabled, hence the explicit parsing above.
if _env_flag_enabled("CLOUDPATHLIB_PATCH_OPEN"):
    patch_open()

if _env_flag_enabled("CLOUDPATHLIB_PATCH_OS"):
    patch_os_functions()

if _env_flag_enabled("CLOUDPATHLIB_PATCH_GLOB"):
    patch_glob()

if _env_flag_enabled("CLOUDPATHLIB_PATCH_ALL"):
    patch_all_builtins()

cloudpathlib/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ def set_as_default_client(self) -> None:
109109
instances for this cloud without a client specified."""
110110
self.__class__._default_client = self
111111

112-
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
113-
return self._cloud_meta.path_class(cloud_path=cloud_path, client=self) # type: ignore
112+
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath], *parts: str) -> BoundedCloudPath:
    """Create a path object bound to this client.

    `cloud_path` and any extra `parts` are forwarded positionally to the path class found
    on `self._cloud_meta`, and this client instance is passed as the `client` keyword.
    """
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path, *parts, client=self)  # type: ignore
114114

115115
def clear_cache(self):
116116
"""Clears the contents of the cache folder.

cloudpathlib/cloudpath.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def _make_selector(pattern_parts, _flavour, case_sensitive=True): # noqa: F811
8181
from .exceptions import (
8282
ClientMismatchError,
8383
CloudPathFileExistsError,
84+
CloudPathFileNotFoundError,
8485
CloudPathIsADirectoryError,
8586
CloudPathNotADirectoryError,
8687
CloudPathNotExistsError,
@@ -235,13 +236,21 @@ class CloudPath(metaclass=CloudPathMeta):
235236
def __init__(
236237
self,
237238
cloud_path: Union[str, Self, "CloudPath"],
239+
*parts: str,
238240
client: Optional["Client"] = None,
239241
) -> None:
240242
# handle if local file gets opened. must be set at the top of the method in case any code
241243
# below raises an exception, this prevents __del__ from raising an AttributeError
242244
self._handle: Optional[IO] = None
243245
self._client: Optional["Client"] = None
244246

247+
if parts:
248+
# ensure first part ends in "/"; (sometimes it is just prefix, sometimes a longer path)
249+
if not str(cloud_path).endswith("/"):
250+
cloud_path = str(cloud_path) + "/"
251+
252+
cloud_path = str(cloud_path) + "/".join(p.strip("/") for p in parts)
253+
245254
self.is_valid_cloudpath(cloud_path, raise_on_error=True)
246255
self._cloud_meta.validate_completeness()
247256

@@ -673,11 +682,18 @@ def open(
673682
force_overwrite_to_cloud: Optional[bool] = None, # extra kwarg not in pathlib
674683
) -> "IO[Any]":
675684
# if trying to call open on a directory that exists
676-
if self.exists() and not self.is_file():
685+
exists_on_cloud = self.exists()
686+
687+
if exists_on_cloud and not self.is_file():
677688
raise CloudPathIsADirectoryError(
678689
f"Cannot open directory, only files. Tried to open ({self})"
679690
)
680691

692+
if not exists_on_cloud and any(m in mode for m in ("r", "a")):
693+
raise CloudPathFileNotFoundError(
694+
f"File opened for read or append, but it does not exist on cloud: {self}"
695+
)
696+
681697
if mode == "x" and self.exists():
682698
raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")
683699

@@ -1247,7 +1263,7 @@ def _local(self) -> Path:
12471263
"""Cached local version of the file."""
12481264
return self.client._local_cache_dir / self._no_prefix
12491265

1250-
def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:
1266+
def _new_cloudpath(self, path: Union[str, os.PathLike], *parts: str) -> Self:
12511267
"""Use the scheme, client, cache dir of this cloudpath to instantiate
12521268
a new cloudpath of the same type with the path passed.
12531269
@@ -1263,7 +1279,7 @@ def _new_cloudpath(self, path: Union[str, os.PathLike]) -> Self:
12631279
if not path.startswith(self.anchor):
12641280
path = f"{self.anchor}{path}"
12651281

1266-
return self.client.CloudPath(path)
1282+
return self.client.CloudPath(path, *parts)
12671283

12681284
def _refresh_cache(self, force_overwrite_from_cloud: Optional[bool] = None) -> None:
12691285
try:

cloudpathlib/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ class CloudPathNotExistsError(CloudPathException):
2424
pass
2525

2626

27+
class CloudPathFileNotFoundError(CloudPathException, FileNotFoundError):
    """Raised when a file opened for read or append does not exist on the cloud.

    Also derives from the builtin `FileNotFoundError`, so handlers written for the builtin
    exception catch it as well.
    """
29+
30+
2731
class CloudPathIsADirectoryError(CloudPathException, IsADirectoryError):
    """Raised when a directory is used where only a file is allowed, e.g. opening it.

    Also derives from the builtin `IsADirectoryError`, so handlers written for the builtin
    exception catch it as well.
    """
2933

@@ -77,3 +81,7 @@ class OverwriteNewerCloudError(CloudPathException):
7781

7882
class OverwriteNewerLocalError(CloudPathException):
    """Cloudpathlib-specific error about overwriting a newer local file."""

    # NOTE(review): meaning inferred from the class name; confirm against the raise sites.
84+
85+
86+
class InvalidGlobArgumentsError(CloudPathException):
    """Cloudpathlib-specific error signalling invalid glob arguments."""

    # NOTE(review): meaning inferred from the class name; confirm against the raise sites.

cloudpathlib/http/httpclient.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ def _get_metadata(self, cloud_path: HttpPath) -> dict:
7979
"content_type": response.headers.get("Content-Type", None),
8080
}
8181

82+
def _is_file_or_dir(self, cloud_path: HttpPath) -> Optional[str]:
    """Classify `cloud_path` as "dir" or "file" by applying `self.dir_matcher` to its URL.

    Returns:
        "dir" when `self.dir_matcher` gives a truthy result for `cloud_path.as_url()`,
        otherwise "file".
    """
    url = cloud_path.as_url()
    return "dir" if self.dir_matcher(url) else "file"
87+
8288
def _download_file(self, cloud_path: HttpPath, local_path: Union[str, os.PathLike]) -> Path:
8389
local_path = Path(local_path)
8490
with self.opener.open(cloud_path.as_url()) as response:

cloudpathlib/http/httppath.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ class HttpPath(CloudPath):
2121
def __init__(
2222
self,
2323
cloud_path: Union[str, "HttpPath"],
24+
*parts: str,
2425
client: Optional["HttpClient"] = None,
2526
) -> None:
26-
super().__init__(cloud_path, client)
27+
super().__init__(cloud_path, *parts, client=client)
2728

2829
self._path = (
2930
PurePosixPath(self._url.path)

cloudpathlib/local/localclient.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,14 @@ def _is_file(self, cloud_path: "LocalPath", follow_symlinks=True) -> bool:
118118

119119
return self._cloud_path_to_local(cloud_path).is_file(**kwargs)
120120

121+
def _is_file_or_dir(self, cloud_path: "LocalPath") -> Optional[str]:
122+
if self._is_dir(cloud_path):
123+
return "dir"
124+
elif self._is_file(cloud_path):
125+
return "file"
126+
else:
127+
raise FileNotFoundError(f"Path could not be identified as file or dir: {cloud_path}")
128+
121129
def _list_dir(
122130
self, cloud_path: "LocalPath", recursive=False
123131
) -> Iterable[Tuple["LocalPath", bool]]:

0 commit comments

Comments
 (0)