Skip to content

Commit 8207b3d

Browse files
pjbull and kujenga authored
use variable for client schemes, allowing override (drivendataorg#467) (drivendataorg#471)
* use variable for client schemes, allowing override

  This change is intended to make the default client implementations more flexible so that their scheme can be customized. This can be useful in scenarios where a subclass wants to implement a custom scheme on, e.g., an S3-compatible API [1], but with a custom scheme so that the default S3 access is still also available.

  [1] https://cloudpathlib.drivendata.org/stable/authentication/#accessing-custom-s3-compatible-object-stores

  The tests have been updated to include a new s3-like rig which uses the new scheme override functionality.

* use single cloud_prefix

* tests: switch to lighter-weight custom scheme tests

* update HISTORY file for custom scheme support

* update custom scheme tests to utilize pytest fixture

  This isolates the implementation in response to PR feedback: drivendataorg#467 (comment)

Co-authored-by: Aaron Taylor <[email protected]>
1 parent b776bee commit 8207b3d

File tree

5 files changed

+90
-15
lines changed

5 files changed

+90
-15
lines changed

HISTORY.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# cloudpathlib Changelog
22

3+
## UNRELEASED
4+
5+
- Added support for custom schemes in CloudPath and Client subclasses. (Issue [#466](https://github.com/drivendataorg/cloudpathlib/issues/466), PR [#467](https://github.com/drivendataorg/cloudpathlib/pull/467))
6+
37
## v0.19.0 (2024-08-29)
48

59
- Fixed an error that occurred when loading and dumping `CloudPath` objects using pickle multiple times. (Issue [#450](https://github.com/drivendataorg/cloudpathlib/issues/450), PR [#454](https://github.com/drivendataorg/cloudpathlib/pull/454), thanks to [@kujenga](https://github.com/kujenga))

cloudpathlib/azure/azblobclient.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,12 +276,14 @@ def _list_dir(
276276
) -> Iterable[Tuple[AzureBlobPath, bool]]:
277277
if not cloud_path.container:
278278
for container in self.service_client.list_containers():
279-
yield self.CloudPath(f"az://{container.name}"), True
279+
yield self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), True
280280

281281
if not recursive:
282282
continue
283283

284-
yield from self._list_dir(self.CloudPath(f"az://{container.name}"), recursive=True)
284+
yield from self._list_dir(
285+
self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), recursive=True
286+
)
285287
return
286288

287289
container_client = self.service_client.get_container_client(cloud_path.container)
@@ -295,7 +297,9 @@ def _list_dir(
295297
paths = file_system_client.get_paths(path=cloud_path.blob, recursive=recursive)
296298

297299
for path in paths:
298-
yield self.CloudPath(f"az://{cloud_path.container}/{path.name}"), path.is_directory
300+
yield self.CloudPath(
301+
f"{cloud_path.cloud_prefix}{cloud_path.container}/{path.name}"
302+
), path.is_directory
299303

300304
else:
301305
if not recursive:
@@ -306,7 +310,9 @@ def _list_dir(
306310
for blob in blobs:
307311
# walk_blobs returns folders with a trailing slash
308312
blob_path = blob.name.rstrip("/")
309-
blob_cloud_path = self.CloudPath(f"az://{cloud_path.container}/{blob_path}")
313+
blob_cloud_path = self.CloudPath(
314+
f"{cloud_path.cloud_prefix}{cloud_path.container}/{blob_path}"
315+
)
310316

311317
yield blob_cloud_path, (
312318
isinstance(blob, BlobPrefix)

cloudpathlib/gs/gsclient.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
183183
)
184184

185185
yield from (
186-
(self.CloudPath(f"gs://{str(b)}"), True) for b in self.client.list_buckets()
186+
(self.CloudPath(f"{cloud_path.cloud_prefix}{str(b)}"), True)
187+
for b in self.client.list_buckets()
187188
)
188189
return
189190

@@ -200,25 +201,30 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
200201
# if we haven't surfaced this directory already
201202
if parent not in yielded_dirs and str(parent) != ".":
202203
yield (
203-
self.CloudPath(f"gs://{cloud_path.bucket}/{prefix}{parent}"),
204+
self.CloudPath(
205+
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
206+
),
204207
True, # is a directory
205208
)
206209
yielded_dirs.add(parent)
207-
yield (self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}"), False) # is a file
210+
yield (
211+
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{o.name}"),
212+
False,
213+
) # is a file
208214
else:
209215
iterator = bucket.list_blobs(delimiter="/", prefix=prefix)
210216

211217
# files must be iterated first for `.prefixes` to be populated:
212218
# see: https://github.com/googleapis/python-storage/issues/863
213219
for file in iterator:
214220
yield (
215-
self.CloudPath(f"gs://{cloud_path.bucket}/{file.name}"),
221+
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{file.name}"),
216222
False, # is a file
217223
)
218224

219225
for directory in iterator.prefixes:
220226
yield (
221-
self.CloudPath(f"gs://{cloud_path.bucket}/{directory}"),
227+
self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{directory}"),
222228
True, # is a directory
223229
)
224230

cloudpathlib/s3/s3client.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
217217
)
218218

219219
yield from (
220-
(self.CloudPath(f"s3://{b['Name']}"), True)
220+
(self.CloudPath(f"{cloud_path.cloud_prefix}{b['Name']}"), True)
221221
for b in self.client.list_buckets().get("Buckets", [])
222222
)
223223
return
@@ -241,7 +241,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
241241
canonical = result_prefix.get("Prefix").rstrip("/") # keep a canonical form
242242
if canonical not in yielded_dirs:
243243
yield (
244-
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
244+
self.CloudPath(
245+
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
246+
),
245247
True,
246248
)
247249
yielded_dirs.add(canonical)
@@ -254,7 +256,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
254256
parent_canonical = prefix + str(parent).rstrip("/")
255257
if parent_canonical not in yielded_dirs and str(parent) != ".":
256258
yield (
257-
self.CloudPath(f"s3://{cloud_path.bucket}/{parent_canonical}"),
259+
self.CloudPath(
260+
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{parent_canonical}"
261+
),
258262
True,
259263
)
260264
yielded_dirs.add(parent_canonical)
@@ -267,15 +271,19 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
267271
# s3 fake directories have 0 size and end with "/"
268272
if result_key.get("Key").endswith("/") and result_key.get("Size") == 0:
269273
yield (
270-
self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
274+
self.CloudPath(
275+
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
276+
),
271277
True,
272278
)
273279
yielded_dirs.add(canonical)
274280

275281
# yield object as file
276282
else:
277283
yield (
278-
self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}"),
284+
self.CloudPath(
285+
f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{result_key.get('Key')}"
286+
),
279287
False,
280288
)
281289

tests/test_client.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
import mimetypes
22
import os
3-
from pathlib import Path
43
import random
54
import string
5+
from pathlib import Path
6+
7+
import pytest
68

79
from cloudpathlib import CloudPath
10+
from cloudpathlib.client import register_client_class
11+
from cloudpathlib.cloudpath import implementation_registry, register_path_class
812
from cloudpathlib.s3.s3client import S3Client
13+
from cloudpathlib.s3.s3path import S3Path
914

1015

1116
def test_default_client_instantiation(rig):
@@ -116,3 +121,49 @@ def my_content_type(path):
116121

117122
for suffix, content_type in mimes:
118123
_test_write_content_type(suffix, content_type, rig)
124+
125+
126+
@pytest.fixture
def custom_s3_path():
    """Provide an S3-derived path/client pair registered under the ``mys3`` scheme.

    The classes are declared inside the fixture because registering them
    modifies the global registry of implementations; the ``mys3`` entry is
    popped from ``implementation_registry`` once the test is done.

    Yields:
        A ``(MyS3Path, MyS3Client)`` tuple of the registered classes.
    """

    @register_path_class("mys3")
    class MyS3Path(S3Path):
        # Only the scheme prefix differs from the stock S3 path class.
        cloud_prefix: str = "mys3://"

    @register_client_class("mys3")
    class MyS3Client(S3Client):
        pass

    registered_pair = (MyS3Path, MyS3Client)
    yield registered_pair

    # Teardown: drop the temporary scheme from the shared registry.
    implementation_registry.pop("mys3")
142+
143+
144+
def test_custom_mys3path_instantiation(custom_s3_path):
    """A path built from the custom class keeps its scheme and exposes bucket and key."""
    path_cls, _ = custom_s3_path

    instance = path_cls("mys3://bucket/dir/file.txt")

    assert isinstance(instance, path_cls)
    assert instance.cloud_prefix == "mys3://"
    assert instance.bucket == "bucket"
    assert instance.key == "dir/file.txt"
152+
153+
154+
def test_custom_mys3client_instantiation(custom_s3_path):
    """The custom client can be instantiated and builds paths with the custom prefix."""
    _, client_cls = custom_s3_path

    instance = client_cls()
    assert isinstance(instance, client_cls)

    built_path = instance.CloudPath("mys3://bucket/dir/file.txt")
    assert built_path.cloud_prefix == "mys3://"
160+
161+
162+
def test_custom_mys3client_default_client(custom_s3_path):
    """A generic ``CloudPath`` for ``mys3://`` uses the custom client once it is the default."""
    _, client_cls = custom_s3_path

    default_client = client_cls()
    default_client.set_as_default_client()

    dispatched = CloudPath("mys3://bucket/dir/file.txt")

    assert isinstance(dispatched.client, client_cls)
    assert dispatched.cloud_prefix == "mys3://"

0 commit comments

Comments
 (0)