1 change: 1 addition & 0 deletions HISTORY.md
@@ -2,6 +2,7 @@

## Unreleased

- Added support for `timeout` and `retry` kwargs for `GSClient`. (Issue [#484](https://github.com/drivendataorg/cloudpathlib/issues/484), PR [#485](https://github.com/drivendataorg/cloudpathlib/pull/485), thanks @Mchristos)
- Fixed `CloudPath(...) / other` to correctly attempt to fall back on `other`'s `__rtruediv__` implementation, in order to support classes that explicitly support the `/` operator with a `CloudPath` instance. Previously, this would always raise a `TypeError` if `other` were not a `str` or `PurePosixPath`. (PR [#479](https://github.com/drivendataorg/cloudpathlib/pull/479))
- Added `md5` property to `GSPath`, updated `LocalGSPath` to include `md5` property, and updated `mock_gs.MockBlob` to include `md5_hash` property.
- Fixed an uncaught exception on Azure Gen2 storage accounts with HNS enabled when used with `DefaultAzureCredential`. (Issue [#486](https://github.com/drivendataorg/cloudpathlib/issues/486))
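For context, here is a minimal usage sketch of the new `GSClient` kwargs (the bucket name and values are illustrative, not from the PR):

```python
from google.api_core.retry import Retry

from cloudpathlib import GSClient, GSPath

# Per-request timeout in seconds, plus a retry policy for transient errors.
client = GSClient(timeout=30.0, retry=Retry(timeout=120.0))

p = GSPath("gs://my-bucket/data/file.txt", client=client)  # hypothetical bucket
p.write_text("hello")
```

Both kwargs are optional; when omitted, the `google-cloud-storage` defaults apply.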
19 changes: 15 additions & 4 deletions cloudpathlib/gs/gsclient.py
@@ -13,6 +13,7 @@
try:
if TYPE_CHECKING:
from google.auth.credentials import Credentials
from google.api_core.retry import Retry

from google.auth.exceptions import DefaultCredentialsError
from google.cloud.storage import Client as StorageClient
@@ -45,6 +46,8 @@
local_cache_dir: Optional[Union[str, os.PathLike]] = None,
content_type_method: Optional[Callable] = mimetypes.guess_type,
download_chunks_concurrently_kwargs: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
retry: Optional["Retry"] = None,
):
"""Class constructor. Sets up a [`Storage
Client`](https://googleapis.dev/python/storage/latest/client.html).
@@ -85,6 +88,8 @@
download_chunks_concurrently_kwargs (Optional[Dict[str, Any]]): Keyword arguments to pass to
[`download_chunks_concurrently`](https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.transfer_manager#google_cloud_storage_transfer_manager_download_chunks_concurrently)
for sliced parallel downloads; only available in `google-cloud-storage` version 2.7.0 or later, otherwise ignored and a warning is emitted.
timeout (Optional[float]): Cloud Storage [timeout value](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout).
retry (Optional[google.api_core.retry.Retry]): Cloud Storage [retry configuration](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout#configuring-retries).
"""
if application_credentials is None:
application_credentials = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
@@ -102,6 +107,13 @@
self.client = StorageClient.create_anonymous_client()

self.download_chunks_concurrently_kwargs = download_chunks_concurrently_kwargs
self.blob_kwargs: Dict[str, Any] = {}
if timeout is not None:
self.timeout: float = timeout
self.blob_kwargs["timeout"] = self.timeout
if retry is not None:
self.retry: "Retry" = retry
self.blob_kwargs["retry"] = self.retry

super().__init__(
local_cache_dir=local_cache_dir,
@@ -129,7 +141,6 @@
blob = bucket.get_blob(cloud_path.blob)

local_path = Path(local_path)

if transfer_manager is not None and self.download_chunks_concurrently_kwargs is not None:
transfer_manager.download_chunks_concurrently(
blob, local_path, **self.download_chunks_concurrently_kwargs
@@ -140,7 +151,7 @@
"Ignoring `download_chunks_concurrently_kwargs` for version of google-cloud-storage that does not support them (<2.7.0)."
)

-blob.download_to_filename(local_path)
+blob.download_to_filename(local_path, **self.blob_kwargs)

return local_path

@@ -247,7 +258,7 @@
dst_bucket = self.client.bucket(dst.bucket)

src_blob = src_bucket.get_blob(src.blob)
-src_bucket.copy_blob(src_blob, dst_bucket, dst.blob)
+src_bucket.copy_blob(src_blob, dst_bucket, dst.blob, **self.blob_kwargs)

if remove_src:
src_blob.delete()
@@ -280,7 +291,7 @@
content_type, _ = self.content_type_method(str(local_path))
extra_args["content_type"] = content_type

-blob.upload_from_filename(str(local_path), **extra_args)
+blob.upload_from_filename(str(local_path), **extra_args, **self.blob_kwargs)
return cloud_path

def _get_public_url(self, cloud_path: GSPath) -> str:
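The conditional construction of `blob_kwargs` above means only options the caller explicitly set are forwarded to blob operations, so library defaults stay in effect otherwise. A standalone sketch of the same pattern (the function name is illustrative, not part of the PR):

```python
from typing import Any, Dict, Optional


def build_blob_kwargs(
    timeout: Optional[float] = None, retry: Optional[Any] = None
) -> Dict[str, Any]:
    # Forward only options the caller actually set; omitted keys fall back
    # to google-cloud-storage's own defaults.
    kwargs: Dict[str, Any] = {}
    if timeout is not None:
        kwargs["timeout"] = timeout
    if retry is not None:
        kwargs["retry"] = retry
    return kwargs


assert build_blob_kwargs() == {}
assert build_blob_kwargs(timeout=30.0) == {"timeout": 30.0}
```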
33 changes: 28 additions & 5 deletions tests/mock_clients/mock_gs.py
@@ -57,12 +57,19 @@ def delete(self):
path.unlink()
delete_empty_parents_up_to_root(path=path, root=self.bucket)

-def download_to_filename(self, filename):
+def download_to_filename(self, filename, timeout=None, retry=None):
# if timeout is not None, assume that the test wants a timeout and throw it
if timeout is not None:
raise TimeoutError("Download timed out")

# indicate that retry object made it through to the GS lib
if retry is not None:
retry.mocked_retries = 1

from_path = self.bucket / self.name
-to_path = Path(filename)

+to_path = Path(filename)
to_path.parent.mkdir(exist_ok=True, parents=True)

to_path.write_bytes(from_path.read_bytes())

def patch(self):
@@ -84,7 +91,15 @@ def reload(
):
pass

-def upload_from_filename(self, filename, content_type=None):
+def upload_from_filename(self, filename, content_type=None, timeout=None, retry=None):
# if timeout is not None, assume that the test wants a timeout and throw it
if timeout is not None:
raise TimeoutError("Upload timed out")

# indicate that retry object made it through to the GS lib
if retry is not None:
retry.mocked_retries = 1

data = Path(filename).read_bytes()
path = self.bucket / self.name
path.parent.mkdir(parents=True, exist_ok=True)
@@ -131,7 +146,15 @@ def __init__(self, name, bucket_name, client=None):
def blob(self, blob):
return MockBlob(self.name, blob, client=self.client)

-def copy_blob(self, blob, destination_bucket, new_name):
+def copy_blob(self, blob, destination_bucket, new_name, timeout=None, retry=None):
# if timeout is not None, assume that the test wants a timeout and throw it
if timeout is not None:
raise TimeoutError("Copy timed out")

# indicate that retry object made it through to the GS lib
if retry is not None:
retry.mocked_retries = 1

data = (self.name / blob.name).read_bytes()
dst = destination_bucket.name / new_name
dst.parent.mkdir(exist_ok=True, parents=True)
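The mocks follow a single convention: any non-None `timeout` raises `TimeoutError` immediately, and a forwarded `retry` object is stamped with a `mocked_retries` attribute so tests can verify it reached the storage layer. A simplified, self-contained sketch of that sentinel pattern (not the mock's actual file I/O):

```python
class FakeRetry:
    # Stand-in for google.api_core.retry.Retry in this sketch.
    mocked_retries = 0


def download_to_filename(filename, timeout=None, retry=None):
    # Mimics the mock's convention; real file copying omitted for brevity.
    if timeout is not None:
        raise TimeoutError("Download timed out")
    if retry is not None:
        retry.mocked_retries = 1  # prove the object was forwarded


r = FakeRetry()
download_to_filename("out.txt", retry=r)
assert r.mocked_retries == 1
```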
36 changes: 35 additions & 1 deletion tests/test_gs_specific.py
@@ -1,6 +1,9 @@
from urllib.parse import urlparse, parse_qs

from google.api_core import retry
from google.api_core import exceptions
import pytest

-from urllib.parse import urlparse, parse_qs
from cloudpathlib import GSPath
from cloudpathlib.local import LocalGSPath

@@ -75,3 +78,34 @@ def _calculate_b64_wrapped_md5_hash(contents: str) -> str:
p: GSPath = gs_rig.create_cloud_path("dir_0/file0_0.txt")
p.write_text(contents)
assert p.md5 == expected_hash


def test_timeout_and_retry(gs_rig):
custom_retry = retry.Retry(
timeout=0.50,
predicate=retry.if_exception_type(exceptions.ServerError),
)

fast_timeout_client = gs_rig.client_class(timeout=0.00001, retry=custom_retry)

with pytest.raises(Exception) as exc_info:
p = gs_rig.create_cloud_path("dir_0/file0_0.txt", client=fast_timeout_client)
p.write_text("hello world " * 10000)

assert "timed out" in str(exc_info.value)

# can't force retries to happen in live cloud tests, so skip
if not gs_rig.live_server:
custom_retry = retry.Retry(
initial=1.0,
multiplier=1.0,
timeout=15.0,
predicate=retry.if_exception_type(exceptions.ServerError),
)

p = gs_rig.create_cloud_path(
"dir_0/file0_0.txt", client=gs_rig.client_class(retry=custom_retry)
)
p.write_text("hello world")

assert custom_retry.mocked_retries == 1
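
As background on the `predicate` used in this test: `retry.if_exception_type` builds a callable that tells the retry machinery which exceptions count as transient. A quick illustration, based on the `google-api-core` exception hierarchy (5xx errors subclass `ServerError`, 4xx errors do not):

```python
from google.api_core import exceptions, retry

predicate = retry.if_exception_type(exceptions.ServerError)

# InternalServerError (HTTP 500) subclasses ServerError, so it is retried...
assert predicate(exceptions.InternalServerError("boom"))

# ...while NotFound (HTTP 404) is a client error and is not.
assert not predicate(exceptions.NotFound("missing"))
```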