Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased

- Fixed `CloudPath(...) / other` to correctly attempt to fall back on `other`'s `__rtruediv__` implementation, in order to support classes that explicitly support the `/` with a `CloudPath` instance. Previously, this would always raise a `TypeError` if `other` were not a `str` or `PurePosixPath`. (PR [#479](https://github.com/drivendataorg/cloudpathlib/pull/479))
- Added `md5` property to `GSPath`, updated `LocalGSPath` to include an `md5` property, and updated `mock_gs.MockBlob` to include an `md5_hash` property.
- Fixed an uncaught exception on Azure Gen2 storage accounts with HNS enabled when used with `DefaultAzureCredential`. (Issue [#486](https://github.com/drivendataorg/cloudpathlib/issues/486))

## v0.20.0 (2024-10-18)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `bucket` | ❌ | ✅ | ✅ |
| `container` | ✅ | ❌ | ❌ |
| `key` | ❌ | ✅ | ❌ |
| `md5` | ✅ | ❌ | ❌ |
| `md5` | ✅ | ❌ | ✅ |

----

Expand Down
1 change: 1 addition & 0 deletions cloudpathlib/gs/gsclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def _get_metadata(self, cloud_path: GSPath) -> Optional[Dict[str, Any]]:
"size": blob.size,
"updated": blob.updated,
"content_type": blob.content_type,
"md5_hash": blob.md5_hash,
}

def _download_file(self, cloud_path: GSPath, local_path: Union[str, os.PathLike]) -> Path:
Expand Down
9 changes: 8 additions & 1 deletion cloudpathlib/gs/gspath.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional

from ..cloudpath import CloudPath, NoStatError, register_path_class

Expand Down Expand Up @@ -95,3 +95,10 @@
@property
def etag(self):
    """Return the ``"etag"`` entry of this path's client metadata.

    Returns:
        The stored ETag value, or ``None`` when the client returns no
        metadata or the metadata has no ``"etag"`` entry.
    """
    metadata = self.client._get_metadata(self)
    # ``_get_metadata`` is annotated as Optional; guard against ``None`` the
    # same way the ``md5`` property does instead of raising AttributeError.
    if not metadata:
        return None
    return metadata.get("etag")

@property
def md5(self) -> Optional[str]:
    """Return the ``"md5_hash"`` entry of this path's client metadata.

    Returns:
        The stored hash string, or ``None`` when the client returns no
        metadata or the metadata has no ``"md5_hash"`` entry.
    """
    metadata = self.client._get_metadata(self)
    # NOTE(review): Codecov reported the no-metadata branch as not covered
    # by tests when this property was added.
    return metadata.get("md5_hash") if metadata else None
4 changes: 4 additions & 0 deletions cloudpathlib/local/implementations/gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
def etag(self):
    """Return the result of the client's ``_md5`` helper for this path."""
    return self.client._md5(self)

@property
def md5(self) -> str:
    """Return the MD5 value the client computes for this path.

    Delegates to the same ``_md5`` client helper that ``etag`` uses.
    """
    local_client = self.client
    return local_client._md5(self)

Check warning on line 58 in cloudpathlib/local/implementations/gs.py

View check run for this annotation

Codecov / codecov/patch

cloudpathlib/local/implementations/gs.py#L58

Added line #L58 was not covered by tests


# Make the local implementation report its class name as "GSPath".
LocalGSPath.__name__ = "GSPath"

Expand Down
5 changes: 5 additions & 0 deletions tests/mock_clients/mock_gs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta
import os
from pathlib import Path, PurePosixPath
import shutil
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -95,6 +96,10 @@ def upload_from_filename(self, filename, content_type=None):
def etag(self):
    """Return a fixed placeholder value: the literal string ``"etag"``."""
    return "etag"

@property
def md5_hash(self):
    """Return the mock blob's MD5 hash.

    The value is read from the ``MOCK_EXPECTED_MD5_HASH`` environment
    variable so a test can choose it; when the variable is unset the
    placeholder string ``"md5_hash"`` is returned.
    """
    fallback = "md5_hash"
    return os.getenv("MOCK_EXPECTED_MD5_HASH", fallback)

@property
def size(self):
path = self.bucket / self.name
Expand Down
26 changes: 26 additions & 0 deletions tests/test_gs_specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,29 @@ def test_as_url(gs_rig):
assert "X-Goog-Date" in query_params
assert "X-Goog-SignedHeaders" in query_params
assert "X-Goog-Signature" in query_params


@pytest.mark.parametrize(
    "contents",
    ["hello world", "another test case"],
)
def test_md5_property(contents, gs_rig, monkeypatch):
    """Check that ``md5`` matches the base64-encoded MD5 digest of the written text."""
    from base64 import b64encode
    from hashlib import md5

    # The expected value is the base64 encoding of the raw MD5 digest; see
    # https://cloud.google.com/storage/docs/json_api/v1/objects
    raw_digest = md5(contents.encode()).digest()
    expected_hash = b64encode(raw_digest).decode()

    # If USE_LIVE_CLOUD is set, this environment variable has no effect.
    monkeypatch.setenv("MOCK_EXPECTED_MD5_HASH", expected_hash)

    cloud_file: GSPath = gs_rig.create_cloud_path("dir_0/file0_0.txt")
    cloud_file.write_text(contents)
    assert cloud_file.md5 == expected_hash
Loading