Skip to content

Commit ddc3b94

Browse files
pjbullgeorgeboot
andauthored
Stream to and from Azure (#403)
* Stream to and from Azure (#334) * Stream uploads for azure * Download using a stream * Tests and partials * changelog * use pathlib open * use replace instead of rename * another try at flaky tests --------- Co-authored-by: George Boot <884482+georgeboot@users.noreply.github.com>
1 parent a242f68 commit ddc3b94

File tree

5 files changed

+68
-4
lines changed

5 files changed

+68
-4
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## UNRELEASED
44
- Implement sliced downloads in GSClient. (Issue [#387](https://github.com/drivendataorg/cloudpathlib/issues/387), PR [#389](https://github.com/drivendataorg/cloudpathlib/pull/389))
55
- Implement `as_url` with presigned parameter for all backends. (Issue [#235](https://github.com/drivendataorg/cloudpathlib/issues/235), PR [#236](https://github.com/drivendataorg/cloudpathlib/pull/236))
6+
- Stream to and from Azure Blob Storage. (PR [#403](https://github.com/drivendataorg/cloudpathlib/pull/403))
67

78
## 0.17.0 (2023-12-21)
89

cloudpathlib/azure/azblobclient.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ def _get_metadata(self, cloud_path: AzureBlobPath) -> Union["BlobProperties", Di
118118

119119
return properties
120120

121+
@staticmethod
122+
def _partial_filename(local_path) -> Path:
123+
return Path(str(local_path) + ".part")
124+
121125
def _download_file(
122126
self, cloud_path: AzureBlobPath, local_path: Union[str, os.PathLike]
123127
) -> Path:
@@ -131,7 +135,17 @@ def _download_file(
131135

132136
local_path.parent.mkdir(exist_ok=True, parents=True)
133137

134-
local_path.write_bytes(download_stream.content_as_bytes())
138+
try:
139+
partial_local_path = self._partial_filename(local_path)
140+
with partial_local_path.open("wb") as data:
141+
download_stream.readinto(data)
142+
143+
partial_local_path.replace(local_path)
144+
except: # noqa: E722
145+
# remove any partial download
146+
if partial_local_path.exists():
147+
partial_local_path.unlink()
148+
raise
135149

136150
return local_path
137151

@@ -273,7 +287,8 @@ def _upload_file(
273287

274288
content_settings = ContentSettings(**extra_args)
275289

276-
blob.upload_blob(Path(local_path).read_bytes(), overwrite=True, content_settings=content_settings) # type: ignore
290+
with Path(local_path).open("rb") as data:
291+
blob.upload_blob(data, overwrite=True, content_settings=content_settings) # type: ignore
277292

278293
return cloud_path
279294

cloudpathlib/local/localclient.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pathlib import Path, PurePosixPath
66
import shutil
77
from tempfile import TemporaryDirectory
8+
from time import sleep
89
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
910

1011
from ..client import Client
@@ -63,7 +64,17 @@ def _local_to_cloud_path(self, local_path: Union[str, os.PathLike]) -> "LocalPat
6364
def _download_file(self, cloud_path: "LocalPath", local_path: Union[str, os.PathLike]) -> Path:
6465
local_path = Path(local_path)
6566
local_path.parent.mkdir(exist_ok=True, parents=True)
66-
shutil.copyfile(self._cloud_path_to_local(cloud_path), local_path)
67+
68+
try:
69+
shutil.copyfile(self._cloud_path_to_local(cloud_path), local_path)
70+
except FileNotFoundError:
71+
# erroneous FileNotFoundError appears in tests sometimes; patiently insist on the parent directory existing
72+
sleep(1.0)
73+
local_path.parent.mkdir(exist_ok=True, parents=True)
74+
sleep(1.0)
75+
76+
shutil.copyfile(self._cloud_path_to_local(cloud_path), local_path)
77+
6778
return local_path
6879

6980
def _exists(self, cloud_path: "LocalPath") -> bool:

tests/mock_clients/mock_azureblob.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def delete_blob(self):
111111
def upload_blob(self, data, overwrite, content_settings=None):
112112
path = self.root / self.key
113113
path.parent.mkdir(parents=True, exist_ok=True)
114-
path.write_bytes(data)
114+
path.write_bytes(data.read())
115115

116116
if content_settings is not None:
117117
self.service_client.metadata_cache[self.root / self.key] = (
@@ -130,6 +130,9 @@ def readall(self):
130130
def content_as_bytes(self):
131131
return self.readall()
132132

133+
def readinto(self, buffer):
134+
buffer.write(self.readall())
135+
133136

134137
class MockContainerClient:
135138
def __init__(self, root, container_name):

tests/test_azure_specific.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import os
22

3+
from azure.storage.blob import StorageStreamDownloader
34
import pytest
45

56
from urllib.parse import urlparse, parse_qs
67
from cloudpathlib import AzureBlobClient, AzureBlobPath
78
from cloudpathlib.exceptions import MissingCredentialsError
89
from cloudpathlib.local import LocalAzureBlobClient, LocalAzureBlobPath
910

11+
from .mock_clients.mock_azureblob import MockStorageStreamDownloader
12+
1013

1114
@pytest.mark.parametrize("path_class", [AzureBlobPath, LocalAzureBlobPath])
1215
def test_azureblobpath_properties(path_class, monkeypatch):
@@ -45,3 +48,34 @@ def test_as_url(azure_rig):
4548
assert "sp" in query_params
4649
assert "sr" in query_params
4750
assert "sig" in query_params
51+
52+
53+
def test_partial_download(azure_rig, monkeypatch):
54+
p: AzureBlobPath = azure_rig.create_cloud_path("dir_0/file0_0.txt")
55+
56+
# no partial after successful download
57+
p.read_text() # downloads
58+
assert p._local.exists()
59+
assert not p.client._partial_filename(p._local).exists()
60+
61+
# remove cache manually
62+
p._local.unlink()
63+
assert not p._local.exists()
64+
65+
# no partial after failed download
66+
with monkeypatch.context() as m:
67+
68+
def _patched(self, buffer):
69+
buffer.write(b"partial")
70+
raise Exception("boom")
71+
72+
if azure_rig.live_server:
73+
m.setattr(StorageStreamDownloader, "readinto", _patched)
74+
else:
75+
m.setattr(MockStorageStreamDownloader, "readinto", _patched)
76+
77+
with pytest.raises(Exception):
78+
p.read_text() # downloads; should raise
79+
80+
assert not p._local.exists()
81+
assert not p.client._partial_filename(p._local).exists()

0 commit comments

Comments
 (0)