Skip to content

Commit 11269a5

Browse files
TomAugspurgerJoseph HammanJoe Hammanjoshmoore
authored
Update ABSStore for compatibility with newer azure.storage.blob. (#759)
* initial attempt to update azure storage version in zarr * add getsize back and fixup listdir * overwrite existing blob in setitem * Updates for azure.storage.blob * deprecate old * fixup * doc * doc2 * linting * linting * fixup * avoid set * Rely on Zarr being present * skip for py36 * fix skip * flake8 * Fix coverage, deprecations * Fix contributing typo Co-authored-by: Joseph Hamman <[email protected]> Co-authored-by: Joe Hamman <[email protected]> Co-authored-by: jmoore <[email protected]> Co-authored-by: Josh Moore <[email protected]>
1 parent 755bb96 commit 11269a5

File tree

9 files changed

+156
-90
lines changed

9 files changed

+156
-90
lines changed

docs/contributing.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,11 @@ optional dependencies to be installed), run::
164164
Note that some tests also require storage services to be running
165165
locally. To run the Azure Blob Service storage tests, run an Azure
166166
storage emulator (e.g., azurite) and set the environment variable
167-
``ZARR_TEST_ABS=1``. To run the Mongo DB storage tests, run a Mongo
167+
``ZARR_TEST_ABS=1``. If you're using Docker to run azurite, start the service with::
168+
169+
docker run --rm -p 10000:10000 mcr.microsoft.com/azure-storage/azurite azurite-blob --loose --blobHost 0.0.0.0
170+
171+
To run the Mongo DB storage tests, run a Mongo
168172
server locally and set the environment variable ``ZARR_TEST_MONGO=1``.
169173
To run the Redis storage tests, run a Redis server locally on port
170174
6379 and set the environment variable ``ZARR_TEST_REDIS=1``.

docs/release.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Bug fixes
1111

1212
* FSStore: default to normalize_keys=False
1313
By :user:`Josh Moore <joshmoore>`; :issue:`755`.
14+
* ABSStore: compatibility with ``azure.storage.blob>=12``
15+
By :user:`Tom Augspurger <tomaugspurger>`; :issue:`618`.
16+
1417

1518
.. _release_2.8.2:
1619

docs/tutorial.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,9 @@ The class is :class:`zarr.storage.ABSStore` (requires
810810
`azure-storage-blob <https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python>`_
811811
to be installed)::
812812

813-
>>> store = zarr.ABSStore(container='test', prefix='zarr-testing', blob_service_kwargs={'is_emulated': True}) # doctest: +SKIP
813+
>>> import azure.storage.blob
814+
>>> container_client = azure.storage.blob.ContainerClient(...) # doctest: +SKIP
815+
>>> store = zarr.ABSStore(client=container_client, prefix='zarr-testing') # doctest: +SKIP
814816
>>> root = zarr.group(store=store, overwrite=True) # doctest: +SKIP
815817
>>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') # doctest: +SKIP
816818
>>> z[:] = 42 # doctest: +SKIP

requirements_dev_optional.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ ipytree==0.2.1
66
# optional library requirements for services
77
# don't let pyup change pinning for azure-storage-blob, need to pin to older
88
# version to get compatibility with azure storage emulator on appveyor (FIXME)
9-
azure-storage-blob==2.0.1 # pyup: ignore
9+
azure-storage-blob==12.5.0 # pyup: ignore
1010
redis==3.5.3
1111
pymongo==3.11.4
1212
# optional test requirements

zarr/storage.py

Lines changed: 87 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,50 +2215,84 @@ class ABSStore(MutableMapping):
22152215
----------
22162216
container : string
22172217
The name of the ABS container to use.
2218+
.. deprecated:: 2.8.3
2219+
Use ``client`` instead.
22182220
prefix : string
22192221
Location of the "directory" to use as the root of the storage hierarchy
22202222
within the container.
22212223
account_name : string
22222224
The Azure blob storage account name.
2225+
.. deprecated:: 2.8.3
2226+
Use ``client`` instead.
22232227
account_key : string
22242228
The Azure blob storage account access key.
2229+
.. deprecated:: 2.8.3
2230+
Use ``client`` instead.
22252231
blob_service_kwargs : dictionary
22262232
Extra arguments to be passed into the azure blob client, for e.g. when
22272233
using the emulator, pass in blob_service_kwargs={'is_emulated': True}.
2234+
.. deprecated:: 2.8.3
2235+
Use ``client`` instead.
22282236
dimension_separator : {'.', '/'}, optional
22292237
Separator placed between the dimensions of a chunk.
2238+
client : azure.storage.blob.ContainerClient, optional
2239+
An ``azure.storage.blob.ContainerClient`` to connect with. See
2240+
`here <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python>`_ # noqa
2241+
for more.
2242+
2243+
.. versionadded:: 2.8.3
22302244
22312245
Notes
22322246
-----
2233-
In order to use this store, you must install the Microsoft Azure Storage SDK for Python.
2247+
In order to use this store, you must install the Microsoft Azure Storage SDK for Python,
2248+
``azure-storage-blob>=12.5.0``.
22342249
"""
22352250

2236-
def __init__(self, container, prefix='', account_name=None, account_key=None,
2237-
blob_service_kwargs=None, dimension_separator=None):
2238-
from azure.storage.blob import BlockBlobService
2239-
self.container = container
2240-
self.prefix = normalize_storage_path(prefix)
2241-
self.account_name = account_name
2242-
self.account_key = account_key
2251+
def __init__(self, container=None, prefix='', account_name=None, account_key=None,
2252+
blob_service_kwargs=None, dimension_separator=None,
2253+
client=None,
2254+
):
22432255
self._dimension_separator = dimension_separator
2244-
if blob_service_kwargs is not None:
2245-
self.blob_service_kwargs = blob_service_kwargs
2246-
else: # pragma: no cover
2247-
self.blob_service_kwargs = dict()
2248-
self.client = BlockBlobService(self.account_name, self.account_key,
2249-
**self.blob_service_kwargs)
2250-
2251-
# needed for pickling
2252-
def __getstate__(self):
2253-
state = self.__dict__.copy()
2254-
del state['client']
2255-
return state
2256+
self.prefix = normalize_storage_path(prefix)
2257+
if client is None:
2258+
# deprecated option, try to construct the client for them
2259+
msg = (
2260+
"Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs'"
2261+
"is deprecated. Provide and instance of 'azure.storage.blob.ContainerClient' "
2262+
"'client' instead."
2263+
)
2264+
warnings.warn(msg, FutureWarning, stacklevel=2)
2265+
from azure.storage.blob import ContainerClient
2266+
blob_service_kwargs = blob_service_kwargs or {}
2267+
client = ContainerClient(
2268+
"https://{}.blob.core.windows.net/".format(account_name), container,
2269+
credential=account_key, **blob_service_kwargs
2270+
)
22562271

2257-
def __setstate__(self, state):
2258-
from azure.storage.blob import BlockBlobService
2259-
self.__dict__.update(state)
2260-
self.client = BlockBlobService(self.account_name, self.account_key,
2261-
**self.blob_service_kwargs)
2272+
self.client = client
2273+
self._container = container
2274+
self._account_name = account_name
2275+
self._account_key = account_key
2276+
2277+
def _warn_deprecated(self, property_):
2278+
msg = ("The {} property is deprecated and will be removed in a future "
2279+
"version. Get the property from 'ABSStore.client' instead.")
2280+
warnings.warn(msg.format(property_), FutureWarning, stacklevel=3)
2281+
2282+
@property
2283+
def container(self):
2284+
self._warn_deprecated("container")
2285+
return self._container
2286+
2287+
@property
2288+
def account_name(self):
2289+
self._warn_deprecated("account_name")
2290+
return self._account_name
2291+
2292+
@property
2293+
def account_key(self):
2294+
self._warn_deprecated("account_key")
2295+
return self._account_key
22622296

22632297
def _append_path_to_prefix(self, path):
22642298
if self.prefix == '':
@@ -2277,30 +2311,29 @@ def _strip_prefix_from_path(path, prefix):
22772311
return path_norm
22782312

22792313
def __getitem__(self, key):
2280-
from azure.common import AzureMissingResourceHttpError
2314+
from azure.core.exceptions import ResourceNotFoundError
22812315
blob_name = self._append_path_to_prefix(key)
22822316
try:
2283-
blob = self.client.get_blob_to_bytes(self.container, blob_name)
2284-
return blob.content
2285-
except AzureMissingResourceHttpError:
2317+
return self.client.download_blob(blob_name).readall()
2318+
except ResourceNotFoundError:
22862319
raise KeyError('Blob %s not found' % blob_name)
22872320

22882321
def __setitem__(self, key, value):
22892322
value = ensure_bytes(value)
22902323
blob_name = self._append_path_to_prefix(key)
2291-
self.client.create_blob_from_bytes(self.container, blob_name, value)
2324+
self.client.upload_blob(blob_name, value, overwrite=True)
22922325

22932326
def __delitem__(self, key):
2294-
from azure.common import AzureMissingResourceHttpError
2327+
from azure.core.exceptions import ResourceNotFoundError
22952328
try:
2296-
self.client.delete_blob(self.container, self._append_path_to_prefix(key))
2297-
except AzureMissingResourceHttpError:
2329+
self.client.delete_blob(self._append_path_to_prefix(key))
2330+
except ResourceNotFoundError:
22982331
raise KeyError('Blob %s not found' % key)
22992332

23002333
def __eq__(self, other):
23012334
return (
23022335
isinstance(other, ABSStore) and
2303-
self.container == other.container and
2336+
self.client == other.client and
23042337
self.prefix == other.prefix
23052338
)
23062339

@@ -2312,63 +2345,53 @@ def __iter__(self):
23122345
list_blobs_prefix = self.prefix + '/'
23132346
else:
23142347
list_blobs_prefix = None
2315-
for blob in self.client.list_blobs(self.container, list_blobs_prefix):
2348+
for blob in self.client.list_blobs(list_blobs_prefix):
23162349
yield self._strip_prefix_from_path(blob.name, self.prefix)
23172350

23182351
def __len__(self):
23192352
return len(self.keys())
23202353

23212354
def __contains__(self, key):
23222355
blob_name = self._append_path_to_prefix(key)
2323-
assert len(blob_name) >= 1
2324-
if self.client.exists(self.container, blob_name):
2325-
return True
2326-
else:
2327-
return False
2356+
return self.client.get_blob_client(blob_name).exists()
23282357

23292358
def listdir(self, path=None):
2330-
from azure.storage.blob import Blob
23312359
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
23322360
if dir_path:
23332361
dir_path += '/'
2334-
items = list()
2335-
for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'):
2336-
if type(blob) == Blob:
2337-
items.append(self._strip_prefix_from_path(blob.name, dir_path))
2338-
else:
2339-
items.append(self._strip_prefix_from_path(
2340-
blob.name[:blob.name.find('/', len(dir_path))], dir_path))
2362+
items = [
2363+
self._strip_prefix_from_path(blob.name, dir_path)
2364+
for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/')
2365+
]
23412366
return items
23422367

23432368
def rmdir(self, path=None):
23442369
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
23452370
if dir_path:
23462371
dir_path += '/'
2347-
for blob in self.client.list_blobs(self.container, prefix=dir_path):
2348-
assert len(blob.name) >= 1
2349-
self.client.delete_blob(self.container, blob.name)
2372+
for blob in self.client.list_blobs(name_starts_with=dir_path):
2373+
self.client.delete_blob(blob)
23502374

23512375
def getsize(self, path=None):
2352-
from azure.storage.blob import Blob
23532376
store_path = normalize_storage_path(path)
2354-
fs_path = self.prefix
2355-
if store_path:
2356-
fs_path = self._append_path_to_prefix(store_path)
2377+
fs_path = self._append_path_to_prefix(store_path)
2378+
if fs_path:
2379+
blob_client = self.client.get_blob_client(fs_path)
2380+
else:
2381+
blob_client = None
23572382

2358-
if fs_path != "" and self.client.exists(self.container, fs_path):
2359-
return self.client.get_blob_properties(
2360-
self.container, fs_path
2361-
).properties.content_length
2383+
if blob_client and blob_client.exists():
2384+
return blob_client.get_blob_properties().size
23622385
else:
23632386
size = 0
23642387
if fs_path == '':
23652388
fs_path = None
2366-
else:
2389+
elif not fs_path.endswith('/'):
23672390
fs_path += '/'
2368-
for blob in self.client.list_blobs(self.container, prefix=fs_path,
2369-
delimiter='/'):
2370-
if type(blob) == Blob:
2371-
size += blob.properties.content_length
2391+
for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'):
2392+
blob_client = self.client.get_blob_client(blob)
2393+
if blob_client.exists():
2394+
size += blob_client.get_blob_properties().size
23722395
return size
23732396

23742397
def clear(self):

zarr/tests/test_core.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import atexit
22
import os
3+
import sys
34
import pickle
45
import shutil
56
import unittest
@@ -33,7 +34,7 @@
3334
init_group,
3435
)
3536
from zarr.util import buffer_size
36-
from zarr.tests.util import skip_test_env_var, have_fsspec
37+
from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec
3738

3839
# noinspection PyMethodMayBeStatic
3940

@@ -1627,12 +1628,8 @@ class TestArrayWithABSStore(TestArray):
16271628

16281629
@staticmethod
16291630
def absstore():
1630-
asb = pytest.importorskip("azure.storage.blob")
1631-
blob_client = asb.BlockBlobService(is_emulated=True)
1632-
blob_client.delete_container('test')
1633-
blob_client.create_container('test')
1634-
store = ABSStore(container='test', account_name='foo', account_key='bar',
1635-
blob_service_kwargs={'is_emulated': True})
1631+
client = abs_container()
1632+
store = ABSStore(client=client)
16361633
store.rmdir()
16371634
return store
16381635

@@ -1649,6 +1646,11 @@ def create_array(self, read_only=False, **kwargs):
16491646
def test_nbytes_stored(self):
16501647
return super().test_nbytes_stored()
16511648

1649+
@pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36")
1650+
def test_pickle(self):
1651+
# internal attribute on ContainerClient isn't serializable for py36 and earlier
1652+
super().test_pickle()
1653+
16521654

16531655
class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore):
16541656

zarr/tests/test_hierarchy.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import atexit
22
import os
3+
import sys
34
import pickle
45
import shutil
56
import tempfile
@@ -27,7 +28,7 @@
2728
array_meta_key, atexit_rmglob, atexit_rmtree,
2829
group_meta_key, init_array, init_group)
2930
from zarr.util import InfoReporter
30-
from zarr.tests.util import skip_test_env_var, have_fsspec
31+
from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container
3132

3233

3334
# noinspection PyStatementEffect
@@ -951,15 +952,16 @@ class TestGroupWithABSStore(TestGroup):
951952

952953
@staticmethod
953954
def create_store():
954-
asb = pytest.importorskip("azure.storage.blob")
955-
blob_client = asb.BlockBlobService(is_emulated=True)
956-
blob_client.delete_container('test')
957-
blob_client.create_container('test')
958-
store = ABSStore(container='test', account_name='foo', account_key='bar',
959-
blob_service_kwargs={'is_emulated': True})
955+
container_client = abs_container()
956+
store = ABSStore(client=container_client)
960957
store.rmdir()
961958
return store, None
962959

960+
@pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36")
961+
def test_pickle(self):
962+
# internal attribute on ContainerClient isn't serializable for py36 and earlier
963+
super().test_pickle()
964+
963965

964966
class TestGroupWithNestedDirectoryStore(TestGroup):
965967

0 commit comments

Comments
 (0)