Skip to content

Commit 1ddaa66

Browse files
authored
Merge pull request #72 from alimanfoo/misc_20160908
Various minor fixes and improvements
2 parents 96c34bb + 7b741ae commit 1ddaa66

File tree

14 files changed

+700
-433
lines changed

14 files changed

+700
-433
lines changed

docs/_static/donotdelete

Whitespace-only changes.

docs/release.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
Release notes
22
=============
33

4+
* Added ``overwrite`` keyword argument to array and group creation methods
5+
on the :class:`zarr.hierarchy.Group` class
6+
(`#71 <https://github.com/alimanfoo/zarr/issues/71>`_).
7+
* Added ``cache_metadata`` keyword argument to array creation methods.
8+
* The functions :func:`zarr.creation.open_array` and
9+
:func:`zarr.hierarchy.open_group` now accept any store as first argument
10+
(`#56 <https://github.com/alimanfoo/zarr/issues/56>`_).
11+
412
.. _release_2.0.1:
513

614
2.0.1

docs/tutorial.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ the delta filter::
230230
... chunks=(1000, 1000), compressor=compressor)
231231
>>> z
232232
Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
233-
nbytes: 381.5M; nbytes_stored: 248.9K; ratio: 1569.6; initialized: 100/100
233+
nbytes: 381.5M; nbytes_stored: 248.9K; ratio: 1569.7; initialized: 100/100
234234
compressor: LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'preset': 1, 'id': 33}])
235235
store: dict
236236

@@ -327,7 +327,7 @@ provided that all processes have access to a shared file system. E.g.::
327327
... synchronizer=synchronizer)
328328
>>> z
329329
Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
330-
nbytes: 381.5M; nbytes_stored: 326; ratio: 1226993.9; initialized: 0/100
330+
nbytes: 381.5M; nbytes_stored: 323; ratio: 1238390.1; initialized: 0/100
331331
compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
332332
store: DirectoryStore; synchronizer: ProcessSynchronizer
333333

zarr/core.py

Lines changed: 69 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from zarr.storage import array_meta_key, attrs_key, listdir, getsize
1414
from zarr.meta import decode_array_metadata, encode_array_metadata
1515
from zarr.attrs import Attributes
16-
from zarr.errors import PermissionError
16+
from zarr.errors import PermissionError, err_read_only, err_array_not_found
1717
from zarr.compat import reduce
1818
from zarr.codecs import get_codec
1919

@@ -34,6 +34,11 @@ class Array(object):
3434
for storage of both chunks and metadata.
3535
synchronizer : object, optional
3636
Array synchronizer.
37+
cache_metadata : bool, optional
38+
If True, array configuration metadata will be cached for the
39+
lifetime of the object. If False, array metadata will be reloaded
40+
prior to all data access and modification operations (may incur
41+
overhead depending on storage and data access pattern).
3742
3843
Attributes
3944
----------
@@ -56,8 +61,9 @@ class Array(object):
5661
itemsize
5762
nbytes
5863
nbytes_stored
59-
initialized
6064
cdata_shape
65+
nchunks
66+
nchunks_initialized
6167
is_view
6268
6369
Methods
@@ -71,7 +77,7 @@ class Array(object):
7177
""" # flake8: noqa
7278

7379
def __init__(self, store, path=None, read_only=False, chunk_store=None,
74-
synchronizer=None):
80+
synchronizer=None, cache_metadata=True):
7581
# N.B., expect at this point store is fully initialized with all
7682
# configuration metadata fully specified and normalized
7783

@@ -87,13 +93,23 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
8793
else:
8894
self._chunk_store = chunk_store
8995
self._synchronizer = synchronizer
96+
self._cache_metadata = cache_metadata
97+
self._is_view = False
9098

9199
# initialize metadata
100+
self._load_metadata()
101+
102+
# initialize attributes
103+
akey = self._key_prefix + attrs_key
104+
self._attrs = Attributes(store, key=akey, read_only=read_only,
105+
synchronizer=synchronizer)
106+
107+
def _load_metadata(self):
92108
try:
93109
mkey = self._key_prefix + array_meta_key
94-
meta_bytes = store[mkey]
110+
meta_bytes = self._store[mkey]
95111
except KeyError:
96-
raise ValueError('store has no metadata')
112+
err_array_not_found(self._path)
97113
else:
98114

99115
# decode and store metadata
@@ -104,7 +120,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
104120
self._dtype = meta['dtype']
105121
self._fill_value = meta['fill_value']
106122
self._order = meta['order']
107-
self._is_view = False
108123

109124
# setup compressor
110125
config = meta['compressor']
@@ -119,14 +134,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
119134
filters = [get_codec(f) for f in filters]
120135
self._filters = filters
121136

122-
# initialize attributes
123-
akey = self._key_prefix + attrs_key
124-
self._attrs = Attributes(store, key=akey, read_only=read_only,
125-
synchronizer=synchronizer)
126-
127137
def _flush_metadata(self):
128138
if self._is_view:
129-
raise PermissionError('operation not permitted for views')
139+
raise PermissionError('not permitted for views')
140+
130141
if self._compressor:
131142
compressor_config = self._compressor.get_config()
132143
else:
@@ -253,12 +264,6 @@ def nbytes_stored(self):
253264
else:
254265
return m + n
255266

256-
@property
257-
def initialized(self):
258-
"""The number of chunks that have been initialized with some data."""
259-
return sum(1 for k in listdir(self._chunk_store, self._path)
260-
if k not in [array_meta_key, attrs_key])
261-
262267
@property
263268
def cdata_shape(self):
264269
"""A tuple of integers describing the number of chunks along each
@@ -267,6 +272,20 @@ def cdata_shape(self):
267272
int(np.ceil(s / c)) for s, c in zip(self._shape, self._chunks)
268273
)
269274

275+
@property
276+
def nchunks(self):
277+
"""Total number of chunks."""
278+
return reduce(operator.mul, self.cdata_shape)
279+
280+
@property
281+
def nchunks_initialized(self):
282+
"""The number of chunks that have been initialized with some data."""
283+
return sum(1 for k in listdir(self._chunk_store, self._path)
284+
if k not in [array_meta_key, attrs_key])
285+
286+
# backwards compatibility
287+
initialized = nchunks_initialized
288+
270289
@property
271290
def is_view(self):
272291
"""A boolean, True if this array is a view on another array."""
@@ -366,6 +385,10 @@ def __getitem__(self, item):
366385
367386
""" # flake8: noqa
368387

388+
# refresh metadata
389+
if not self._cache_metadata:
390+
self._load_metadata()
391+
369392
# normalize selection
370393
selection = normalize_array_selection(item, self._shape)
371394

@@ -482,7 +505,11 @@ def __setitem__(self, key, value):
482505

483506
# guard conditions
484507
if self._read_only:
485-
raise PermissionError('array is read-only')
508+
err_read_only()
509+
510+
# refresh metadata
511+
if not self._cache_metadata:
512+
self._load_metadata()
486513

487514
# normalize selection
488515
selection = normalize_array_selection(key, self._shape)
@@ -717,6 +744,10 @@ def _encode_chunk(self, chunk):
717744

718745
def __repr__(self):
719746

747+
# refresh metadata
748+
if not self._cache_metadata:
749+
self._load_metadata()
750+
720751
# main line
721752
r = '%s(' % type(self).__name__
722753
if self.name:
@@ -733,8 +764,8 @@ def __repr__(self):
733764
r += '; nbytes_stored: %s' % human_readable_size(
734765
self.nbytes_stored)
735766
r += '; ratio: %.1f' % (self.nbytes / self.nbytes_stored)
736-
n_chunks = reduce(operator.mul, self.cdata_shape)
737-
r += '; initialized: %s/%s' % (self.initialized, n_chunks)
767+
r += '; initialized: %s/%s' % (self.nchunks_initialized,
768+
self.nchunks)
738769

739770
# filters
740771
if self.filters:
@@ -768,15 +799,28 @@ def _write_op(self, f, *args, **kwargs):
768799

769800
# guard condition
770801
if self._read_only:
771-
raise PermissionError('array is read-only')
802+
err_read_only()
772803

773804
# synchronization
774805
if self._synchronizer is None:
806+
807+
# refresh metadata
808+
if not self._cache_metadata:
809+
self._load_metadata()
810+
775811
return f(*args, **kwargs)
812+
776813
else:
814+
777815
# synchronize on the array
778816
mkey = self._key_prefix + array_meta_key
817+
779818
with self._synchronizer[mkey]:
819+
820+
# refresh metadata
821+
if not self._cache_metadata:
822+
self._load_metadata()
823+
780824
return f(*args, **kwargs)
781825

782826
def resize(self, *args):
@@ -1022,7 +1066,7 @@ def view(self, shape=None, chunks=None, dtype=None,
10221066
... v.resize(20000)
10231067
... except PermissionError as e:
10241068
... print(e)
1025-
operation not permitted for views
1069+
not permitted for views
10261070
10271071
""" # flake8: noqa
10281072

@@ -1034,7 +1078,8 @@ def view(self, shape=None, chunks=None, dtype=None,
10341078
if synchronizer is None:
10351079
synchronizer = self._synchronizer
10361080
a = Array(store=store, path=path, chunk_store=chunk_store,
1037-
read_only=read_only, synchronizer=synchronizer)
1081+
read_only=read_only, synchronizer=synchronizer,
1082+
cache_metadata=True)
10381083
a._is_view = True
10391084

10401085
# allow override of some properties

0 commit comments

Comments
 (0)