Skip to content

Commit a00627e

Browse files
committed
cache_metadata
1 parent b7213a1 commit a00627e

File tree

7 files changed

+103
-28
lines changed

7 files changed

+103
-28
lines changed

docs/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Release notes
44
* Added ``overwrite`` keyword argument to array and group creation methods
55
on the :class:`zarr.hierarchy.Group` class
66
(`#71 <https://github.com/alimanfoo/zarr/issues/71>`_).
7+
* Added ``cache_metadata`` keyword argument to array creation methods.
78

89
.. _release_2.0.1:
910

zarr/core.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ class Array(object):
3434
for storage of both chunks and metadata.
3535
synchronizer : object, optional
3636
Array synchronizer.
37+
cache_metadata : bool, optional
38+
If True, array configuration metadata will be cached for the
39+
lifetime of the object. If False, array metadata will be reloaded
40+
prior to all data access and modification operations (may incur
41+
overhead depending on storage and data access pattern).
3742
3843
Attributes
3944
----------
@@ -71,7 +76,7 @@ class Array(object):
7176
""" # flake8: noqa
7277

7378
def __init__(self, store, path=None, read_only=False, chunk_store=None,
74-
synchronizer=None):
79+
synchronizer=None, cache_metadata=True):
7580
# N.B., expect at this point store is fully initialized with all
7681
# configuration metadata fully specified and normalized
7782

@@ -87,11 +92,21 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
8792
else:
8893
self._chunk_store = chunk_store
8994
self._synchronizer = synchronizer
95+
self._cache_metadata = cache_metadata
96+
self._is_view = False
9097

9198
# initialize metadata
99+
self._load_metadata()
100+
101+
# initialize attributes
102+
akey = self._key_prefix + attrs_key
103+
self._attrs = Attributes(store, key=akey, read_only=read_only,
104+
synchronizer=synchronizer)
105+
106+
def _load_metadata(self):
92107
try:
93108
mkey = self._key_prefix + array_meta_key
94-
meta_bytes = store[mkey]
109+
meta_bytes = self._store[mkey]
95110
except KeyError:
96111
raise ValueError('store has no metadata')
97112
else:
@@ -104,7 +119,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
104119
self._dtype = meta['dtype']
105120
self._fill_value = meta['fill_value']
106121
self._order = meta['order']
107-
self._is_view = False
108122

109123
# setup compressor
110124
config = meta['compressor']
@@ -119,14 +133,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
119133
filters = [get_codec(f) for f in filters]
120134
self._filters = filters
121135

122-
# initialize attributes
123-
akey = self._key_prefix + attrs_key
124-
self._attrs = Attributes(store, key=akey, read_only=read_only,
125-
synchronizer=synchronizer)
126-
127136
def _flush_metadata(self):
128137
if self._is_view:
129138
raise PermissionError('operation not permitted for views')
139+
130140
if self._compressor:
131141
compressor_config = self._compressor.get_config()
132142
else:
@@ -366,6 +376,10 @@ def __getitem__(self, item):
366376
367377
""" # flake8: noqa
368378

379+
# refresh metadata
380+
if not self._cache_metadata:
381+
self._load_metadata()
382+
369383
# normalize selection
370384
selection = normalize_array_selection(item, self._shape)
371385

@@ -484,6 +498,10 @@ def __setitem__(self, key, value):
484498
if self._read_only:
485499
raise PermissionError('array is read-only')
486500

501+
# refresh metadata
502+
if not self._cache_metadata:
503+
self._load_metadata()
504+
487505
# normalize selection
488506
selection = normalize_array_selection(key, self._shape)
489507

@@ -717,6 +735,10 @@ def _encode_chunk(self, chunk):
717735

718736
def __repr__(self):
719737

738+
# refresh metadata
739+
if not self._cache_metadata:
740+
self._load_metadata()
741+
720742
# main line
721743
r = '%s(' % type(self).__name__
722744
if self.name:
@@ -772,11 +794,24 @@ def _write_op(self, f, *args, **kwargs):
772794

773795
# synchronization
774796
if self._synchronizer is None:
797+
798+
# refresh metadata
799+
if not self._cache_metadata:
800+
self._load_metadata()
801+
775802
return f(*args, **kwargs)
803+
776804
else:
805+
777806
# synchronize on the array
778807
mkey = self._key_prefix + array_meta_key
808+
779809
with self._synchronizer[mkey]:
810+
811+
# refresh metadata
812+
if not self._cache_metadata:
813+
self._load_metadata()
814+
780815
return f(*args, **kwargs)
781816

782817
def resize(self, *args):
@@ -1034,7 +1069,8 @@ def view(self, shape=None, chunks=None, dtype=None,
10341069
if synchronizer is None:
10351070
synchronizer = self._synchronizer
10361071
a = Array(store=store, path=path, chunk_store=chunk_store,
1037-
read_only=read_only, synchronizer=synchronizer)
1072+
read_only=read_only, synchronizer=synchronizer,
1073+
cache_metadata=True)
10381074
a._is_view = True
10391075

10401076
# allow override of some properties

zarr/creation.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
def create(shape, chunks=None, dtype=None, compressor='default',
1616
fill_value=None, order='C', store=None, synchronizer=None,
1717
overwrite=False, path=None, chunk_store=None, filters=None,
18-
**kwargs):
18+
cache_metadata=True, **kwargs):
1919
"""Create an array.
2020
2121
Parameters
@@ -47,6 +47,11 @@ def create(shape, chunks=None, dtype=None, compressor='default',
4747
for storage of both chunks and metadata.
4848
filters : sequence of Codecs, optional
4949
Sequence of filters to use to encode chunk data prior to compression.
50+
cache_metadata : bool, optional
51+
If True, array configuration metadata will be cached for the
52+
lifetime of the object. If False, array metadata will be reloaded
53+
prior to all data access and modification operations (may incur
54+
overhead depending on storage and data access pattern).
5055
5156
Returns
5257
-------
@@ -82,7 +87,7 @@ def create(shape, chunks=None, dtype=None, compressor='default',
8287

8388
# instantiate array
8489
z = Array(store, path=path, chunk_store=chunk_store,
85-
synchronizer=synchronizer)
90+
synchronizer=synchronizer, cache_metadata=cache_metadata)
8691

8792
return z
8893

@@ -277,7 +282,7 @@ def array(data, **kwargs):
277282

278283
def open_array(path, mode='a', shape=None, chunks=None, dtype=None,
279284
compressor='default', fill_value=None, order='C',
280-
synchronizer=None, filters=None, **kwargs):
285+
synchronizer=None, filters=None, cache_metadata=True, **kwargs):
281286
"""Convenience function to instantiate an array stored in a
282287
directory on the file system.
283288
@@ -306,6 +311,11 @@ def open_array(path, mode='a', shape=None, chunks=None, dtype=None,
306311
Array synchronizer.
307312
filters : sequence, optional
308313
Sequence of filters to use to encode chunk data prior to compression.
314+
cache_metadata : bool, optional
315+
If True, array configuration metadata will be cached for the
316+
lifetime of the object. If False, array metadata will be reloaded
317+
prior to all data access and modification operations (may incur
318+
overhead depending on storage and data access pattern).
309319
310320
Returns
311321
-------
@@ -388,7 +398,8 @@ def open_array(path, mode='a', shape=None, chunks=None, dtype=None,
388398
read_only = mode == 'r'
389399

390400
# instantiate array
391-
z = Array(store, read_only=read_only, synchronizer=synchronizer)
401+
z = Array(store, read_only=read_only, synchronizer=synchronizer,
402+
cache_metadata=cache_metadata)
392403

393404
return z
394405

zarr/hierarchy.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def require_groups(self, *names):
496496
def create_dataset(self, name, data=None, shape=None, chunks=None,
497497
dtype=None, compressor='default', fill_value=None,
498498
order='C', synchronizer=None, filters=None,
499-
overwrite=False, **kwargs):
499+
overwrite=False, cache_metadata=True, **kwargs):
500500
"""Create an array.
501501
502502
Parameters
@@ -525,6 +525,11 @@ def create_dataset(self, name, data=None, shape=None, chunks=None,
525525
compression.
526526
overwrite : bool, optional
527527
If True, replace any existing array or group with the given name.
528+
cache_metadata : bool, optional
529+
If True, array configuration metadata will be cached for the
530+
lifetime of the object. If False, array metadata will be reloaded
531+
prior to all data access and modification operations (may incur
532+
overhead depending on storage and data access pattern).
528533
529534
Returns
530535
-------
@@ -548,12 +553,14 @@ def create_dataset(self, name, data=None, shape=None, chunks=None,
548553
shape=shape, chunks=chunks, dtype=dtype,
549554
compressor=compressor, fill_value=fill_value,
550555
order=order, synchronizer=synchronizer,
551-
filters=filters, overwrite=overwrite, **kwargs)
556+
filters=filters, overwrite=overwrite,
557+
cache_metadata=cache_metadata, **kwargs)
552558

553559
def _create_dataset_nosync(self, name, data=None, shape=None, chunks=None,
554560
dtype=None, compressor='default',
555561
fill_value=None, order='C', synchronizer=None,
556-
filters=None, overwrite=False, **kwargs):
562+
filters=None, overwrite=False,
563+
cache_metadata=True, **kwargs):
557564

558565
path = self._item_path(name)
559566

@@ -568,15 +575,17 @@ def _create_dataset_nosync(self, name, data=None, shape=None, chunks=None,
568575
order=order, synchronizer=synchronizer,
569576
store=self._store, path=path,
570577
chunk_store=self._chunk_store, filters=filters,
571-
overwrite=overwrite, **kwargs)
578+
overwrite=overwrite, cache_metadata=cache_metadata,
579+
**kwargs)
572580

573581
else:
574582
a = create(shape=shape, chunks=chunks, dtype=dtype,
575583
compressor=compressor, fill_value=fill_value,
576584
order=order, synchronizer=synchronizer,
577585
store=self._store, path=path,
578586
chunk_store=self._chunk_store, filters=filters,
579-
overwrite=overwrite, **kwargs)
587+
overwrite=overwrite, cache_metadata=cache_metadata,
588+
**kwargs)
580589

581590
return a
582591

@@ -608,8 +617,10 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
608617

609618
if contains_array(self._store, path):
610619
synchronizer = kwargs.get('synchronizer', self._synchronizer)
620+
cache_metadata = kwargs.get('cache_metadata', True)
611621
a = Array(self._store, path=path, read_only=self._read_only,
612-
chunk_store=self._chunk_store, synchronizer=synchronizer)
622+
chunk_store=self._chunk_store,
623+
synchronizer=synchronizer, cache_metadata=cache_metadata)
613624
shape = normalize_shape(shape)
614625
if shape != a.shape:
615626
raise TypeError('shapes do not match')

zarr/storage.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def getsize(store, path=None):
123123

124124

125125
def _require_parent_group(path, store, chunk_store, overwrite):
126-
path = normalize_storage_path(path)
126+
# assume path is normalized
127127
if path:
128128
segments = path.split('/')
129129
for i in range(len(segments)):
@@ -249,6 +249,17 @@ def init_array(store, shape, chunks, dtype=None, compressor='default',
249249
_require_parent_group(path, store=store, chunk_store=chunk_store,
250250
overwrite=overwrite)
251251

252+
_init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype,
253+
compressor=compressor, fill_value=fill_value,
254+
order=order, overwrite=overwrite, path=path,
255+
chunk_store=chunk_store, filters=filters)
256+
257+
258+
def _init_array_metadata(store, shape, chunks, dtype=None,
259+
compressor='default',
260+
fill_value=None, order='C', overwrite=False,
261+
path=None, chunk_store=None, filters=None):
262+
252263
# guard conditions
253264
if overwrite:
254265
# attempt to delete any pre-existing items in store

zarr/tests/test_core.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import shutil
77
import pickle
88
from collections import MutableMapping
9-
import os
109

1110

1211
import numpy as np
@@ -711,16 +710,13 @@ def test_repr(self):
711710

712711

713712
# custom store, does not support getsize()
714-
class CustomMapping(MutableMapping):
713+
class CustomMapping(object):
715714

716715
def __init__(self):
717716
self.inner = dict()
718717

719-
def __iter__(self):
720-
return iter(self.inner)
721-
722-
def __len__(self):
723-
return len(self.inner)
718+
def keys(self):
719+
return self.inner.keys()
724720

725721
def __getitem__(self, item):
726722
return self.inner[item]
@@ -764,3 +760,11 @@ def test_repr(self):
764760
eq(l1, l2)
765761

766762

763+
class TestArrayNoCacheMetadata(TestArray):
    """Run the inherited TestArray suite with metadata caching disabled.

    Only the array factory is overridden: every array handed to the
    inherited tests is constructed with ``cache_metadata=False``.
    """

    @staticmethod
    def create_array(read_only=False, **kwargs):
        """Build an array over a fresh in-memory dict store.

        Parameters
        ----------
        read_only : bool, optional
            Passed through to the ``Array`` constructor.
        **kwargs
            Passed through to ``init_array``; ``compressor`` falls back to
            ``Zlib(level=1)`` when the caller does not supply one.

        Returns
        -------
        Array
            Array opened on the new store with ``cache_metadata=False``.
        """
        # start from the default compressor, then let caller options win
        init_options = {'compressor': Zlib(level=1)}
        init_options.update(kwargs)

        backing_store = {}
        init_array(backing_store, **init_options)

        return Array(backing_store, read_only=read_only,
                     cache_metadata=False)

zarr/tests/test_sync.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def create_array(self, store=None, path=None, read_only=False,
7878
atexit.register(shutil.rmtree, sync_path)
7979
synchronizer = ProcessSynchronizer(sync_path)
8080
return Array(store, path=path, synchronizer=synchronizer,
81-
read_only=read_only, chunk_store=chunk_store)
81+
read_only=read_only, chunk_store=chunk_store,
82+
cache_metadata=False)
8283

8384
def test_repr(self):
8485
if not PY2:

0 commit comments

Comments
 (0)