Skip to content

Commit 5af7ace

Browse files
authored
Merge pull request #220 from alimanfoo/cache-attrs-20171224b
Cache attributes
2 parents c4e2e96 + 45c0fd1 commit 5af7ace

File tree

10 files changed

+384
-77
lines changed

10 files changed

+384
-77
lines changed

docs/api.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ API reference
88
api/core
99
api/hierarchy
1010
api/storage
11+
api/convenience
1112
api/codecs
13+
api/attrs
1214
api/sync
13-
api/convenience

docs/api/attrs.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
The Attributes class (``zarr.attrs``)
2+
=====================================
3+
.. module:: zarr.attrs
4+
5+
.. autoclass:: Attributes
6+
7+
.. automethod:: __getitem__
8+
.. automethod:: __setitem__
9+
.. automethod:: __delitem__
10+
.. automethod:: __iter__
11+
.. automethod:: __len__
12+
.. automethod:: keys
13+
.. automethod:: asdict
14+
.. automethod:: put
15+
.. automethod:: update
16+
.. automethod:: refresh

docs/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ Enhancements
127127
* **Added support for ``datetime64`` and ``timedelta64`` data types**;
128128
:issue:`85`, :issue:`215`.
129129

130+
* **Array and group attributes are now cached by default** to improve performance with
131+
slow stores, e.g., stores accessing data via the network; :issue:`220`, :issue:`218`,
132+
:issue:`204`.
133+
130134
* **New LRUStoreCache class**. The class :class:`zarr.storage.LRUStoreCache` has been
131135
added and provides a means to locally cache data in memory from a store that may be
132136
slow, e.g., a store that retrieves data from a remote server via the network;

zarr/attrs.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,63 @@
99

1010

1111
class Attributes(MutableMapping):
12-
13-
def __init__(self, store, key='.zattrs', read_only=False,
12+
"""Class providing access to user attributes on an array or group. Should not be
13+
instantiated directly; it will be available via the `.attrs` property of an array or
14+
group.
15+
16+
Parameters
17+
----------
18+
store : MutableMapping
19+
The store in which to store the attributes.
20+
key : str, optional
21+
The key under which the attributes will be stored.
22+
read_only : bool, optional
23+
If True, attributes cannot be modified.
24+
cache : bool, optional
25+
If True (default), attributes will be cached locally.
26+
synchronizer : Synchronizer
27+
Only necessary if attributes may be modified from multiple threads or processes.
28+
29+
"""
30+
31+
def __init__(self, store, key='.zattrs', read_only=False, cache=True,
1432
synchronizer=None):
1533
self.store = store
1634
self.key = key
1735
self.read_only = read_only
36+
self.cache = cache
37+
self._cached_asdict = None
1838
self.synchronizer = synchronizer
1939

40+
def _get_nosync(self):
41+
try:
42+
data = self.store[self.key]
43+
except KeyError:
44+
d = dict()
45+
else:
46+
d = json.loads(text_type(data, 'ascii'))
47+
return d
48+
49+
def asdict(self):
50+
"""Retrieve all attributes as a dictionary."""
51+
if self.cache and self._cached_asdict is not None:
52+
return self._cached_asdict
53+
d = self._get_nosync()
54+
if self.cache:
55+
self._cached_asdict = d
56+
return d
57+
58+
def refresh(self):
59+
"""Refresh cached attributes from the store."""
60+
if self.cache:
61+
self._cached_asdict = self._get_nosync()
62+
2063
def __contains__(self, x):
2164
return x in self.asdict()
2265

2366
def __getitem__(self, item):
2467
return self.asdict()[item]
2568

26-
def _put(self, d):
27-
s = json.dumps(d, indent=4, sort_keys=True, ensure_ascii=True, separators=(',', ': '))
28-
self.store[self.key] = s.encode('ascii')
29-
3069
def _write_op(self, f, *args, **kwargs):
3170

3271
# guard condition
@@ -46,48 +85,56 @@ def __setitem__(self, item, value):
4685
def _setitem_nosync(self, item, value):
4786

4887
# load existing data
49-
d = self.asdict()
88+
d = self._get_nosync()
5089

5190
# set key value
5291
d[item] = value
5392

5493
# _put modified data
55-
self._put(d)
94+
self._put_nosync(d)
5695

5796
def __delitem__(self, item):
5897
self._write_op(self._delitem_nosync, item)
5998

6099
def _delitem_nosync(self, key):
61100

62101
# load existing data
63-
d = self.asdict()
102+
d = self._get_nosync()
64103

65104
# delete key value
66105
del d[key]
67106

68107
# _put modified data
69-
self._put(d)
108+
self._put_nosync(d)
70109

71-
def asdict(self):
72-
if self.key in self.store:
73-
return json.loads(text_type(self.store[self.key], 'ascii'))
74-
else:
75-
return dict()
110+
def put(self, d):
111+
"""Overwrite all attributes with the key/value pairs in the provided dictionary
112+
`d` in a single operation."""
113+
self._write_op(self._put_nosync, d)
114+
115+
def _put_nosync(self, d):
116+
s = json.dumps(d, indent=4, sort_keys=True, ensure_ascii=True, separators=(',', ': '))
117+
self.store[self.key] = s.encode('ascii')
118+
if self.cache:
119+
self._cached_asdict = d
76120

77121
def update(self, *args, **kwargs):
78-
# override to provide update in a single write
122+
"""Update the values of several attributes in a single operation."""
79123
self._write_op(self._update_nosync, *args, **kwargs)
80124

81125
def _update_nosync(self, *args, **kwargs):
82126

83127
# load existing data
84-
d = self.asdict()
128+
d = self._get_nosync()
85129

86130
# update
87131
d.update(*args, **kwargs)
88132

89133
# _put modified data
90-
self._put(d)
134+
self._put_nosync(d)
135+
136+
def keys(self):
137+
return self.asdict().keys()
91138

92139
def __iter__(self):
93140
return iter(self.asdict())

zarr/core.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,14 @@ class Array(object):
4343
synchronizer : object, optional
4444
Array synchronizer.
4545
cache_metadata : bool, optional
46-
If True, array configuration metadata will be cached for the
46+
If True (default), array configuration metadata will be cached for the
4747
lifetime of the object. If False, array metadata will be reloaded
4848
prior to all data access and modification operations (may incur
4949
overhead depending on storage and data access pattern).
50+
cache_attrs : bool, optional
51+
If True (default), user attributes will be cached for attribute read
52+
operations. If False, user attributes are reloaded from the store prior
53+
to all attribute read operations.
5054
5155
Attributes
5256
----------
@@ -99,7 +103,7 @@ class Array(object):
99103
"""
100104

101105
def __init__(self, store, path=None, read_only=False, chunk_store=None,
102-
synchronizer=None, cache_metadata=True):
106+
synchronizer=None, cache_metadata=True, cache_attrs=True):
103107
# N.B., expect at this point store is fully initialized with all
104108
# configuration metadata fully specified and normalized
105109

@@ -121,7 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
121125
# initialize attributes
122126
akey = self._key_prefix + attrs_key
123127
self._attrs = Attributes(store, key=akey, read_only=read_only,
124-
synchronizer=synchronizer)
128+
synchronizer=synchronizer, cache=cache_attrs)
125129

126130
# initialize info reporter
127131
self._info_reporter = InfoReporter(self)
@@ -1920,7 +1924,7 @@ def hexdigest(self, hashname="sha1"):
19201924

19211925
def __getstate__(self):
19221926
return (self._store, self._path, self._read_only, self._chunk_store,
1923-
self._synchronizer, self._cache_metadata)
1927+
self._synchronizer, self._cache_metadata, self._attrs.cache)
19241928

19251929
def __setstate__(self, state):
19261930
self.__init__(*state)

zarr/creation.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
def create(shape, chunks=True, dtype=None, compressor='default',
1717
fill_value=0, order='C', store=None, synchronizer=None,
1818
overwrite=False, path=None, chunk_store=None, filters=None,
19-
cache_metadata=True, read_only=False, object_codec=None,
20-
**kwargs):
19+
cache_metadata=True, cache_attrs=True, read_only=False,
20+
object_codec=None, **kwargs):
2121
"""Create an array.
2222
2323
Parameters
@@ -54,6 +54,10 @@ def create(shape, chunks=True, dtype=None, compressor='default',
5454
lifetime of the object. If False, array metadata will be reloaded
5555
prior to all data access and modification operations (may incur
5656
overhead depending on storage and data access pattern).
57+
cache_attrs : bool, optional
58+
If True (default), user attributes will be cached for attribute read
59+
operations. If False, user attributes are reloaded from the store prior
60+
to all attribute read operations.
5761
read_only : bool, optional
5862
True if array should be protected against modification.
5963
object_codec : Codec, optional
@@ -115,7 +119,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
115119

116120
# instantiate array
117121
z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer,
118-
cache_metadata=cache_metadata, read_only=read_only)
122+
cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only)
119123

120124
return z
121125

@@ -342,8 +346,9 @@ def array(data, **kwargs):
342346

343347

344348
def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor='default',
345-
fill_value=0, order='C', synchronizer=None, filters=None, cache_metadata=True,
346-
path=None, object_codec=None, **kwargs):
349+
fill_value=0, order='C', synchronizer=None, filters=None,
350+
cache_metadata=True, cache_attrs=True, path=None, object_codec=None,
351+
**kwargs):
347352
"""Open an array using file-mode-like semantics.
348353
349354
Parameters
@@ -377,6 +382,10 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor=
377382
lifetime of the object. If False, array metadata will be reloaded
378383
prior to all data access and modification operations (may incur
379384
overhead depending on storage and data access pattern).
385+
cache_attrs : bool, optional
386+
If True (default), user attributes will be cached for attribute read
387+
operations. If False, user attributes are reloaded from the store prior
388+
to all attribute read operations.
380389
path : string, optional
381390
Array path within store.
382391
object_codec : Codec, optional
@@ -465,7 +474,7 @@ def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor=
465474

466475
# instantiate array
467476
z = Array(store, read_only=read_only, synchronizer=synchronizer,
468-
cache_metadata=cache_metadata, path=path)
477+
cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path)
469478

470479
return z
471480

0 commit comments

Comments
 (0)