Skip to content

Commit 3fa6c3a

Browse files
committed
cache attributes
1 parent 89f4107 commit 3fa6c3a

File tree

5 files changed

+157
-26
lines changed

5 files changed

+157
-26
lines changed

zarr/attrs.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,42 @@
1010

1111
class Attributes(MutableMapping):
1212

13-
def __init__(self, store, key='.zattrs', read_only=False,
13+
def __init__(self, store, key='.zattrs', read_only=False, cache=True,
1414
synchronizer=None):
1515
self.store = store
1616
self.key = key
1717
self.read_only = read_only
18+
self.cache = cache
19+
self._cached_asdict = None
1820
self.synchronizer = synchronizer
1921

22+
def _get(self):
23+
if self.key in self.store:
24+
d = json.loads(text_type(self.store[self.key], 'ascii'))
25+
else:
26+
d = dict()
27+
return d
28+
29+
def _put(self, d):
30+
s = json.dumps(d, indent=4, sort_keys=True, ensure_ascii=True, separators=(',', ': '))
31+
self.store[self.key] = s.encode('ascii')
32+
if self.cache:
33+
self._cached_asdict = d
34+
35+
def asdict(self):
36+
if self.cache and self._cached_asdict is not None:
37+
return self._cached_asdict
38+
d = self._get()
39+
if self.cache:
40+
self._cached_asdict = d
41+
return d
42+
2043
def __contains__(self, x):
2144
return x in self.asdict()
2245

2346
def __getitem__(self, item):
2447
return self.asdict()[item]
2548

26-
def _put(self, d):
27-
s = json.dumps(d, indent=4, sort_keys=True, ensure_ascii=True, separators=(',', ': '))
28-
self.store[self.key] = s.encode('ascii')
29-
3049
def _write_op(self, f, *args, **kwargs):
3150

3251
# guard condition
@@ -46,7 +65,7 @@ def __setitem__(self, item, value):
4665
def _setitem_nosync(self, item, value):
4766

4867
# load existing data
49-
d = self.asdict()
68+
d = self._get()
5069

5170
# set key value
5271
d[item] = value
@@ -60,28 +79,22 @@ def __delitem__(self, item):
6079
def _delitem_nosync(self, key):
6180

6281
# load existing data
63-
d = self.asdict()
82+
d = self._get()
6483

6584
# delete key value
6685
del d[key]
6786

6887
# write modified data back to the store
6988
self._put(d)
7089

71-
def asdict(self):
72-
if self.key in self.store:
73-
return json.loads(text_type(self.store[self.key], 'ascii'))
74-
else:
75-
return dict()
76-
7790
def update(self, *args, **kwargs):
7891
# override to provide update in a single write
7992
self._write_op(self._update_nosync, *args, **kwargs)
8093

8194
def _update_nosync(self, *args, **kwargs):
8295

8396
# load existing data
84-
d = self.asdict()
97+
d = self._get()
8598

8699
# update
87100
d.update(*args, **kwargs)

zarr/core.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ class Array(object):
4747
lifetime of the object. If False, array metadata will be reloaded
4848
prior to all data access and modification operations (may incur
4949
overhead depending on storage and data access pattern).
50+
cache_attrs : bool, optional
51+
If True (default), user attributes will be cached for attribute read
52+
operations. If False, user attributes are reloaded from the store prior
53+
to all attribute read operations.
5054
5155
Attributes
5256
----------
@@ -99,7 +103,7 @@ class Array(object):
99103
"""
100104

101105
def __init__(self, store, path=None, read_only=False, chunk_store=None,
102-
synchronizer=None, cache_metadata=True):
106+
synchronizer=None, cache_metadata=True, cache_attrs=True):
103107
# N.B., expect at this point store is fully initialized with all
104108
# configuration metadata fully specified and normalized
105109

@@ -121,7 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
121125
# initialize attributes
122126
akey = self._key_prefix + attrs_key
123127
self._attrs = Attributes(store, key=akey, read_only=read_only,
124-
synchronizer=synchronizer)
128+
synchronizer=synchronizer, cache=cache_attrs)
125129

126130
# initialize info reporter
127131
self._info_reporter = InfoReporter(self)

zarr/tests/test_attrs.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import absolute_import, print_function, division
33
import json
44
import unittest
5+
import collections
56

67

78
from nose.tools import eq_ as eq, assert_raises
@@ -12,10 +13,38 @@
1213
from zarr.errors import PermissionError
1314

1415

16+
class CountingDict(collections.MutableMapping):
17+
18+
def __init__(self):
19+
self.wrapped = dict()
20+
self.counter = collections.Counter()
21+
22+
def __len__(self):
23+
return len(self.wrapped)
24+
25+
def __iter__(self):
26+
return iter(self.wrapped)
27+
28+
def __contains__(self, item):
29+
return item in self.wrapped
30+
31+
def __getitem__(self, item):
32+
self.counter['__getitem__', item] += 1
33+
return self.wrapped[item]
34+
35+
def __setitem__(self, key, value):
36+
self.counter['__setitem__', key] += 1
37+
self.wrapped[key] = value
38+
39+
def __delitem__(self, key):
40+
self.counter['__delitem__', key] += 1
41+
del self.wrapped[key]
42+
43+
1544
class TestAttributes(unittest.TestCase):
1645

17-
def init_attributes(self, store, read_only=False):
18-
return Attributes(store, key='attrs', read_only=read_only)
46+
def init_attributes(self, store, read_only=False, cache=True):
47+
return Attributes(store, key='attrs', read_only=read_only, cache=cache)
1948

2049
def test_storage(self):
2150

@@ -102,3 +131,72 @@ def test_key_completions(self):
102131
assert '123' in d
103132
assert 'asdf;' in d
104133
assert 'baz' not in d
134+
135+
def test_caching_on(self):
136+
# caching is turned on by default
137+
store = CountingDict()
138+
eq(0, store.counter['__getitem__', 'attrs'])
139+
eq(0, store.counter['__setitem__', 'attrs'])
140+
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
141+
eq(0, store.counter['__getitem__', 'attrs'])
142+
eq(1, store.counter['__setitem__', 'attrs'])
143+
a = self.init_attributes(store)
144+
eq(a['foo'], 'xxx')
145+
eq(1, store.counter['__getitem__', 'attrs'])
146+
eq(a['bar'], 42)
147+
eq(1, store.counter['__getitem__', 'attrs'])
148+
eq(a['foo'], 'xxx')
149+
eq(1, store.counter['__getitem__', 'attrs'])
150+
a['foo'] = 'yyy'
151+
eq(2, store.counter['__getitem__', 'attrs'])
152+
eq(2, store.counter['__setitem__', 'attrs'])
153+
eq(a['foo'], 'yyy')
154+
eq(2, store.counter['__getitem__', 'attrs'])
155+
eq(2, store.counter['__setitem__', 'attrs'])
156+
a.update(foo='zzz', bar=84)
157+
eq(3, store.counter['__getitem__', 'attrs'])
158+
eq(3, store.counter['__setitem__', 'attrs'])
159+
eq(a['foo'], 'zzz')
160+
eq(a['bar'], 84)
161+
eq(3, store.counter['__getitem__', 'attrs'])
162+
eq(3, store.counter['__setitem__', 'attrs'])
163+
assert 'foo' in a
164+
eq(3, store.counter['__getitem__', 'attrs'])
165+
eq(3, store.counter['__setitem__', 'attrs'])
166+
assert 'spam' not in a
167+
eq(3, store.counter['__getitem__', 'attrs'])
168+
eq(3, store.counter['__setitem__', 'attrs'])
169+
170+
def test_caching_off(self):
171+
store = CountingDict()
172+
eq(0, store.counter['__getitem__', 'attrs'])
173+
eq(0, store.counter['__setitem__', 'attrs'])
174+
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
175+
eq(0, store.counter['__getitem__', 'attrs'])
176+
eq(1, store.counter['__setitem__', 'attrs'])
177+
a = self.init_attributes(store, cache=False)
178+
eq(a['foo'], 'xxx')
179+
eq(1, store.counter['__getitem__', 'attrs'])
180+
eq(a['bar'], 42)
181+
eq(2, store.counter['__getitem__', 'attrs'])
182+
eq(a['foo'], 'xxx')
183+
eq(3, store.counter['__getitem__', 'attrs'])
184+
a['foo'] = 'yyy'
185+
eq(4, store.counter['__getitem__', 'attrs'])
186+
eq(2, store.counter['__setitem__', 'attrs'])
187+
eq(a['foo'], 'yyy')
188+
eq(5, store.counter['__getitem__', 'attrs'])
189+
eq(2, store.counter['__setitem__', 'attrs'])
190+
a.update(foo='zzz', bar=84)
191+
eq(6, store.counter['__getitem__', 'attrs'])
192+
eq(3, store.counter['__setitem__', 'attrs'])
193+
eq(a['foo'], 'zzz')
194+
eq(a['bar'], 84)
195+
eq(8, store.counter['__getitem__', 'attrs'])
196+
eq(3, store.counter['__setitem__', 'attrs'])
197+
assert 'foo' in a
198+
eq(9, store.counter['__getitem__', 'attrs'])
199+
eq(3, store.counter['__setitem__', 'attrs'])
200+
assert 'spam' not in a
201+
eq(10, store.counter['__getitem__', 'attrs'])
202+
eq(3, store.counter['__setitem__', 'attrs'])

zarr/tests/test_core.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,14 +1565,15 @@ def test_nbytes_stored(self):
15651565
eq(-1, z.nbytes_stored)
15661566

15671567

1568-
class TestArrayNoCacheMetadata(TestArray):
1568+
class TestArrayNoCache(TestArray):
15691569

15701570
@staticmethod
15711571
def create_array(read_only=False, **kwargs):
15721572
store = dict()
15731573
kwargs.setdefault('compressor', Zlib(level=1))
15741574
init_array(store, **kwargs)
1575-
return Array(store, read_only=read_only, cache_metadata=False)
1575+
return Array(store, read_only=read_only, cache_metadata=False,
1576+
cache_attrs=False)
15761577

15771578
def test_cache_metadata(self):
15781579
a1 = self.create_array(shape=100, chunks=10, dtype='i1')
@@ -1582,6 +1583,7 @@ def test_cache_metadata(self):
15821583
eq(a1.nbytes, a2.nbytes)
15831584
eq(a1.nchunks, a2.nchunks)
15841585

1586+
# a1 is not caching so *will* see updates made via other objects
15851587
a2.resize(200)
15861588
eq((200,), a2.shape)
15871589
eq(200, a2.size)
@@ -1602,6 +1604,7 @@ def test_cache_metadata(self):
16021604
eq(a1.nbytes, a2.nbytes)
16031605
eq(a1.nchunks, a2.nchunks)
16041606

1607+
# a2 is caching so *will not* see updates made via other objects
16051608
a1.resize(400)
16061609
eq((400,), a1.shape)
16071610
eq(400, a1.size)
@@ -1612,6 +1615,21 @@ def test_cache_metadata(self):
16121615
eq(300, a2.nbytes)
16131616
eq(30, a2.nchunks)
16141617

1618+
def test_cache_attrs(self):
1619+
a1 = self.create_array(shape=100, chunks=10, dtype='i1')
1620+
a2 = Array(a1.store, cache_attrs=True)
1621+
eq(a1.attrs.asdict(), a2.attrs.asdict())
1622+
1623+
# a1 is not caching so *will* see updates made via other objects
1624+
a2.attrs['foo'] = 'xxx'
1625+
a2.attrs['bar'] = 42
1626+
eq(a1.attrs.asdict(), a2.attrs.asdict())
1627+
1628+
# a2 is caching so *will not* see updates made via other objects
1629+
a1.attrs['foo'] = 'yyy'
1630+
assert 'yyy' == a1.attrs['foo']
1631+
assert 'xxx' == a2.attrs['foo']
1632+
16151633
def test_object_arrays_danger(self):
16161634
# skip this one as it only works if metadata are cached
16171635
pass

zarr/tests/test_sync.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,22 @@
2727

2828
class TestAttributesWithThreadSynchronizer(TestAttributes):
2929

30-
def init_attributes(self, store, read_only=False):
30+
def init_attributes(self, store, read_only=False, cache=True):
3131
key = 'attrs'
32-
store[key] = json.dumps(dict()).encode('ascii')
3332
synchronizer = ThreadSynchronizer()
3433
return Attributes(store, synchronizer=synchronizer, key=key,
35-
read_only=read_only)
34+
read_only=read_only, cache=cache)
3635

3736

3837
class TestAttributesProcessSynchronizer(TestAttributes):
3938

40-
def init_attributes(self, store, read_only=False):
39+
def init_attributes(self, store, read_only=False, cache=True):
4140
key = 'attrs'
42-
store[key] = json.dumps(dict()).encode('ascii')
4341
sync_path = mkdtemp()
4442
atexit.register(shutil.rmtree, sync_path)
4543
synchronizer = ProcessSynchronizer(sync_path)
4644
return Attributes(store, synchronizer=synchronizer, key=key,
47-
read_only=read_only)
45+
read_only=read_only, cache=cache)
4846

4947

5048
def _append(arg):

0 commit comments

Comments
 (0)