Skip to content

Commit dfe1fa7

Browse files
committed
add tests for keys caching
1 parent efbec72 commit dfe1fa7

File tree

3 files changed

+87
-6
lines changed

3 files changed

+87
-6
lines changed

zarr/storage.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,18 +1696,21 @@ def __init__(self, store, max_size):
16961696
self._max_size = max_size
16971697
self._current_size = 0
16981698
self._keys_cache = None
1699+
self._contains_cache = None
16991700
self._listdir_cache = dict()
17001701
self._values_cache = OrderedDict()
17011702
self._mutex = Lock()
17021703
self.hits = self.misses = 0
17031704

17041705
def __getstate__(self):
17051706
return (self._store, self._max_size, self._current_size, self._keys_cache,
1706-
self._listdir_cache, self._values_cache, self.hits, self.misses)
1707+
self._contains_cache, self._listdir_cache, self._values_cache, self.hits,
1708+
self.misses)
17071709

17081710
def __setstate__(self, state):
17091711
(self._store, self._max_size, self._current_size, self._keys_cache,
1710-
self._listdir_cache, self._values_cache, self.hits, self.misses) = state
1712+
self._contains_cache, self._listdir_cache, self._values_cache, self.hits,
1713+
self.misses) = state
17111714
self._mutex = Lock()
17121715

17131716
def __len__(self):
@@ -1716,10 +1719,19 @@ def __len__(self):
17161719
def __iter__(self):
17171720
return self.keys()
17181721

1722+
def __contains__(self, key):
1723+
with self._mutex:
1724+
if self._contains_cache is None:
1725+
self._contains_cache = set(self._keys())
1726+
return key in self._contains_cache
1727+
17191728
def keys(self):
17201729
with self._mutex:
1721-
if self._keys_cache is None:
1722-
self._keys_cache = list(self._store.keys())
1730+
return self._keys()
1731+
1732+
def _keys(self):
1733+
if self._keys_cache is None:
1734+
self._keys_cache = list(self._store.keys())
17231735
return iter(self._keys_cache)
17241736

17251737
def listdir(self, path=None):
@@ -1768,6 +1780,7 @@ def clear_keys(self):
17681780

17691781
def _clear_keys(self):
17701782
self._keys_cache = None
1783+
self._contains_cache = None
17711784
self._listdir_cache.clear()
17721785

17731786
def _clear_value(self, key):

zarr/tests/test_storage.py

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,12 @@ def test_cache_values_no_max_size(self):
889889
assert 2 == cache.hits
890890
assert 1 == cache.misses
891891

892+
# manually clear all cached values
893+
cache.clear_values()
894+
assert b'zzz' == cache['foo']
895+
assert 2 == store.counter['__getitem__', 'foo']
896+
assert 2 == store.counter['__setitem__', 'foo']
897+
892898
# test __delitem__
893899
del cache['foo']
894900
with pytest.raises(KeyError):
@@ -902,7 +908,6 @@ def test_cache_values_no_max_size(self):
902908
assert 0 == store.counter['__getitem__', 'bar']
903909
assert 1 == store.counter['__setitem__', 'bar']
904910

905-
# TODO test max size
906911
def test_cache_values_with_max_size(self):
907912

908913
# setup store
@@ -999,7 +1004,63 @@ def test_cache_values_with_max_size(self):
9991004
assert 4 == cache.hits
10001005
assert 2 == cache.misses
10011006

1002-
# TODO test key caching
1007+
def test_cache_keys(self):
1008+
1009+
# setup
1010+
store = CountingDict()
1011+
store['foo'] = b'xxx'
1012+
store['bar'] = b'yyy'
1013+
assert 0 == store.counter['__contains__', 'foo']
1014+
assert 0 == store.counter['__iter__']
1015+
assert 0 == store.counter['keys']
1016+
cache = LRUStoreCache(store, max_size=None)
1017+
1018+
# keys should be cached on first call
1019+
keys = sorted(cache.keys())
1020+
assert keys == ['bar', 'foo']
1021+
assert 1 == store.counter['keys']
1022+
# keys should now be cached
1023+
assert keys == sorted(cache.keys())
1024+
assert 1 == store.counter['keys']
1025+
assert 'foo' in cache
1026+
assert 0 == store.counter['__contains__', 'foo']
1027+
assert keys == sorted(cache)
1028+
assert 0 == store.counter['__iter__']
1029+
assert 1 == store.counter['keys']
1030+
1031+
# cache should be cleared if store is modified - crude but simple for now
1032+
cache['baz'] = b'zzz'
1033+
keys = sorted(cache.keys())
1034+
assert keys == ['bar', 'baz', 'foo']
1035+
assert 2 == store.counter['keys']
1036+
# keys should now be cached
1037+
assert keys == sorted(cache.keys())
1038+
assert 2 == store.counter['keys']
1039+
1040+
# manually clear keys
1041+
cache.clear_keys()
1042+
keys = sorted(cache.keys())
1043+
assert keys == ['bar', 'baz', 'foo']
1044+
assert 3 == store.counter['keys']
1045+
assert 0 == store.counter['__contains__', 'foo']
1046+
assert 0 == store.counter['__iter__']
1047+
cache.clear_keys()
1048+
keys = sorted(cache)
1049+
assert keys == ['bar', 'baz', 'foo']
1050+
assert 4 == store.counter['keys']
1051+
assert 0 == store.counter['__contains__', 'foo']
1052+
assert 0 == store.counter['__iter__']
1053+
cache.clear_keys()
1054+
assert 'foo' in cache
1055+
assert 5 == store.counter['keys']
1056+
assert 0 == store.counter['__contains__', 'foo']
1057+
assert 0 == store.counter['__iter__']
1058+
1059+
# check these would get counted if called directly
1060+
assert 'foo' in store
1061+
assert 1 == store.counter['__contains__', 'foo']
1062+
assert keys == sorted(store)
1063+
assert 1 == store.counter['__iter__']
10031064

10041065

10051066
def test_getsize():

zarr/tests/util.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,19 @@ def __init__(self):
1010
self.counter = collections.Counter()
1111

1212
def __len__(self):
13+
self.counter['__len__'] += 1
1314
return len(self.wrapped)
1415

16+
def keys(self):
17+
self.counter['keys'] += 1
18+
return self.wrapped.keys()
19+
1520
def __iter__(self):
21+
self.counter['__iter__'] += 1
1622
return iter(self.wrapped)
1723

1824
def __contains__(self, item):
25+
self.counter['__contains__', item] += 1
1926
return item in self.wrapped
2027

2128
def __getitem__(self, item):

0 commit comments

Comments
 (0)