Skip to content

Commit 76dbb54

Browse files
committed
reimplement dictstore
1 parent 06c263f commit 76dbb54

File tree

4 files changed

+180
-20
lines changed

4 files changed

+180
-20
lines changed

zarr/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from zarr.creation import create, array, empty, zeros, ones, full, open, \
99
empty_like, zeros_like, ones_like, full_like, open_like
1010
from zarr.storage import init_store, init_array, init_group, contains_array, \
11-
contains_group, MemoryStore, DirectoryStore, ZipStore
11+
contains_group, DictStore, DirectoryStore, ZipStore
1212
from zarr.core import Array
1313
from zarr.hierarchy import Group, group, open_group
1414
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer, \

zarr/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from zarr.util import is_total_slice, normalize_array_selection, \
1313
get_chunk_range, human_readable_size, normalize_resize_args
1414
from zarr.storage import normalize_prefix, normalize_key, array_meta_key, \
15-
array_attrs_key, ls
15+
array_attrs_key, listdir
1616
from zarr.meta import decode_metadata, encode_metadata
1717
from zarr.attrs import Attributes
1818
from zarr.compat import itervalues
@@ -214,7 +214,7 @@ def nbytes_stored(self):
214214
def initialized(self):
215215
"""The number of chunks that have been initialized with some data."""
216216
n = 0
217-
for child in ls(self._store, self._prefix):
217+
for child in listdir(self._store, self._prefix):
218218
key = self._prefix + child
219219
if key in self._store:
220220
n += 1

zarr/hierarchy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from zarr.attrs import Attributes
77
from zarr.core import Array
88
from zarr.storage import contains_array, contains_group, init_group, \
9-
DirectoryStore, normalize_prefix, ls, group_attrs_key
9+
DictStore, DirectoryStore, normalize_prefix, listdir, group_attrs_key
1010
from zarr.creation import array, create
1111

1212

zarr/storage.py

Lines changed: 176 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ def normalize_key(key):
3939
previous_char = None
4040
normed = ''
4141
for char in key:
42-
if previous_char != '/':
42+
if char != '/':
43+
normed += char
44+
elif previous_char != '/':
4345
normed += char
4446
previous_char = char
4547

@@ -108,15 +110,21 @@ def contains_group(store, prefix=None):
108110
return group_attrs_key(prefix) in store
109111

110112

113+
def _rmdir_from_keys(store, prefix=None):
114+
for key in set(store.keys()):
115+
if key.startswith(prefix):
116+
del store[key]
117+
118+
111119
def rmdir(store, prefix=None):
    """Remove everything stored under `prefix` from `store`.

    The prefix is normalized with `normalize_prefix` first. If the store
    provides its own `rmdir` method the call is delegated to it; otherwise
    matching keys are deleted one by one via `_rmdir_from_keys`.
    """
    prefix = normalize_prefix(prefix)
    if not hasattr(store, 'rmdir'):
        # slow path: no native support, delete one key at a time
        _rmdir_from_keys(store, prefix)
        return
    # fast path: let the store do it
    store.rmdir(prefix)
120128

121129

122130
def _listdir_from_keys(store, prefix=None):
@@ -133,8 +141,10 @@ def listdir(store, prefix=None):
133141
"""TODO"""
134142
prefix = normalize_prefix(prefix)
135143
if hasattr(store, 'listdir'):
144+
# pass through
136145
return store.listdir(prefix)
137146
else:
147+
# slow version, iterate through all keys
138148
return _listdir_from_keys(store, prefix)
139149

140150

@@ -287,10 +297,153 @@ def ensure_bytes(s):
287297
return io.BytesIO(s).getvalue()
288298

289299

300+
def _dict_store_keys(d, prefix='', cls=dict):
301+
for k in d.keys():
302+
v = d[k]
303+
if isinstance(v, cls):
304+
for sk in _dict_store_keys(v, prefix + k + '/', cls):
305+
yield sk
306+
else:
307+
yield prefix + k
308+
309+
290310
class DictStore(MutableMapping):
    """Extended mutable mapping interface to a hierarchy of dicts.

    Keys are '/'-separated paths. Every path segment except the last names
    a nested container (an instance of `cls`); the last segment names the
    stored value. Containers are never visible as keys: looking one up
    raises KeyError and `in` reports False for it.

    Parameters
    ----------
    cls : type, optional
        Mapping class used for the root and for every intermediate
        container. Defaults to `dict`.

    Examples
    --------
    >>> import zarr
    >>> store = zarr.DictStore()
    >>> store['foo'] = b'bar'
    >>> store['foo']
    b'bar'
    >>> store['a/b/c'] = b'xxx'
    >>> store['a/b/c']
    b'xxx'
    >>> sorted(store.keys())
    ['a/b/c', 'foo']
    >>> store.listdir()
    ['a', 'foo']
    >>> store.listdir('a/b')
    ['c']
    >>> store.rmdir('a')
    >>> sorted(store.keys())
    ['foo']

    """  # flake8: noqa

    def __init__(self, cls=dict):
        self.root = cls()
        self.cls = cls

    def _get_parent(self, path):
        """Return `(container, name)` for the last segment of `path`.

        Raises KeyError if an intermediate segment is missing or names a
        stored value rather than a container.
        """
        segments = path.split('/')
        node = self.root
        for segment in segments[:-1]:
            node = node[segment]
            if not isinstance(node, self.cls):
                # cannot descend through a leaf value
                raise KeyError(path)
        return node, segments[-1]

    def __getitem__(self, key):
        key = normalize_key(key)
        try:
            parent, name = self._get_parent(key)
            value = parent[name]
        except KeyError:
            # re-raise with the full key rather than a single segment
            raise KeyError(key)
        if isinstance(value, self.cls):
            # containers are not items
            raise KeyError(key)
        return value

    def __setitem__(self, key, value):
        key = normalize_key(key)
        segments = key.split('/')
        node = self.root

        # ensure intermediate containers
        for segment in segments[:-1]:
            try:
                child = node[segment]
            except KeyError:
                node[segment] = self.cls()
                child = node[segment]
            else:
                if not isinstance(child, self.cls):
                    # an existing value is in the way; refuse rather than
                    # trying to assign into it
                    raise KeyError(key)
            node = child

        # set final value
        node[segments[-1]] = value

    def __delitem__(self, key):
        key = normalize_key(key)
        try:
            parent, name = self._get_parent(key)
            value = parent[name]
        except KeyError:
            raise KeyError(key)
        if isinstance(value, self.cls):
            # consistent with __contains__/__getitem__: a container is not
            # an item; use rmdir() to remove a whole subtree
            raise KeyError(key)
        del parent[name]

    def __contains__(self, key):
        key = normalize_key(key)
        try:
            parent, name = self._get_parent(key)
            value = parent[name]
        except KeyError:
            return False
        return not isinstance(value, self.cls)

    def __eq__(self, other):
        return (
            isinstance(other, DictStore) and
            self.root == other.root and
            self.cls == other.cls
        )

    def keys(self):
        for k in _dict_store_keys(self.root, cls=self.cls):
            yield k

    def __iter__(self):
        return self.keys()

    def __len__(self):
        return sum(1 for _ in self.keys())

    def listdir(self, prefix=None):
        """Return the sorted child names directly under `prefix`.

        Returns an empty list when `prefix` does not exist or names a
        stored value rather than a container.
        """
        prefix = normalize_prefix(prefix)
        if not prefix:
            return sorted(self.root.keys())
        try:
            # prefix[:-1] removes the trailing slash
            parent, name = self._get_parent(prefix[:-1])
            node = parent[name]
        except KeyError:
            return []
        if not isinstance(node, self.cls):
            return []
        return sorted(node.keys())

    def rmdir(self, prefix=None):
        """Remove the container at `prefix` and everything beneath it.

        With no prefix the whole store is emptied. A prefix that does not
        exist, or that names a stored value, is left untouched.
        """
        prefix = normalize_prefix(prefix)
        if not prefix:
            # clear out the root
            self.root = self.cls()
            return
        try:
            # prefix[:-1] removes the trailing slash
            parent, name = self._get_parent(prefix[:-1])
            node = parent[name]
        except KeyError:
            return
        if isinstance(node, self.cls):
            del parent[name]

    def getsize(self, prefix=None):
        """Return the total length of the values stored directly under
        `prefix` (nested containers are not descended into).

        If `prefix` names a stored value, its own length is returned.
        Returns -1 if any value involved has no `len()`. Raises KeyError if
        `prefix` does not exist.
        """
        prefix = normalize_prefix(prefix)
        node = self.root
        if prefix:
            # prefix[:-1] removes the trailing slash
            parent, name = self._get_parent(prefix[:-1])
            node = parent[name]
        if isinstance(node, self.cls):
            values = node.values()
        else:
            values = [node]
        size = 0
        for v in values:
            if not isinstance(v, self.cls):
                try:
                    size += len(v)
                except TypeError:
                    return -1
        return size
294447

295448

296449
class DirectoryStore(MutableMapping):
@@ -415,20 +568,27 @@ def __iter__(self):
415568
def __len__(self):
416569
return sum(1 for _ in self.keys())
417570

418-
def listdir(self, prefix=None):
    """Return the sorted directory entries found under `prefix`.

    The prefix is normalized and, when non-empty, joined onto `self.path`;
    otherwise `self.path` itself is listed.
    """
    subdir = normalize_prefix(prefix)
    target = os.path.join(self.path, subdir) if subdir else self.path
    entries = os.listdir(target)
    entries.sort()
    return entries
421577

422-
def rmdir(self, prefix=None):
    """Recursively delete the directory for `prefix`, if it exists.

    With an empty prefix the target is `self.path` itself. Nothing happens
    when the target is not an existing directory.
    """
    subdir = normalize_prefix(prefix)
    target = os.path.join(self.path, subdir) if subdir else self.path
    if not os.path.isdir(target):
        return
    shutil.rmtree(target)
426585

427-
def getsize(self, prefix=None):
    """Return the combined size in bytes of the regular files directly
    under `prefix`.

    Entries that are not regular files (e.g. sub-directories) are ignored.
    """
    prefix = normalize_prefix(prefix)
    candidates = (
        os.path.join(self.path, prefix, name)
        for name in self.listdir(prefix)
    )
    return sum(os.path.getsize(p) for p in candidates if os.path.isfile(p))
@@ -500,14 +660,14 @@ def __contains__(self, key):
500660
else:
501661
return True
502662

503-
def listdir(self, prefix=None):
    """List the children under `prefix` by scanning this store's keys.

    The prefix is normalized and the work is delegated to
    `_listdir_from_keys`.
    """
    return _listdir_from_keys(self, normalize_prefix(prefix))
506666

507-
def rmdir(self, prefix=None):
    """Directory removal is not implemented for this store.

    Always raises NotImplementedError.
    """
    raise NotImplementedError()
509669

510-
def getsize(self, prefix):
670+
def getsize(self, prefix=None):
511671
prefix = normalize_prefix(prefix)
512672
children = self.listdir(prefix)
513673
size = 0

0 commit comments

Comments
 (0)