Skip to content

Commit 4968961

Browse files
committed
WIP rewrite to use MutableMapping and prefix
1 parent ae838a2 commit 4968961

File tree

3 files changed: +201 -223 lines changed

3 files changed: +201 -223 lines changed

zarr/core.py

Lines changed: 42 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
from zarr.compressors import get_compressor_cls
1212
from zarr.util import is_total_slice, normalize_array_selection, \
1313
get_chunk_range, human_readable_size, normalize_resize_args
14+
from zarr.storage import normalize_prefix, normalize_key, array_meta_key, \
15+
array_attrs_key, ls
1416
from zarr.meta import decode_metadata, encode_metadata
1517
from zarr.attrs import Attributes
1618
from zarr.compat import itervalues
@@ -73,12 +75,13 @@ def __init__(self, store, name=None, readonly=False):
7375
# configuration metadata fully specified and normalised
7476

7577
self._store = store
76-
self._name = name
78+
self._prefix = normalize_prefix(name)
7779
self._readonly = readonly
7880

7981
# initialise metadata
82+
meta_key = array_meta_key(self._prefix)
8083
try:
81-
meta_bytes = store['meta']
84+
meta_bytes = store[meta_key]
8285
except KeyError:
8386
raise ValueError('store has no metadata')
8487
else:
@@ -95,14 +98,16 @@ def __init__(self, store, name=None, readonly=False):
9598
self._compressor = compressor_cls(self._compression_opts)
9699

97100
# initialise attributes
98-
self._attrs = Attributes(store, readonly=readonly)
101+
attrs_key = array_attrs_key(self._prefix)
102+
self._attrs = Attributes(store, key=attrs_key, readonly=readonly)
99103

100104
def flush_metadata(self):
101105
meta = dict(shape=self._shape, chunks=self._chunks, dtype=self._dtype,
102106
compression=self._compression,
103107
compression_opts=self._compression_opts,
104108
fill_value=self._fill_value, order=self._order)
105-
self._store['meta'] = encode_metadata(meta)
109+
meta_key = array_meta_key(self._prefix)
110+
self._store[meta_key] = encode_metadata(meta)
106111

107112
@property
108113
def store(self):
@@ -112,7 +117,10 @@ def store(self):
112117
@property
113118
def name(self):
114119
"""TODO"""
115-
return self._name
120+
if self._prefix:
121+
# follow h5py convention: add leading slash, remove trailing slash
122+
return '/' + self._prefix[:-1]
123+
return None
116124

117125
@property
118126
def readonly(self):
@@ -186,20 +194,32 @@ def nbytes_stored(self):
186194
"""The total number of stored bytes of data for the array. This
187195
includes storage required for configuration metadata and user
188196
attributes encoded as JSON."""
189-
if hasattr(self._store, 'nbytes_stored'):
197+
if hasattr(self._store, 'getsize'):
190198
# pass through
191-
return self._store.nbytes_stored
199+
return self._store.getsize(self._prefix)
192200
elif isinstance(self._store, dict):
193201
# cheap to compute by summing length of values
194-
return sum(len(v) for v in itervalues(self._store))
202+
size = 0
203+
for child in ls(self._store, self._prefix):
204+
key = self._prefix + child
205+
try:
206+
size += len(self._store[key])
207+
except KeyError:
208+
pass
209+
return size
195210
else:
196211
return -1
197212

198213
@property
199214
def initialized(self):
200215
"""The number of chunks that have been initialized with some data."""
216+
n = 0
217+
for child in ls(self._store, self._prefix):
218+
key = self._prefix + child
219+
if key in self._store:
220+
n += 1
201221
# N.B., expect 'meta' and 'attrs' keys in store also, so subtract 2
202-
return len(self._store) - 2
222+
return n - 2
203223

204224
@property
205225
def cdata_shape(self):
@@ -489,7 +509,7 @@ def _chunk_getitem(self, cidx, item, dest):
489509
try:
490510

491511
# obtain compressed data for chunk
492-
ckey = '.'.join(map(str, cidx))
512+
ckey = self._prefix + '.'.join(map(str, cidx))
493513
cdata = self._store[ckey]
494514

495515
except KeyError:
@@ -564,7 +584,7 @@ def _chunk_setitem(self, cidx, key, value):
564584
try:
565585

566586
# obtain compressed data for chunk
567-
ckey = '.'.join(map(str, cidx))
587+
ckey = self._prefix + '.'.join(map(str, cidx))
568588
cdata = self._store[ckey]
569589

570590
except KeyError:
@@ -594,28 +614,28 @@ def _chunk_setitem(self, cidx, key, value):
594614

595615
def __repr__(self):
596616
r = '%s.%s(' % (type(self).__module__, type(self).__name__)
597-
if self._name:
598-
r += '%s, ' % self._name
599-
r += '%s, ' % str(self._shape)
600-
r += '%s, ' % str(self._dtype)
601-
r += 'chunks=%s, ' % str(self._chunks)
602-
r += 'order=%s' % self._order
617+
if self.name:
618+
r += '%s, ' % self.name
619+
r += '%s, ' % str(self.shape)
620+
r += '%s, ' % str(self.dtype)
621+
r += 'chunks=%s, ' % str(self.chunks)
622+
r += 'order=%s' % self.order
603623
r += ')'
604-
r += '\n compression: %s' % self._compression
605-
r += '; compression_opts: %s' % str(self._compression_opts)
624+
r += '\n compression: %s' % self.compression
625+
r += '; compression_opts: %s' % str(self.compression_opts)
606626
r += '\n nbytes: %s' % human_readable_size(self.nbytes)
607627
if self.nbytes_stored > 0:
608628
r += '; nbytes_stored: %s' % human_readable_size(
609629
self.nbytes_stored)
610630
r += '; ratio: %.1f' % (self.nbytes / self.nbytes_stored)
611631
n_chunks = reduce(operator.mul, self.cdata_shape)
612632
r += '; initialized: %s/%s' % (self.initialized, n_chunks)
613-
r += '\n store: %s.%s' % (type(self._store).__module__,
614-
type(self._store).__name__)
633+
r += '\n store: %s.%s' % (type(self.store).__module__,
634+
type(self.store).__name__)
615635
return r
616636

617637
def __getstate__(self):
618-
return self._store, self._readonly
638+
return self._store, self._prefix, self._readonly
619639

620640
def __setstate__(self, state):
621641
self.__init__(*state)

zarr/hierarchy.py

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from zarr.attrs import Attributes
77
from zarr.core import Array
88
from zarr.storage import contains_array, contains_group, init_group, \
9-
DirectoryStore, normalize_name, join_names
9+
DirectoryStore, normalize_prefix, ls, group_attrs_key
1010
from zarr.creation import array, create
1111

1212

@@ -62,15 +62,15 @@ class Group(object):
6262
def __init__(self, store, name=None, readonly=False):
6363

6464
self._store = store
65-
self._name = normalize_name(name)
65+
self._prefix = normalize_prefix(name)
6666
self._readonly = readonly
6767

6868
# guard conditions
69-
if contains_array(store, key=self._name):
69+
if contains_array(store, prefix=self._prefix):
7070
raise ValueError('store contains an array')
7171

7272
# setup attributes
73-
attrs_key = join_names(self._name, 'attrs')
73+
attrs_key = group_attrs_key(self._prefix)
7474
self._attrs = Attributes(store, key=attrs_key, readonly=readonly)
7575

7676
@property
@@ -86,7 +86,10 @@ def readonly(self):
8686
@property
8787
def name(self):
8888
"""TODO"""
89-
return self._name
89+
if self._prefix:
90+
# follow h5py convention: add leading slash, remove trailing slash
91+
return '/' + self._prefix[:-1]
92+
return '/'
9093

9194
@property
9295
def attrs(self):
@@ -111,8 +114,7 @@ def __len__(self):
111114

112115
def __repr__(self):
113116
r = '%s.%s(' % (type(self).__module__, type(self).__name__)
114-
if self.name:
115-
r += self.name + ', '
117+
r += self.name + ', '
116118
r += str(len(self))
117119
r += ')'
118120
array_keys = list(self.array_keys())
@@ -138,6 +140,8 @@ def __contains__(self, key):
138140
pass
139141

140142
def __getitem__(self, key):
143+
# TODO recode to use prefix
144+
141145
names = [s for s in key.split('/') if s]
142146
if not names:
143147
raise KeyError(key)
@@ -167,14 +171,17 @@ def __setitem__(self, key, value):
167171
raise NotImplementedError()
168172

169173
def keys(self):
174+
# TODO recode to use prefix
170175
for key, store in self.store.stores():
171176
if contains_array(store) or contains_group(store):
172177
yield key
173178

174179
def values(self):
180+
# TODO recode to use prefix
175181
return (v for k, v in self.items())
176182

177183
def items(self):
184+
# TODO recode to use prefix
178185
for key, store in self.store.stores():
179186
if contains_array(store):
180187
# TODO what about synchronizer?
@@ -183,27 +190,32 @@ def items(self):
183190
yield key, Group(store, readonly=self.readonly)
184191

185192
def group_keys(self):
193+
# TODO recode to use prefix
186194
for key, store in self.store.stores():
187195
if contains_group(store):
188196
yield key
189197

190198
def groups(self):
199+
# TODO recode to use prefix
191200
for key, store in self.store.stores():
192201
if contains_group(store):
193202
yield key, Group(store, readonly=self.readonly)
194203

195204
def array_keys(self):
205+
# TODO recode to use prefix
196206
for key, store in self.store.stores():
197207
if contains_array(store):
198208
yield key
199209

200210
def arrays(self):
211+
# TODO recode to use prefix
201212
for key, store in self.store.stores():
202213
if contains_array(store):
203214
# TODO what about synchronizer?
204215
yield key, Array(store, readonly=self.readonly)
205216

206217
def _require_store(self, name):
218+
# TODO recode to use prefix
207219

208220
# handle compound request
209221
names = [s for s in name.split('/') if s]
@@ -234,6 +246,7 @@ def _require_store(self, name):
234246
return store, absname
235247

236248
def create_group(self, name):
249+
# TODO recode to use prefix
237250

238251
# obtain store
239252
store, absname = self._require_store(name)
@@ -249,6 +262,7 @@ def create_group(self, name):
249262
return Group(store, readonly=self.readonly, name=absname)
250263

251264
def require_group(self, name):
265+
# TODO recode to use prefix
252266

253267
# obtain store
254268
store, absname = self._require_store(name)
@@ -267,6 +281,7 @@ def create_dataset(self, name, data=None, shape=None, chunks=None,
267281
dtype=None, compression='default',
268282
compression_opts=None, fill_value=None, order='C',
269283
synchronizer=None, **kwargs):
284+
# TODO recode to use prefix
270285

271286
# obtain store
272287
store, absname = self._require_store(name)
@@ -345,14 +360,16 @@ def copy(self, source, dest, name, shallow=False):
345360
def group(store=None, readonly=False):
346361
"""TODO"""
347362
if store is None:
348-
store = MemoryStore()
363+
store = DictStore()
349364
init_group(store)
350365
return Group(store, readonly=readonly)
351366

352367

353368
def open_group(path, mode='a'):
354369
"""TODO"""
355370

371+
# TODO recode to use prefix
372+
356373
# ensure directory exists
357374
if not os.path.exists(path):
358375
if mode in ['w', 'w-', 'x', 'a']:

0 commit comments

Comments
 (0)