Skip to content

Commit 60a272b

Browse files
committed
implement chunk_store
1 parent 8646227 commit 60a272b

File tree

11 files changed

+569
-242
lines changed

11 files changed

+569
-242
lines changed

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ setenv =
1212
commands =
1313
python setup.py build_ext --inplace
1414
py27: nosetests -v zarr
15-
py34,py35: nosetests -v --with-coverage --cover-erase --cover-min-percentage=100 --cover-package=zarr --with-doctest --doctest-options=+NORMALIZE_WHITESPACE zarr
15+
py34,py35: nosetests -v --with-coverage --cover-erase --cover-min-percentage=100 --cover-package=zarr --cover-tests --with-doctest --doctest-options=+NORMALIZE_WHITESPACE zarr
1616
py34,py35: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
1717
py35: flake8 zarr
1818
python setup.py bdist_wheel

zarr/core.py

Lines changed: 37 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,7 @@
1111
from zarr.util import is_total_slice, normalize_array_selection, \
1212
get_chunk_range, human_readable_size, normalize_resize_args, \
1313
normalize_storage_path
14-
from zarr.storage import array_meta_key, attrs_key, listdir, contains_group, \
15-
buffersize
14+
from zarr.storage import array_meta_key, attrs_key, listdir, getsize
1615
from zarr.meta import decode_array_metadata, encode_array_metadata
1716
from zarr.attrs import Attributes
1817
from zarr.errors import ReadOnlyError
@@ -30,13 +29,17 @@ class Array(object):
3029
Storage path.
3130
readonly : bool, optional
3231
True if array should be protected against modification.
32+
chunk_store : MutableMapping, optional
33+
Separate storage for chunks. If not provided, `store` will be used
34+
for storage of both chunks and metadata.
3335
3436
Attributes
3537
----------
3638
store
3739
path
3840
name
3941
readonly
42+
chunk_store
4043
shape
4144
chunks
4245
dtype
@@ -61,7 +64,7 @@ class Array(object):
6164
6265
""" # flake8: noqa
6366

64-
def __init__(self, store, path=None, readonly=False):
67+
def __init__(self, store, path=None, readonly=False, chunk_store=None):
6568
# N.B., expect at this point store is fully initialized with all
6669
# configuration metadata fully specified and normalized
6770

@@ -72,6 +75,10 @@ def __init__(self, store, path=None, readonly=False):
7275
else:
7376
self._key_prefix = ''
7477
self._readonly = readonly
78+
if chunk_store is None:
79+
self._chunk_store = store
80+
else:
81+
self._chunk_store = chunk_store
7582

7683
# initialize metadata
7784
try:
@@ -130,6 +137,12 @@ def readonly(self):
130137
"""A boolean, True if modification operations are not permitted."""
131138
return self._readonly
132139

140+
@property
141+
def chunk_store(self):
142+
"""A MutableMapping providing the underlying storage for array
143+
chunks."""
144+
return self._chunk_store
145+
133146
@property
134147
def shape(self):
135148
"""A tuple of integers describing the length of each dimension of
@@ -196,29 +209,22 @@ def nbytes(self):
196209
def nbytes_stored(self):
197210
"""The total number of stored bytes of data for the array. This
198211
includes storage required for configuration metadata and user
199-
attributes encoded as JSON."""
200-
if hasattr(self._store, 'getsize'):
201-
# pass through
202-
return self._store.getsize(self._path)
203-
elif isinstance(self._store, dict):
204-
# compute from size of values
205-
size = 0
206-
for k in listdir(self._store, self._path):
207-
v = self._store[self._key_prefix + k]
208-
try:
209-
size += buffersize(v)
210-
except TypeError:
211-
return -1
212-
return size
212+
attributes."""
213+
m = getsize(self._store, self._path)
214+
if self._store == self._chunk_store:
215+
return m
213216
else:
214-
return -1
217+
n = getsize(self._chunk_store, self._path)
218+
if m < 0 or n < 0:
219+
return -1
220+
else:
221+
return m + n
215222

216223
@property
217224
def initialized(self):
218225
"""The number of chunks that have been initialized with some data."""
219-
n = sum(1 for _ in listdir(self._store, self._path))
220-
# N.B., expect meta and attrs keys in store also, so subtract 2
221-
return n - 2
226+
return sum(1 for k in listdir(self._chunk_store, self._path)
227+
if k not in [array_meta_key, attrs_key])
222228

223229
@property
224230
def cdata_shape(self):
@@ -509,7 +515,7 @@ def _chunk_getitem(self, cidx, item, dest):
509515

510516
# obtain compressed data for chunk
511517
ckey = self._ckey(cidx)
512-
cdata = self._store[ckey]
518+
cdata = self._chunk_store[ckey]
513519

514520
except KeyError:
515521

@@ -584,7 +590,7 @@ def _chunk_setitem(self, cidx, key, value):
584590

585591
# obtain compressed data for chunk
586592
ckey = self._ckey(cidx)
587-
cdata = self._store[ckey]
593+
cdata = self._chunk_store[ckey]
588594

589595
except KeyError:
590596

@@ -609,7 +615,7 @@ def _chunk_setitem(self, cidx, key, value):
609615

610616
# store
611617
ckey = self._ckey(cidx)
612-
self._store[ckey] = cdata
618+
self._chunk_store[ckey] = cdata
613619

614620
def _ckey(self, cidx):
615621
return self._key_prefix + '.'.join(map(str, cidx))
@@ -634,10 +640,14 @@ def __repr__(self):
634640
r += '; initialized: %s/%s' % (self.initialized, n_chunks)
635641
r += '\n store: %s.%s' % (type(self.store).__module__,
636642
type(self.store).__name__)
643+
if self._store != self._chunk_store:
644+
r += '\n chunk_store: %s.%s' % \
645+
(type(self._chunk_store).__module__,
646+
type(self._chunk_store).__name__)
637647
return r
638648

639649
def __getstate__(self):
640-
return self._store, self._path, self._readonly
650+
return self._store, self._path, self._readonly, self._chunk_store
641651

642652
def __setstate__(self, state):
643653
self.__init__(*state)
@@ -691,13 +701,13 @@ def resize(self, *args):
691701
for s, c in zip(new_shape, chunks))
692702

693703
# remove any chunks not within range
694-
for key in list(self._store):
704+
for key in listdir(self._chunk_store, self._path):
695705
if key not in [array_meta_key, attrs_key]:
696706
cidx = map(int, key.split('.'))
697707
if all(i < c for i, c in zip(cidx, new_cdata_shape)):
698708
pass # keep the chunk
699709
else:
700-
del self._store[key]
710+
del self._chunk_store[self._key_prefix + key]
701711

702712
# update metadata
703713
self._shape = new_shape

zarr/creation.py

Lines changed: 33 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
def create(shape, chunks, dtype=None, compression='default',
1515
compression_opts=None, fill_value=None, order='C', store=None,
16-
synchronizer=None, overwrite=False, path=None):
16+
synchronizer=None, overwrite=False, path=None, chunk_store=None):
1717
"""Create an array.
1818
1919
Parameters
@@ -44,6 +44,9 @@ def create(shape, chunks, dtype=None, compression='default',
4444
array.
4545
path : string, optional
4646
Path under which array is stored.
47+
chunk_store : MutableMapping, optional
48+
Separate storage for chunks. If not provided, `store` will be used
49+
for storage of both chunks and metadata.
4750
4851
Returns
4952
-------
@@ -70,20 +73,21 @@ def create(shape, chunks, dtype=None, compression='default',
7073
init_array(store, shape=shape, chunks=chunks, dtype=dtype,
7174
compression=compression, compression_opts=compression_opts,
7275
fill_value=fill_value, order=order, overwrite=overwrite,
73-
path=path)
76+
path=path, chunk_store=chunk_store)
7477

7578
# instantiate array
7679
if synchronizer is not None:
77-
z = SynchronizedArray(store, synchronizer, path=path)
80+
z = SynchronizedArray(store, synchronizer, path=path,
81+
chunk_store=chunk_store)
7882
else:
79-
z = Array(store, path=path)
83+
z = Array(store, path=path, chunk_store=chunk_store)
8084

8185
return z
8286

8387

8488
def empty(shape, chunks, dtype=None, compression='default',
8589
compression_opts=None, order='C', store=None, synchronizer=None,
86-
path=None, overwrite=False):
90+
path=None, overwrite=False, chunk_store=None):
8791
"""Create an empty array.
8892
8993
For parameter definitions see :func:`zarr.creation.create`.
@@ -98,12 +102,13 @@ def empty(shape, chunks, dtype=None, compression='default',
98102
return create(shape=shape, chunks=chunks, dtype=dtype,
99103
compression=compression, compression_opts=compression_opts,
100104
fill_value=None, order=order, store=store,
101-
synchronizer=synchronizer, path=path, overwrite=overwrite)
105+
synchronizer=synchronizer, path=path, overwrite=overwrite,
106+
chunk_store=chunk_store)
102107

103108

104109
def zeros(shape, chunks, dtype=None, compression='default',
105110
compression_opts=None, order='C', store=None, synchronizer=None,
106-
path=None, overwrite=False):
111+
path=None, overwrite=False, chunk_store=None):
107112
"""Create an array, with zero being used as the default value for
108113
uninitialized portions of the array.
109114
@@ -128,12 +133,12 @@ def zeros(shape, chunks, dtype=None, compression='default',
128133
compression=compression,
129134
compression_opts=compression_opts, fill_value=0, order=order,
130135
store=store, synchronizer=synchronizer, path=path,
131-
overwrite=overwrite)
136+
overwrite=overwrite, chunk_store=chunk_store)
132137

133138

134139
def ones(shape, chunks, dtype=None, compression='default',
135140
compression_opts=None, order='C', store=None, synchronizer=None,
136-
path=None, overwrite=False):
141+
path=None, overwrite=False, chunk_store=None):
137142
"""Create an array, with one being used as the default value for
138143
uninitialized portions of the array.
139144
@@ -157,12 +162,13 @@ def ones(shape, chunks, dtype=None, compression='default',
157162
return create(shape=shape, chunks=chunks, dtype=dtype,
158163
compression=compression, compression_opts=compression_opts,
159164
fill_value=1, order=order, store=store,
160-
synchronizer=synchronizer, path=path, overwrite=overwrite)
165+
synchronizer=synchronizer, path=path, overwrite=overwrite,
166+
chunk_store=chunk_store)
161167

162168

163169
def full(shape, chunks, fill_value, dtype=None, compression='default',
164170
compression_opts=None, order='C', store=None, synchronizer=None,
165-
path=None, overwrite=False):
171+
path=None, overwrite=False, chunk_store=None):
166172
"""Create an array, with `fill_value` being used as the default value for
167173
uninitialized portions of the array.
168174
@@ -186,12 +192,13 @@ def full(shape, chunks, fill_value, dtype=None, compression='default',
186192
return create(shape=shape, chunks=chunks, dtype=dtype,
187193
compression=compression, compression_opts=compression_opts,
188194
fill_value=fill_value, order=order, store=store,
189-
synchronizer=synchronizer, path=path, overwrite=overwrite)
195+
synchronizer=synchronizer, path=path, overwrite=overwrite,
196+
chunk_store=chunk_store)
190197

191198

192199
def array(data, chunks=None, dtype=None, compression='default',
193200
compression_opts=None, fill_value=None, order='C', store=None,
194-
synchronizer=None, path=None, overwrite=False):
201+
synchronizer=None, path=None, overwrite=False, chunk_store=None):
195202
"""Create an array filled with `data`.
196203
197204
The `data` argument should be a NumPy array or array-like object. For
@@ -238,7 +245,8 @@ def array(data, chunks=None, dtype=None, compression='default',
238245
z = create(shape=shape, chunks=chunks, dtype=dtype,
239246
compression=compression, compression_opts=compression_opts,
240247
fill_value=fill_value, order=order, store=store,
241-
synchronizer=synchronizer, path=path, overwrite=overwrite)
248+
synchronizer=synchronizer, path=path, overwrite=overwrite,
249+
chunk_store=chunk_store)
242250

243251
# fill with data
244252
z[:] = data
@@ -402,46 +410,49 @@ def _like_args(a, shape, chunks, dtype, compression, compression_opts, order):
402410

403411
def empty_like(a, shape=None, chunks=None, dtype=None, compression=None,
404412
compression_opts=None, order=None, store=None,
405-
synchronizer=None, path=None, overwrite=False):
413+
synchronizer=None, path=None, overwrite=False,
414+
chunk_store=None):
406415
"""Create an empty array like `a`."""
407416
shape, chunks, dtype, compression, compression_opts, order = \
408417
_like_args(a, shape, chunks, dtype, compression, compression_opts,
409418
order)
410419
return empty(shape, chunks, dtype=dtype, compression=compression,
411420
compression_opts=compression_opts, order=order,
412421
store=store, synchronizer=synchronizer, path=path,
413-
overwrite=overwrite)
422+
overwrite=overwrite, chunk_store=chunk_store)
414423

415424

416425
def zeros_like(a, shape=None, chunks=None, dtype=None, compression=None,
417426
compression_opts=None, order=None, store=None,
418-
synchronizer=None, path=None, overwrite=False):
427+
synchronizer=None, path=None, overwrite=False,
428+
chunk_store=None):
419429
"""Create an array of zeros like `a`."""
420430
shape, chunks, dtype, compression, compression_opts, order = \
421431
_like_args(a, shape, chunks, dtype, compression, compression_opts,
422432
order)
423433
return zeros(shape, chunks, dtype=dtype, compression=compression,
424434
compression_opts=compression_opts, order=order,
425435
store=store, synchronizer=synchronizer, path=path,
426-
overwrite=overwrite)
436+
overwrite=overwrite, chunk_store=chunk_store)
427437

428438

429439
def ones_like(a, shape=None, chunks=None, dtype=None, compression=None,
430440
compression_opts=None, order=None, store=None,
431-
synchronizer=None, path=None, overwrite=False):
441+
synchronizer=None, path=None, overwrite=False, chunk_store=None):
432442
"""Create an array of ones like `a`."""
433443
shape, chunks, dtype, compression, compression_opts, order = \
434444
_like_args(a, shape, chunks, dtype, compression, compression_opts,
435445
order)
436446
return ones(shape, chunks, dtype=dtype, compression=compression,
437447
compression_opts=compression_opts, order=order,
438448
store=store, synchronizer=synchronizer, path=path,
439-
overwrite=overwrite)
449+
overwrite=overwrite, chunk_store=chunk_store)
440450

441451

442452
def full_like(a, shape=None, chunks=None, fill_value=None, dtype=None,
443453
compression=None, compression_opts=None, order=None,
444-
store=None, synchronizer=None, path=None, overwrite=False):
454+
store=None, synchronizer=None, path=None, overwrite=False,
455+
chunk_store=None):
445456
"""Create a filled array like `a`."""
446457
shape, chunks, dtype, compression, compression_opts, order = \
447458
_like_args(a, shape, chunks, dtype, compression, compression_opts,
@@ -454,7 +465,7 @@ def full_like(a, shape=None, chunks=None, fill_value=None, dtype=None,
454465
return full(shape, chunks, fill_value, dtype=dtype,
455466
compression=compression, compression_opts=compression_opts,
456467
order=order, store=store, synchronizer=synchronizer,
457-
path=path, overwrite=overwrite)
468+
path=path, overwrite=overwrite, chunk_store=chunk_store)
458469

459470

460471
def open_like(a, path, mode='a', shape=None, chunks=None, dtype=None,

0 commit comments

Comments
 (0)