Skip to content

Commit 3b2e04f

Browse files
committed
implement auto-chunking
1 parent faf30a8 commit 3b2e04f

File tree

6 files changed

+139
-67
lines changed

6 files changed

+139
-67
lines changed

zarr/creation.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from zarr.storage import DirectoryStore, init_array, contains_array, contains_group
1212

1313

14-
def create(shape, chunks, dtype=None, compression='default',
14+
def create(shape, chunks=None, dtype=None, compression='default',
1515
compression_opts=None, fill_value=None, order='C', store=None,
1616
synchronizer=None, overwrite=False, path=None, chunk_store=None):
1717
"""Create an array.
@@ -20,8 +20,8 @@ def create(shape, chunks, dtype=None, compression='default',
2020
----------
2121
shape : int or tuple of ints
2222
Array shape.
23-
chunks : int or tuple of ints
24-
Chunk shape.
23+
chunks : int or tuple of ints, optional
24+
Chunk shape. If not provided, will be guessed from `shape` and `dtype`.
2525
dtype : string or dtype, optional
2626
NumPy dtype.
2727
compression : string, optional
@@ -85,7 +85,7 @@ def create(shape, chunks, dtype=None, compression='default',
8585
return z
8686

8787

88-
def empty(shape, chunks, dtype=None, compression='default',
88+
def empty(shape, chunks=None, dtype=None, compression='default',
8989
compression_opts=None, order='C', store=None, synchronizer=None,
9090
path=None, overwrite=False, chunk_store=None):
9191
"""Create an empty array.
@@ -106,7 +106,7 @@ def empty(shape, chunks, dtype=None, compression='default',
106106
chunk_store=chunk_store)
107107

108108

109-
def zeros(shape, chunks, dtype=None, compression='default',
109+
def zeros(shape, chunks=None, dtype=None, compression='default',
110110
compression_opts=None, order='C', store=None, synchronizer=None,
111111
path=None, overwrite=False, chunk_store=None):
112112
"""Create an array, with zero being used as the default value for
@@ -136,7 +136,7 @@ def zeros(shape, chunks, dtype=None, compression='default',
136136
overwrite=overwrite, chunk_store=chunk_store)
137137

138138

139-
def ones(shape, chunks, dtype=None, compression='default',
139+
def ones(shape, chunks=None, dtype=None, compression='default',
140140
compression_opts=None, order='C', store=None, synchronizer=None,
141141
path=None, overwrite=False, chunk_store=None):
142142
"""Create an array, with one being used as the default value for
@@ -166,7 +166,7 @@ def ones(shape, chunks, dtype=None, compression='default',
166166
chunk_store=chunk_store)
167167

168168

169-
def full(shape, chunks, fill_value, dtype=None, compression='default',
169+
def full(shape, fill_value, chunks=None, dtype=None, compression='default',
170170
compression_opts=None, order='C', store=None, synchronizer=None,
171171
path=None, overwrite=False, chunk_store=None):
172172
"""Create an array, with `fill_value` being used as the default value for
@@ -271,8 +271,8 @@ def open_array(path, mode='a', shape=None, chunks=None, dtype=None,
271271
(fail if exists).
272272
shape : int or tuple of ints
273273
Array shape.
274-
chunks : int or tuple of ints
275-
Chunk shape.
274+
chunks : int or tuple of ints, optional
275+
Chunk shape. If not provided, will be guessed from `shape` and `dtype`.
276276
dtype : string or dtype, optional
277277
NumPy dtype.
278278
compression : string, optional
@@ -387,7 +387,8 @@ def _like_args(a, shape, chunks, dtype, compression, compression_opts, order):
387387
try:
388388
chunks = a.chunks
389389
except AttributeError:
390-
raise ValueError('chunks must be specified')
390+
# `a` has no `chunks` attribute; leave chunks unset so auto-chunking is used
391+
pass
391392
if dtype is None:
392393
dtype = a.dtype
393394
if compression is None:
@@ -416,7 +417,7 @@ def empty_like(a, shape=None, chunks=None, dtype=None, compression=None,
416417
shape, chunks, dtype, compression, compression_opts, order = \
417418
_like_args(a, shape, chunks, dtype, compression, compression_opts,
418419
order)
419-
return empty(shape, chunks, dtype=dtype, compression=compression,
420+
return empty(shape, chunks=chunks, dtype=dtype, compression=compression,
420421
compression_opts=compression_opts, order=order,
421422
store=store, synchronizer=synchronizer, path=path,
422423
overwrite=overwrite, chunk_store=chunk_store)
@@ -430,7 +431,7 @@ def zeros_like(a, shape=None, chunks=None, dtype=None, compression=None,
430431
shape, chunks, dtype, compression, compression_opts, order = \
431432
_like_args(a, shape, chunks, dtype, compression, compression_opts,
432433
order)
433-
return zeros(shape, chunks, dtype=dtype, compression=compression,
434+
return zeros(shape, chunks=chunks, dtype=dtype, compression=compression,
434435
compression_opts=compression_opts, order=order,
435436
store=store, synchronizer=synchronizer, path=path,
436437
overwrite=overwrite, chunk_store=chunk_store)
@@ -443,7 +444,7 @@ def ones_like(a, shape=None, chunks=None, dtype=None, compression=None,
443444
shape, chunks, dtype, compression, compression_opts, order = \
444445
_like_args(a, shape, chunks, dtype, compression, compression_opts,
445446
order)
446-
return ones(shape, chunks, dtype=dtype, compression=compression,
447+
return ones(shape, chunks=chunks, dtype=dtype, compression=compression,
447448
compression_opts=compression_opts, order=order,
448449
store=store, synchronizer=synchronizer, path=path,
449450
overwrite=overwrite, chunk_store=chunk_store)
@@ -462,7 +463,7 @@ def full_like(a, shape=None, chunks=None, fill_value=None, dtype=None,
462463
fill_value = a.fill_value
463464
except AttributeError:
464465
raise ValueError('fill_value must be specified')
465-
return full(shape, chunks, fill_value, dtype=dtype,
466+
return full(shape, chunks=chunks, fill_value=fill_value, dtype=dtype,
466467
compression=compression, compression_opts=compression_opts,
467468
order=order, store=store, synchronizer=synchronizer,
468469
path=path, overwrite=overwrite, chunk_store=chunk_store)

zarr/hierarchy.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,9 @@ def create_dataset(self, name, data=None, shape=None, chunks=None,
487487
Initial data.
488488
shape : int or tuple of ints
489489
Array shape.
490-
chunks : int or tuple of ints
491-
Chunk shape.
490+
chunks : int or tuple of ints, optional
491+
Chunk shape. If not provided, will be guessed from `shape` and
492+
`dtype`.
492493
dtype : string or dtype, optional
493494
NumPy dtype.
494495
compression : string, optional
@@ -642,15 +643,16 @@ def ones(self, name, **kwargs):
642643
return ones(store=self._store, path=path,
643644
chunk_store=self._chunk_store, **kwargs)
644645

645-
def full(self, name, **kwargs):
646+
def full(self, name, fill_value, **kwargs):
646647
"""Create an array. Keyword arguments as per
647648
:func:`zarr.creation.full`."""
648649
if self._readonly:
649650
raise ReadOnlyError('group is read-only')
650651
path = self._item_path(name)
651652
self._require_parent_group(path)
652653
return full(store=self._store, path=path,
653-
chunk_store=self._chunk_store, **kwargs)
654+
chunk_store=self._chunk_store,
655+
fill_value=fill_value, **kwargs)
654656

655657
def array(self, name, data, **kwargs):
656658
"""Create an array. Keyword arguments as per

zarr/storage.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ def getsize(store, path=None):
116116
return -1
117117

118118

119-
def init_array(store, shape, chunks, dtype=None, compression='default',
120-
compression_opts=None, fill_value=None,
121-
order='C', overwrite=False, path=None, chunk_store=None):
119+
def init_array(store, shape, chunks=None, dtype=None, compression='default',
120+
compression_opts=None, fill_value=None, order='C',
121+
overwrite=False, path=None, chunk_store=None):
122122
"""Initialize an array store with the given configuration.
123123
124124
Parameters
@@ -127,8 +127,8 @@ def init_array(store, shape, chunks, dtype=None, compression='default',
127127
A mapping that supports string keys and bytes-like values.
128128
shape : int or tuple of ints
129129
Array shape.
130-
chunks : int or tuple of ints
131-
Chunk shape.
130+
chunks : int or tuple of ints, optional
131+
Chunk shape. If not provided, will be guessed from `shape` and `dtype`.
132132
dtype : string or dtype, optional
133133
NumPy dtype.
134134
compression : string, optional
@@ -238,8 +238,8 @@ def init_array(store, shape, chunks, dtype=None, compression='default',
238238

239239
# normalize metadata
240240
shape = normalize_shape(shape)
241-
chunks = normalize_chunks(chunks, shape)
242241
dtype = np.dtype(dtype)
242+
chunks = normalize_chunks(chunks, shape, dtype.itemsize)
243243
compressor_cls = get_compressor_cls(compression)
244244
compression = compressor_cls.canonical_name
245245
compression_opts = compressor_cls.normalize_opts(

zarr/tests/test_creation.py

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -68,27 +68,27 @@ def __getitem__(self, item):
6868

6969

7070
def test_empty():
71-
z = empty(100, 10)
71+
z = empty(100, chunks=10)
7272
eq((100,), z.shape)
7373
eq((10,), z.chunks)
7474

7575

7676
def test_zeros():
77-
z = zeros(100, 10)
77+
z = zeros(100, chunks=10)
7878
eq((100,), z.shape)
7979
eq((10,), z.chunks)
8080
assert_array_equal(np.zeros(100), z[:])
8181

8282

8383
def test_ones():
84-
z = ones(100, 10)
84+
z = ones(100, chunks=10)
8585
eq((100,), z.shape)
8686
eq((10,), z.chunks)
8787
assert_array_equal(np.ones(100), z[:])
8888

8989

9090
def test_full():
91-
z = full(100, 10, fill_value=42, dtype='i4')
91+
z = full(100, chunks=10, fill_value=42, dtype='i4')
9292
eq((100,), z.shape)
9393
eq((10,), z.chunks)
9494
assert_array_equal(np.full(100, fill_value=42, dtype='i4'), z[:])
@@ -165,7 +165,7 @@ def test_open_array():
165165

166166
def test_empty_like():
167167
# zarr array
168-
z = empty(100, 10, dtype='f4', compression='zlib',
168+
z = empty(100, chunks=10, dtype='f4', compression='zlib',
169169
compression_opts=5, order='F')
170170
z2 = empty_like(z)
171171
eq(z.shape, z2.shape)
@@ -177,19 +177,16 @@ def test_empty_like():
177177
eq(z.order, z2.order)
178178
# numpy array
179179
a = np.empty(100, dtype='f4')
180-
z3 = empty_like(a, chunks=10)
180+
z3 = empty_like(a)
181181
eq(a.shape, z3.shape)
182-
eq((10,), z3.chunks)
182+
eq((100,), z3.chunks)
183183
eq(a.dtype, z3.dtype)
184184
assert_is_none(z3.fill_value)
185-
with assert_raises(ValueError):
186-
# chunks missing
187-
empty_like(a)
188185

189186

190187
def test_zeros_like():
191188
# zarr array
192-
z = zeros(100, 10, dtype='f4', compression='zlib',
189+
z = zeros(100, chunks=10, dtype='f4', compression='zlib',
193190
compression_opts=5, order='F')
194191
z2 = zeros_like(z)
195192
eq(z.shape, z2.shape)
@@ -206,14 +203,11 @@ def test_zeros_like():
206203
eq((10,), z3.chunks)
207204
eq(a.dtype, z3.dtype)
208205
eq(0, z3.fill_value)
209-
with assert_raises(ValueError):
210-
# chunks missing
211-
zeros_like(a)
212206

213207

214208
def test_ones_like():
215209
# zarr array
216-
z = ones(100, 10, dtype='f4', compression='zlib',
210+
z = ones(100, chunks=10, dtype='f4', compression='zlib',
217211
compression_opts=5, order='F')
218212
z2 = ones_like(z)
219213
eq(z.shape, z2.shape)
@@ -230,13 +224,10 @@ def test_ones_like():
230224
eq((10,), z3.chunks)
231225
eq(a.dtype, z3.dtype)
232226
eq(1, z3.fill_value)
233-
with assert_raises(ValueError):
234-
# chunks missing
235-
ones_like(a)
236227

237228

238229
def test_full_like():
239-
z = full(100, 10, dtype='f4', compression='zlib',
230+
z = full(100, chunks=10, dtype='f4', compression='zlib',
240231
compression_opts=5, fill_value=42, order='F')
241232
z2 = full_like(z)
242233
eq(z.shape, z2.shape)
@@ -253,9 +244,6 @@ def test_full_like():
253244
eq((10,), z3.chunks)
254245
eq(a.dtype, z3.dtype)
255246
eq(42, z3.fill_value)
256-
with assert_raises(ValueError):
257-
# chunks missing
258-
full_like(a)
259247
with assert_raises(ValueError):
260248
# fill_value missing
261249
full_like(a, chunks=10)
@@ -265,7 +253,7 @@ def test_open_like():
265253
# zarr array
266254
path = tempfile.mktemp()
267255
atexit.register(shutil.rmtree, path)
268-
z = full(100, 10, dtype='f4', compression='zlib',
256+
z = full(100, chunks=10, dtype='f4', compression='zlib',
269257
compression_opts=5, fill_value=42, order='F')
270258
z2 = open_like(z, path)
271259
eq(z.shape, z2.shape)
@@ -284,24 +272,22 @@ def test_open_like():
284272
eq((10,), z3.chunks)
285273
eq(a.dtype, z3.dtype)
286274
assert_is_none(z3.fill_value)
287-
with assert_raises(ValueError):
288-
# chunks missing
289-
open_like(a, path)
290275

291276

292277
def test_create():
293278

294279
# defaults
295-
z = create(100, 10)
280+
z = create(100)
296281
assert_is_instance(z, Array)
297282
eq((100,), z.shape)
298-
eq((10,), z.chunks)
283+
eq((100,), z.chunks) # auto-chunks
299284
eq(np.dtype(None), z.dtype)
300285
eq('blosc', z.compression)
301286
assert_is_none(z.fill_value)
302287

303288
# all specified
304-
z = create(100, 10, dtype='i4', compression='zlib', compression_opts=1,
289+
z = create(100, chunks=10, dtype='i4', compression='zlib',
290+
compression_opts=1,
305291
fill_value=42, order='F')
306292
assert_is_instance(z, Array)
307293
eq((100,), z.shape)
@@ -314,7 +300,7 @@ def test_create():
314300

315301
# with synchronizer
316302
synchronizer = ThreadSynchronizer()
317-
z = create(100, 10, synchronizer=synchronizer)
303+
z = create(100, chunks=10, synchronizer=synchronizer)
318304
assert_is_instance(z, SynchronizedArray)
319305
eq((100,), z.shape)
320306
eq((10,), z.chunks)

0 commit comments

Comments
 (0)