Skip to content

Commit 89659b2

Browse files
jrbourbeau authored and alimanfoo committed
Use uniform chunking when specifying chunks as an integer (#456)
* Updates normalize_chunks behavior for integer chunks
* Return early for integer chunks case
* Updates hard coded hashes
* Adds item to release notes
* Updates docstrings
* Adds support for auto-chunking with -1
* Reference -1 in chunks in tutorial
* Updates release notes
* Updates s3fs.S3Map store type in tutorial
1 parent d330cdf commit 89659b2

File tree

7 files changed

+32
-22
lines changed

7 files changed

+32
-22
lines changed

docs/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Release notes
44
Upcoming Release
55
----------------
66

7+
* Use uniform chunking for all dimensions when specifying ``chunks`` as an integer.
8+
Also adds support for specifying ``-1`` to chunk across an entire dimension.
9+
By :user:`James Bourbeau <jrbourbeau>`; :issue:`456`
10+
711
* Rename ``DictStore`` to ``MemoryStore``.
812
By :user:`James Bourbeau <jrbourbeau>`; :issue:`455`
913

docs/tutorial.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ Here is an example using S3Map to read an array created previously::
791791
Order : C
792792
Read-only : False
793793
Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
794-
Store type : s3fs.mapping.S3Map
794+
Store type : fsspec.mapping.FSMap
795795
No. bytes : 21
796796
Chunks initialized : 3/3
797797
>>> z[:]
@@ -1170,8 +1170,8 @@ better performance, at least when using the Blosc compression library.
11701170
The optimal chunk shape will depend on how you want to access the data. E.g.,
11711171
for a 2-dimensional array, if you only ever take slices along the first
11721172
dimension, then chunk across the second dimension. If you know you want to chunk
1173-
across an entire dimension you can use ``None`` within the ``chunks`` argument,
1174-
e.g.::
1173+
across an entire dimension you can use ``None`` or ``-1`` within the ``chunks``
1174+
argument, e.g.::
11751175

11761176
>>> z1 = zarr.zeros((10000, 10000), chunks=(100, None), dtype='i4')
11771177
>>> z1.chunks

zarr/creation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def create(shape, chunks=True, dtype=None, compressor='default',
2828
chunks : int or tuple of ints, optional
2929
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
3030
False, will be set to `shape`, i.e., single chunk for the whole array.
31+
If an int, the chunk size in each dimension will be given by the value
32+
of `chunks`. Default is True.
3133
dtype : string or dtype, optional
3234
NumPy dtype.
3335
compressor : Codec, optional
@@ -369,6 +371,8 @@ def open_array(store=None, mode='a', shape=None, chunks=True, dtype=None,
369371
chunks : int or tuple of ints, optional
370372
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
371373
False, will be set to `shape`, i.e., single chunk for the whole array.
374+
If an int, the chunk size in each dimension will be given by the value
375+
of `chunks`. Default is True.
372376
dtype : string or dtype, optional
373377
NumPy dtype.
374378
compressor : Codec, optional

zarr/tests/test_core.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ def test_hexdigest(self):
512512

513513
# Check basic 2-D array
514514
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
515-
assert '4f797d7bdad0fa1c9fa8c80832efb891a68de104' == z.hexdigest()
515+
assert 'c7190ad2bea1e9d2e73eaa2d3ca9187be1ead261' == z.hexdigest()
516516

517517
# Check basic 1-D array with some data
518518
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1302,7 +1302,7 @@ def test_hexdigest(self):
13021302

13031303
# Check basic 2-D array
13041304
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1305-
assert 'dde44c72cc530bd6aae39b629eb15a2da627e5f9' == z.hexdigest()
1305+
assert '6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe' == z.hexdigest()
13061306

13071307
# Check basic 1-D array with some data
13081308
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1357,7 +1357,7 @@ def test_hexdigest(self):
13571357

13581358
# Check basic 2-D array
13591359
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1360-
assert 'dde44c72cc530bd6aae39b629eb15a2da627e5f9' == z.hexdigest()
1360+
assert '6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe' == z.hexdigest()
13611361

13621362
# Check basic 1-D array with some data
13631363
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1713,7 +1713,7 @@ def test_hexdigest(self):
17131713

17141714
# Check basic 2-D array
17151715
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1716-
assert '189690c5701d33a41cd7ce9aa0ac8dac49a69c51' == z.hexdigest()
1716+
assert 'ec2e008525ae09616dbc1d2408cbdb42532005c8' == z.hexdigest()
17171717

17181718
# Check basic 1-D array with some data
17191719
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1863,7 +1863,7 @@ def test_hexdigest(self):
18631863

18641864
# Check basic 2-D array
18651865
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1866-
assert 'de841ca276042993da53985de1e7769f5d0fc54d' == z.hexdigest()
1866+
assert 'b75eb90f68aa8ee1e29f2c542e851d3945066c54' == z.hexdigest()
18671867

18681868
# Check basic 1-D array with some data
18691869
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1899,7 +1899,7 @@ def test_hexdigest(self):
18991899

19001900
# Check basic 2-D array
19011901
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1902-
assert 'f57a9a73a4004490fe1b871688651b8a298a5db7' == z.hexdigest()
1902+
assert '37c7c46e5730bba37da5e518c9d75f0d774c5098' == z.hexdigest()
19031903

19041904
# Check basic 1-D array with some data
19051905
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1935,7 +1935,7 @@ def test_hexdigest(self):
19351935

19361936
# Check basic 2-D array
19371937
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1938-
assert 'deb675ff91dd26dba11b65aab5f19a1f21a5645b' == z.hexdigest()
1938+
assert '74ed339cfe84d544ac023d085ea0cd6a63f56c4b' == z.hexdigest()
19391939

19401940
# Check basic 1-D array with some data
19411941
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -1978,7 +1978,7 @@ def test_hexdigest(self):
19781978

19791979
# Check basic 2-D array
19801980
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
1981-
assert 'b93b163a21e8500519250a6defb821d03eb5d9e0' == z.hexdigest()
1981+
assert '9de97b5c49b38e68583ed701d7e8f4c94b6a8406' == z.hexdigest()
19821982

19831983
# Check basic 1-D array with some data
19841984
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -2021,7 +2021,7 @@ def test_hexdigest(self):
20212021

20222022
# Check basic 2-D array
20232023
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
2024-
assert '9abf3ad54413ab11855d88a5e0087cd416657e02' == z.hexdigest()
2024+
assert '7300f1eb130cff5891630038fd99c28ef23d3a01' == z.hexdigest()
20252025

20262026
# Check basic 1-D array with some data
20272027
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')

zarr/tests/test_sync.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def test_hexdigest(self):
124124

125125
# Check basic 2-D array
126126
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
127-
assert 'dde44c72cc530bd6aae39b629eb15a2da627e5f9' == z.hexdigest()
127+
assert '6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe' == z.hexdigest()
128128

129129
# Check basic 1-D array with some data
130130
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
@@ -168,7 +168,7 @@ def test_hexdigest(self):
168168

169169
# Check basic 2-D array
170170
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
171-
assert 'dde44c72cc530bd6aae39b629eb15a2da627e5f9' == z.hexdigest()
171+
assert '6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe' == z.hexdigest()
172172

173173
# Check basic 1-D array with some data
174174
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')

zarr/tests/test_util.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_normalize_chunks():
2929
assert (10, 10) == normalize_chunks((10, 10), (100, 10), 1)
3030
assert (10, 10) == normalize_chunks(10, (100, 10), 1)
3131
assert (10, 10) == normalize_chunks((10, None), (100, 10), 1)
32-
assert (30, 20, 10) == normalize_chunks(30, (100, 20, 10), 1)
32+
assert (30, 30, 30) == normalize_chunks(30, (100, 20, 10), 1)
3333
assert (30, 20, 10) == normalize_chunks((30,), (100, 20, 10), 1)
3434
assert (30, 20, 10) == normalize_chunks((30, None), (100, 20, 10), 1)
3535
assert (30, 20, 10) == normalize_chunks((30, None, None), (100, 20, 10), 1)
@@ -41,8 +41,9 @@ def test_normalize_chunks():
4141
normalize_chunks((100, 10), (100,), 1)
4242

4343
# test auto-chunking
44-
chunks = normalize_chunks(None, (100,), 1)
45-
assert (100,) == chunks
44+
assert (100,) == normalize_chunks(None, (100,), 1)
45+
assert (100,) == normalize_chunks(-1, (100,), 1)
46+
assert (30, 20, 10) == normalize_chunks((30, -1, None), (100, 20, 10), 1)
4647

4748

4849
def test_is_total_slice():

zarr/util.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def normalize_chunks(chunks, shape, typesize):
128128

129129
# handle 1D convenience form
130130
if isinstance(chunks, numbers.Integral):
131-
chunks = (int(chunks),)
131+
chunks = tuple(int(chunks) for _ in shape)
132132

133133
# handle bad dimensionality
134134
if len(chunks) > len(shape):
@@ -139,11 +139,12 @@ def normalize_chunks(chunks, shape, typesize):
139139
# assume chunks across remaining dimensions
140140
chunks += shape[len(chunks):]
141141

142-
# handle None in chunks
143-
chunks = tuple(s if c is None else int(c)
144-
for s, c in zip(shape, chunks))
142+
# handle None or -1 in chunks
143+
if -1 in chunks or None in chunks:
144+
chunks = tuple(s if c == -1 or c is None else int(c)
145+
for s, c in zip(shape, chunks))
145146

146-
return chunks
147+
return tuple(chunks)
147148

148149

149150
def normalize_dtype(dtype, object_codec):

0 commit comments

Comments
 (0)