Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions changes/3103.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
When creating arrays without explicitly specifying a chunk size using `zarr.create` and other
array creation routines, the chunk size will now be set automatically instead of defaulting to the data shape.
For large arrays this will result in smaller default chunk sizes.
To retain previous behaviour, explicitly set the chunk shape to the data shape.

This fix matches the existing chunking behaviour of
`zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`.
8 changes: 0 additions & 8 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,19 +992,11 @@ async def create(
)

if zarr_format == 2:
if chunks is None:
chunks = shape
dtype = parse_dtype(dtype, zarr_format)
if not filters:
filters = _default_filters(dtype)
if compressor == "auto":
compressor = _default_compressor(dtype)
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
if chunks is not None:
chunk_shape = chunks
chunks = None
else:
chunk_shape = shape

if synchronizer is not None:
warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2)
Expand Down
3 changes: 3 additions & 0 deletions src/zarr/core/chunk_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@
if isinstance(shape, int):
shape = (shape,)

if typesize == 0:
return shape

Check warning on line 68 in src/zarr/core/chunk_grids.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/chunk_grids.py#L68

Added line #L68 was not covered by tests

ndims = len(shape)
# require chunks to have non-zero length for all dimensions
chunks = np.maximum(np.array(shape, dtype="=f8"), 1)
Expand Down
38 changes: 38 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,3 +1301,41 @@ def test_no_overwrite_load(tmp_path: Path) -> None:
with contextlib.suppress(NotImplementedError):
zarr.load(store)
assert existing_fpath.exists()


@pytest.mark.parametrize(
    "f",
    [
        zarr.array,
        zarr.create,
        zarr.create_array,
        zarr.ones,
        zarr.ones_like,
        zarr.empty,
        zarr.empty_like,
        zarr.full,
        zarr.full_like,
        zarr.zeros,
        zarr.zeros_like,
    ],
)
def test_auto_chunks(f: Callable[..., Array]) -> None:
    """Check that array creation routines pick a chunk shape automatically.

    Each parametrized creation function is called for a ``(1000, 1000)``
    ``uint8`` array without passing any chunk argument, and the resulting
    array is expected to report ``chunks == (500, 500)`` rather than the
    full data shape.

    Parameters
    ----------
    f
        The public ``zarr`` array creation function under test.
    """
    # TODO: test shards with this test too
    shape = (1000, 1000)
    dtype = np.uint8
    kwargs: dict[str, object] = {"shape": shape, "dtype": dtype}
    array = np.zeros(shape, dtype=dtype)

    # The checks below are deliberately independent ``if`` statements:
    # ``zarr.full_like`` needs both a fill value and a template array.
    if f in (zarr.full, zarr.full_like):
        kwargs["fill_value"] = 0
    if f is zarr.array:
        kwargs["data"] = array
    if f in (zarr.empty_like, zarr.full_like, zarr.ones_like, zarr.zeros_like):
        # The ``*_like`` functions receive the template array as ``a``.
        kwargs["a"] = array
    if f is zarr.create_array:
        # ``create_array`` is the only function here given an explicit store.
        kwargs["store"] = zarr.storage.MemoryStore()

    a = f(**kwargs)
    assert a.chunks == (500, 500)