diff --git a/changes/3103.bugfix.rst b/changes/3103.bugfix.rst new file mode 100644 index 0000000000..93aecce908 --- /dev/null +++ b/changes/3103.bugfix.rst @@ -0,0 +1,7 @@ +When creating arrays without explicitly specifying a chunk size using `zarr.create` and other +array creation routines, the chunk size will now be set automatically instead of defaulting to the data shape. +For large arrays this will result in smaller default chunk sizes. +To retain previous behaviour, explicitly set the chunk shape to the data shape. + +This fix matches the existing chunking behaviour of +`zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index aae7d28d15..b262ced29b 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -992,19 +992,11 @@ async def create( ) if zarr_format == 2: - if chunks is None: - chunks = shape dtype = parse_dtype(dtype, zarr_format) if not filters: filters = _default_filters(dtype) if compressor == "auto": compressor = _default_compressor(dtype) - elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] - if chunks is not None: - chunk_shape = chunks - chunks = None - else: - chunk_shape = shape if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index d3e40c26ed..b5a581b8a4 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -64,6 +64,9 @@ def _guess_chunks( if isinstance(shape, int): shape = (shape,) + if typesize == 0: + return shape + ndims = len(shape) # require chunks to have non-zero length for all dimensions chunks = np.maximum(np.array(shape, dtype="=f8"), 1) diff --git a/tests/test_api.py b/tests/test_api.py index 640478e9c1..8cd4ab6b60 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1301,3 +1301,41 @@ def test_no_overwrite_load(tmp_path: Path) -> 
None: with contextlib.suppress(NotImplementedError): zarr.load(store) assert existing_fpath.exists()
+
+
+@pytest.mark.parametrize(
+    "f",
+    [
+        zarr.array,
+        zarr.create,
+        zarr.create_array,
+        zarr.ones,
+        zarr.ones_like,
+        zarr.empty,
+        zarr.empty_like,
+        zarr.full,
+        zarr.full_like,
+        zarr.zeros,
+        zarr.zeros_like,
+    ],
+)
+def test_auto_chunks(f: Callable[..., Array]) -> None:
+    # No chunks are passed: each routine must auto-chunk (1000, 1000) uint8 data to (500, 500).
+    # TODO: test shards with this test too
+    shape = (1000, 1000)
+    dtype = np.uint8
+    kwargs = {"shape": shape, "dtype": dtype}
+    array = np.zeros(shape, dtype=dtype)
+    store = zarr.storage.MemoryStore()
+
+    if f in [zarr.full, zarr.full_like]:
+        kwargs["fill_value"] = 0
+    if f is zarr.array:
+        kwargs["data"] = array
+    if f in [zarr.empty_like, zarr.full_like, zarr.ones_like, zarr.zeros_like]:
+        kwargs["a"] = array
+    if f is zarr.create_array:
+        kwargs["store"] = store
+
+    a = f(**kwargs)
+    assert a.chunks == (500, 500)