Skip to content

Commit 8b77464

Browse files
brokkoli71normanrzjhammandstansby
authored
Deterministic chunk padding (#2755)
* test deterministic memory store * deterministic memory store * simplify test * document changes * Update src/zarr/core/buffer/cpu.py Co-authored-by: Joe Hamman <[email protected]> * lint * handle fill_value==None * better test * improve changes documentation Co-authored-by: David Stansby <[email protected]> * update docstrings * document changed `zarr.empty` * add notes to empty() and empty_like() --------- Co-authored-by: Norman Rzepka <[email protected]> Co-authored-by: Joe Hamman <[email protected]> Co-authored-by: David Stansby <[email protected]>
1 parent f4278a5 commit 8b77464

File tree

6 files changed

+67
-13
lines changed

6 files changed

+67
-13
lines changed

changes/2755.bugfix.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the
2+
specified fill value, or with zeros if no fill value is provided.
3+
This fixes a bug where Zarr format 2 data with no fill value was written with unpredictable chunk sizes.

src/zarr/api/asynchronous.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,8 @@ async def create(
10651065
async def empty(
10661066
shape: ChunkCoords, **kwargs: Any
10671067
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1068-
"""Create an empty array.
1068+
"""Create an empty array with the specified shape. The contents will be filled with the
1069+
array's fill value or zeros if no fill value is provided.
10691070
10701071
Parameters
10711072
----------
@@ -1087,7 +1088,8 @@ async def empty(
10871088
async def empty_like(
10881089
a: ArrayLike, **kwargs: Any
10891090
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1090-
"""Create an empty array like `a`.
1091+
"""Create an empty array like `a`. The contents will be filled with the
1092+
array's fill value or zeros if no fill value is provided.
10911093
10921094
Parameters
10931095
----------
@@ -1100,6 +1102,12 @@ async def empty_like(
11001102
-------
11011103
Array
11021104
The new array.
1105+
1106+
Notes
1107+
-----
1108+
The contents of an empty Zarr array are deterministic. Any region that
1109+
has not been written reads back as the array's fill value, or as zeros
1110+
if no fill value is provided, and is stable from one access to the next.
11031111
"""
11041112
like_kwargs = _like_args(a, kwargs)
11051113
return await empty(**like_kwargs)

src/zarr/api/synchronous.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,8 @@ def create_array(
902902

903903
# TODO: add type annotations for kwargs
904904
def empty(shape: ChunkCoords, **kwargs: Any) -> Array:
905-
"""Create an empty array.
905+
"""Create an empty array with the specified shape. The contents will be filled with the
906+
array's fill value or zeros if no fill value is provided.
906907
907908
Parameters
908909
----------
@@ -928,7 +929,8 @@ def empty(shape: ChunkCoords, **kwargs: Any) -> Array:
928929
# TODO: move ArrayLike to common module
929930
# TODO: add type annotations for kwargs
930931
def empty_like(a: ArrayLike, **kwargs: Any) -> Array:
931-
"""Create an empty array like another array.
932+
"""Create an empty array like another array. The contents will be filled with the
933+
array's fill value or zeros if no fill value is provided.
932934
933935
Parameters
934936
----------
@@ -941,6 +943,12 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> Array:
941943
-------
942944
Array
943945
The new array.
946+
947+
Notes
948+
-----
949+
The contents of an empty Zarr array are deterministic. Any region that
950+
has not been written reads back as the array's fill value, or as zeros
951+
if no fill value is provided, and is stable from one access to the next.
944952
"""
945953
return Array(sync(async_api.empty_like(a, **kwargs)))
946954

src/zarr/core/buffer/cpu.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,10 @@ def create(
154154
order: Literal["C", "F"] = "C",
155155
fill_value: Any | None = None,
156156
) -> Self:
157-
ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order))
158-
if fill_value is not None:
159-
ret.fill(fill_value)
160-
return ret
157+
if fill_value is None:
158+
return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order))
159+
else:
160+
return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order))
161161

162162
@classmethod
163163
def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:

src/zarr/core/group.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,7 +1498,8 @@ async def tree(self, expand: bool | None = None, level: int | None = None) -> An
14981498
async def empty(
14991499
self, *, name: str, shape: ChunkCoords, **kwargs: Any
15001500
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1501-
"""Create an empty array in this Group.
1501+
"""Create an empty array with the specified shape in this Group. The contents will
1502+
be filled with the array's fill value or zeros if no fill value is provided.
15021503
15031504
Parameters
15041505
----------
@@ -1515,7 +1516,6 @@ async def empty(
15151516
retrieve data from an empty Zarr array, any values may be returned,
15161517
and these are not guaranteed to be stable from one access to the next.
15171518
"""
1518-
15191519
return await async_api.empty(shape=shape, store=self.store_path, path=name, **kwargs)
15201520

15211521
async def zeros(
@@ -1592,7 +1592,8 @@ async def full(
15921592
async def empty_like(
15931593
self, *, name: str, data: async_api.ArrayLike, **kwargs: Any
15941594
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1595-
"""Create an empty sub-array like `data`.
1595+
"""Create an empty sub-array like `data`. The contents will be filled with
1596+
the array's fill value or zeros if no fill value is provided.
15961597
15971598
Parameters
15981599
----------
@@ -2442,7 +2443,8 @@ def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> Array:
24422443

24432444
@_deprecate_positional_args
24442445
def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array:
2445-
"""Create an empty array in this Group.
2446+
"""Create an empty array with the specified shape in this Group. The contents will be filled with
2447+
the array's fill value or zeros if no fill value is provided.
24462448
24472449
Parameters
24482450
----------
@@ -2531,7 +2533,8 @@ def full(
25312533

25322534
@_deprecate_positional_args
25332535
def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array:
2534-
"""Create an empty sub-array like `data`.
2536+
"""Create an empty sub-array like `data`. The contents will be filled
2537+
with the array's fill value or zeros if no fill value is provided.
25352538
25362539
Parameters
25372540
----------
@@ -2546,6 +2549,12 @@ def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) ->
25462549
-------
25472550
Array
25482551
The new array.
2552+
2553+
Notes
2554+
-----
2555+
The contents of an empty Zarr array are deterministic. Any region that
2556+
has not been written reads back as the array's fill value, or as zeros
2557+
if no fill value is provided, and is stable from one access to the next.
25492558
"""
25502559
return Array(self._sync(self._async_group.empty_like(name=name, data=data, **kwargs)))
25512560

tests/test_store/test_memory.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
from __future__ import annotations
22

3+
from typing import TYPE_CHECKING
4+
5+
import numpy as np
36
import pytest
47

8+
import zarr
59
from zarr.core.buffer import Buffer, cpu, gpu
610
from zarr.storage import GpuMemoryStore, MemoryStore
711
from zarr.testing.store import StoreTests
812
from zarr.testing.utils import gpu_test
913

14+
if TYPE_CHECKING:
15+
from zarr.core.common import ZarrFormat
16+
1017

1118
class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]):
1219
store_cls = MemoryStore
@@ -46,6 +53,25 @@ def test_store_supports_partial_writes(self, store: MemoryStore) -> None:
4653
def test_list_prefix(self, store: MemoryStore) -> None:
4754
assert True
4855

56+
@pytest.mark.parametrize("dtype", ["uint8", "float32", "int64"])
57+
@pytest.mark.parametrize("zarr_format", [2, 3])
58+
async def test_deterministic_size(
59+
self, store: MemoryStore, dtype, zarr_format: ZarrFormat
60+
) -> None:
61+
a = zarr.empty(
62+
store=store,
63+
shape=(3,),
64+
chunks=(1000,),
65+
dtype=dtype,
66+
zarr_format=zarr_format,
67+
overwrite=True,
68+
)
69+
a[...] = 1
70+
a.resize((1000,))
71+
72+
np.testing.assert_array_equal(a[:3], 1)
73+
np.testing.assert_array_equal(a[3:], 0)
74+
4975

5076
@gpu_test
5177
class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]):

0 commit comments

Comments
 (0)