Merge branch 'main' into well-known_labels

d-v-b · web-flow · commit 99f7266da9de · 2025-08-21T09:36:33.000+02:00
diff --git a/changes/3368.misc.rst b/changes/3368.misc.rst
@@ -0,0 +1,2 @@
+Improved performance of reading arrays by not unnecessarily using
+the fill value.
diff --git a/pyproject.toml b/pyproject.toml
@@ -352,14 +352,14 @@ module = [
     "tests.test_store.test_fsspec",
     "tests.test_store.test_memory",
     "tests.test_codecs.test_codecs",
+    "tests.test_metadata.*",
 ]
 strict = false
 
 # TODO: Move the next modules up to the strict = false section
 # and fix the errors
 [[tool.mypy.overrides]]
 module = [
-    "tests.test_metadata.*",
     "tests.test_store.test_core",
     "tests.test_store.test_logging",
     "tests.test_store.test_object",
diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
@@ -138,7 +138,7 @@ def validate(
         """
 
     async def _decode_single(self, chunk_data: CodecOutput, chunk_spec: ArraySpec) -> CodecInput:
-        raise NotImplementedError
+        raise NotImplementedError  # pragma: no cover
 
     async def decode(
         self,
@@ -161,7 +161,7 @@ async def decode(
     async def _encode_single(
         self, chunk_data: CodecInput, chunk_spec: ArraySpec
     ) -> CodecOutput | None:
-        raise NotImplementedError
+        raise NotImplementedError  # pragma: no cover
 
     async def encode(
         self,
@@ -242,7 +242,7 @@ async def _encode_partial_single(
         selection: SelectorTuple,
         chunk_spec: ArraySpec,
     ) -> None:
-        raise NotImplementedError
+        raise NotImplementedError  # pragma: no cover
 
     async def encode_partial(
         self,
@@ -427,6 +427,11 @@ async def read(
             The second slice selection determines where in the output array the chunk data will be written.
             The ByteGetter is used to fetch the necessary bytes.
             The chunk spec contains information about the construction of an array from the bytes.
+
+            If the Store returns ``None`` for a chunk, then the chunk was not
+            written and the implementation must set the values of that chunk (or
+            ``out``) to the fill value for the array.
+
         out : NDBuffer
         """
         ...
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
@@ -451,11 +451,10 @@ async def _decode_single(
         )
 
         # setup output array
-        out = chunk_spec.prototype.nd_buffer.create(
+        out = chunk_spec.prototype.nd_buffer.empty(
             shape=shard_shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=0,
         )
         shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)
 
@@ -498,11 +497,10 @@ async def _decode_partial_single(
         )
 
         # setup output array
-        out = shard_spec.prototype.nd_buffer.create(
+        out = shard_spec.prototype.nd_buffer.empty(
             shape=indexer.shape,
             dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
-            fill_value=0,
         )
 
         indexed_chunks = list(indexer)
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -3,7 +3,7 @@
 import json
 import warnings
 from asyncio import gather
-from collections.abc import Iterable
+from collections.abc import Iterable, Mapping
 from dataclasses import dataclass, field, replace
 from itertools import starmap
 from logging import getLogger
@@ -1349,11 +1349,10 @@ async def _get_selection(
                     f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}"
                 )
         else:
-            out_buffer = prototype.nd_buffer.create(
+            out_buffer = prototype.nd_buffer.empty(
                 shape=indexer.shape,
                 dtype=out_dtype,
                 order=self.order,
-                fill_value=self.metadata.fill_value,
             )
         if product(indexer.shape) > 0:
             # need to use the order from the metadata for v2
@@ -3908,7 +3907,7 @@ def _build_parents(
 
 CompressorsLike: TypeAlias = (
     Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec]
-    | dict[str, JSON]
+    | Mapping[str, JSON]
     | BytesBytesCodec
     | Numcodec
     | Literal["auto"]
diff --git a/tests/test_api/test_asynchronous.py b/tests/test_api/test_asynchronous.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pytest
+
+from zarr import create_array
+from zarr.api.asynchronous import _get_shape_chunks, _like_args, open
+from zarr.core.buffer.core import default_buffer_prototype
+
+if TYPE_CHECKING:
+    from typing import Any
+
+    import numpy.typing as npt
+
+    from zarr.core.array import Array, AsyncArray
+    from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
+
+
+@dataclass
+class WithShape:
+    shape: tuple[int, ...]
+
+
+@dataclass
+class WithChunks(WithShape):
+    chunks: tuple[int, ...]
+
+
+@dataclass
+class WithChunkLen(WithShape):
+    chunklen: int
+
+
+@pytest.mark.parametrize(
+    ("observed", "expected"),
+    [
+        ({}, (None, None)),
+        (WithShape(shape=(1, 2)), ((1, 2), None)),
+        (WithChunks(shape=(1, 2), chunks=(1, 2)), ((1, 2), (1, 2))),
+        (WithChunkLen(shape=(10, 10), chunklen=1), ((10, 10), (1, 10))),
+    ],
+)
+def test_get_shape_chunks(
+    observed: object, expected: tuple[tuple[int, ...] | None, tuple[int, ...] | None]
+) -> None:
+    """
+    Test the _get_shape_chunks function
+    """
+    assert _get_shape_chunks(observed) == expected
+
+
+@pytest.mark.parametrize(
+    ("observed", "expected"),
+    [
+        (np.arange(10, dtype=np.dtype("int64")), {"shape": (10,), "dtype": np.dtype("int64")}),
+        (WithChunks(shape=(1, 2), chunks=(1, 2)), {"chunks": (1, 2), "shape": (1, 2)}),
+        (
+            create_array(
+                {},
+                chunks=(10,),
+                shape=(100,),
+                dtype="f8",
+                compressors=None,
+                filters=None,
+                zarr_format=2,
+            )._async_array,
+            {
+                "chunks": (10,),
+                "shape": (100,),
+                "dtype": np.dtype("f8"),
+                "compressor": None,
+                "filters": None,
+                "order": "C",
+            },
+        ),
+    ],
+)
+def test_like_args(
+    observed: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array | npt.NDArray[Any],
+    expected: object,
+) -> None:
+    """
+    Test the _like_args function
+    """
+    assert _like_args(observed, {}) == expected
+
+
+async def test_open_no_array() -> None:
+    """
+    Test that zarr.api.asynchronous.open falls back to opening a group when no array is found, even though shape was specified in kwargs.
+    This behavior makes no sense, but we should still test it.
+    """
+    store = {
+        "zarr.json": default_buffer_prototype().buffer.from_bytes(
+            json.dumps({"zarr_format": 3, "node_type": "group"}).encode("utf-8")
+        )
+    }
+    with pytest.raises(
+        TypeError, match=r"open_group\(\) got an unexpected keyword argument 'shape'"
+    ):
+        await open(store=store, shape=(1,))
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+Improved performance of reading arrays by not unnecessarily using`
	`2`	`+the fill value.`
Original file line number	Diff line number	Diff line change
`@@ -451,11 +451,10 @@ async def _decode_single(`
`451`	`451`	`)`
`452`	`452`
`453`	`453`	`# setup output array`
`454`		`- out = chunk_spec.prototype.nd_buffer.create(`
	`454`	`+ out = chunk_spec.prototype.nd_buffer.empty(`
`455`	`455`	`shape=shard_shape,`
`456`	`456`	`dtype=shard_spec.dtype.to_native_dtype(),`
`457`	`457`	`order=shard_spec.order,`
`458`		`- fill_value=0,`
`459`	`458`	`)`
`460`	`459`	`shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)`
`461`	`460`
`@@ -498,11 +497,10 @@ async def _decode_partial_single(`
`498`	`497`	`)`
`499`	`498`
`500`	`499`	`# setup output array`
`501`		`- out = shard_spec.prototype.nd_buffer.create(`
	`500`	`+ out = shard_spec.prototype.nd_buffer.empty(`
`502`	`501`	`shape=indexer.shape,`
`503`	`502`	`dtype=shard_spec.dtype.to_native_dtype(),`
`504`	`503`	`order=shard_spec.order,`
`505`		`- fill_value=0,`
`506`	`504`	`)`
`507`	`505`
`508`	`506`	`indexed_chunks = list(indexer)`