Skip to content

Commit 99f7266

Browse files
authored
Merge branch 'main' into well-known_labels
2 parents 2406ef0 + cb38926 commit 99f7266

File tree

9 files changed

+191
-70
lines changed

9 files changed

+191
-70
lines changed

changes/3368.misc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improved performance of reading arrays by not unnecessarily using
2+
the fill value.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,14 +352,14 @@ module = [
352352
"tests.test_store.test_fsspec",
353353
"tests.test_store.test_memory",
354354
"tests.test_codecs.test_codecs",
355+
"tests.test_metadata.*",
355356
]
356357
strict = false
357358

358359
# TODO: Move the next modules up to the strict = false section
359360
# and fix the errors
360361
[[tool.mypy.overrides]]
361362
module = [
362-
"tests.test_metadata.*",
363363
"tests.test_store.test_core",
364364
"tests.test_store.test_logging",
365365
"tests.test_store.test_object",

src/zarr/abc/codec.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def validate(
138138
"""
139139

140140
async def _decode_single(self, chunk_data: CodecOutput, chunk_spec: ArraySpec) -> CodecInput:
141-
raise NotImplementedError
141+
raise NotImplementedError # pragma: no cover
142142

143143
async def decode(
144144
self,
@@ -161,7 +161,7 @@ async def decode(
161161
async def _encode_single(
162162
self, chunk_data: CodecInput, chunk_spec: ArraySpec
163163
) -> CodecOutput | None:
164-
raise NotImplementedError
164+
raise NotImplementedError # pragma: no cover
165165

166166
async def encode(
167167
self,
@@ -242,7 +242,7 @@ async def _encode_partial_single(
242242
selection: SelectorTuple,
243243
chunk_spec: ArraySpec,
244244
) -> None:
245-
raise NotImplementedError
245+
raise NotImplementedError # pragma: no cover
246246

247247
async def encode_partial(
248248
self,
@@ -427,6 +427,11 @@ async def read(
427427
The second slice selection determines where in the output array the chunk data will be written.
428428
The ByteGetter is used to fetch the necessary bytes.
429429
The chunk spec contains information about the construction of an array from the bytes.
430+
431+
If the Store returns ``None`` for a chunk, then the chunk was not
432+
written and the implementation must set the values of that chunk (or
433+
``out``) to the fill value for the array.
434+
430435
out : NDBuffer
431436
"""
432437
...

src/zarr/codecs/sharding.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,10 @@ async def _decode_single(
451451
)
452452

453453
# setup output array
454-
out = chunk_spec.prototype.nd_buffer.create(
454+
out = chunk_spec.prototype.nd_buffer.empty(
455455
shape=shard_shape,
456456
dtype=shard_spec.dtype.to_native_dtype(),
457457
order=shard_spec.order,
458-
fill_value=0,
459458
)
460459
shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)
461460

@@ -498,11 +497,10 @@ async def _decode_partial_single(
498497
)
499498

500499
# setup output array
501-
out = shard_spec.prototype.nd_buffer.create(
500+
out = shard_spec.prototype.nd_buffer.empty(
502501
shape=indexer.shape,
503502
dtype=shard_spec.dtype.to_native_dtype(),
504503
order=shard_spec.order,
505-
fill_value=0,
506504
)
507505

508506
indexed_chunks = list(indexer)

src/zarr/core/array.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import warnings
55
from asyncio import gather
6-
from collections.abc import Iterable
6+
from collections.abc import Iterable, Mapping
77
from dataclasses import dataclass, field, replace
88
from itertools import starmap
99
from logging import getLogger
@@ -1349,11 +1349,10 @@ async def _get_selection(
13491349
f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}"
13501350
)
13511351
else:
1352-
out_buffer = prototype.nd_buffer.create(
1352+
out_buffer = prototype.nd_buffer.empty(
13531353
shape=indexer.shape,
13541354
dtype=out_dtype,
13551355
order=self.order,
1356-
fill_value=self.metadata.fill_value,
13571356
)
13581357
if product(indexer.shape) > 0:
13591358
# need to use the order from the metadata for v2
@@ -3908,7 +3907,7 @@ def _build_parents(
39083907

39093908
CompressorsLike: TypeAlias = (
39103909
Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec]
3911-
| dict[str, JSON]
3910+
| Mapping[str, JSON]
39123911
| BytesBytesCodec
39133912
| Numcodec
39143913
| Literal["auto"]
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from dataclasses import dataclass
5+
from typing import TYPE_CHECKING
6+
7+
import numpy as np
8+
import pytest
9+
10+
from zarr import create_array
11+
from zarr.api.asynchronous import _get_shape_chunks, _like_args, open
12+
from zarr.core.buffer.core import default_buffer_prototype
13+
14+
if TYPE_CHECKING:
15+
from typing import Any
16+
17+
import numpy.typing as npt
18+
19+
from zarr.core.array import Array, AsyncArray
20+
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
21+
22+
23+
@dataclass
class WithShape:
    """Minimal array-like stand-in that exposes only a ``shape`` attribute.

    Used as a parametrized input for ``test_get_shape_chunks`` to represent an
    object that has a shape but no chunking information.
    """

    # Shape of the pretend array, one entry per dimension.
    shape: tuple[int, ...]
26+
27+
28+
@dataclass
class WithChunks(WithShape):
    """Array-like stand-in that exposes ``shape`` and an explicit ``chunks`` tuple.

    Used as a parametrized input for ``test_get_shape_chunks`` and
    ``test_like_args``.
    """

    # Chunk shape of the pretend array.
    chunks: tuple[int, ...]
31+
32+
33+
@dataclass
class WithChunkLen(WithShape):
    """Array-like stand-in that exposes ``shape`` and a scalar ``chunklen``.

    Used as a parametrized input for ``test_get_shape_chunks``; that test
    expects ``shape=(10, 10), chunklen=1`` to yield chunks of ``(1, 10)``.
    """

    # Scalar chunk length (no full ``chunks`` tuple is provided).
    chunklen: int
36+
37+
38+
@pytest.mark.parametrize(
    ("observed", "expected"),
    [
        # An object with neither shape nor chunk information.
        ({}, (None, None)),
        # Shape only.
        (WithShape(shape=(1, 2)), ((1, 2), None)),
        # Shape and explicit chunks.
        (WithChunks(shape=(1, 2), chunks=(1, 2)), ((1, 2), (1, 2))),
        # Shape and a scalar chunk length.
        (WithChunkLen(shape=(10, 10), chunklen=1), ((10, 10), (1, 10))),
    ],
)
def test_get_shape_chunks(
    observed: object, expected: tuple[tuple[int, ...] | None, tuple[int, ...] | None]
) -> None:
    """
    Test the _get_shape_chunks function.

    Each case pairs an input object with the ``(shape, chunks)`` tuple that
    ``_get_shape_chunks`` is expected to return for it.
    """
    assert _get_shape_chunks(observed) == expected
54+
55+
56+
@pytest.mark.parametrize(
    ("observed", "expected"),
    [
        # A plain NumPy array contributes its shape and dtype.
        (np.arange(10, dtype=np.dtype("int64")), {"shape": (10,), "dtype": np.dtype("int64")}),
        # An object with only shape and chunks contributes exactly those.
        (WithChunks(shape=(1, 2), chunks=(1, 2)), {"chunks": (1, 2), "shape": (1, 2)}),
        # A Zarr v2 async array contributes its full creation parameters.
        (
            create_array(
                {},
                chunks=(10,),
                shape=(100,),
                dtype="f8",
                compressors=None,
                filters=None,
                zarr_format=2,
            )._async_array,
            {
                "chunks": (10,),
                "shape": (100,),
                "dtype": np.dtype("f8"),
                "compressor": None,
                "filters": None,
                "order": "C",
            },
        ),
    ],
)
def test_like_args(
    observed: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array | npt.NDArray[Any],
    expected: object,
) -> None:
    """
    Test the _like_args function.

    ``_like_args`` is called with the array-like ``observed`` and an empty
    kwargs dict; the result must equal ``expected``.
    """
    assert _like_args(observed, {}) == expected
90+
91+
92+
async def test_open_no_array() -> None:
    """
    Test that zarr.api.asynchronous.open attempts to open a group when no array is found,
    but shape was specified in kwargs.

    This behavior makes no sense but we should still test it.
    """
    # A dict-backed store holding only Zarr v3 *group* metadata, so no array exists.
    store = {
        "zarr.json": default_buffer_prototype().buffer.from_bytes(
            json.dumps({"zarr_format": 3, "node_type": "group"}).encode("utf-8")
        )
    }
    # ``shape`` is forwarded to ``open_group``, which rejects it with a TypeError.
    with pytest.raises(
        TypeError, match=r"open_group\(\) got an unexpected keyword argument 'shape'"
    ):
        await open(store=store, shape=(1,))

0 commit comments

Comments
 (0)