Skip to content

Commit c31f8a1

Browse files
committed
Merge remote-tracking branch 'upstream/v3' into user/tom/feature/consolidated-metadata
2 parents f7e5b3f + a24e194 commit c31f8a1

File tree

14 files changed

+122
-123
lines changed

14 files changed

+122
-123
lines changed

.github/workflows/releases.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ jobs:
5555
with:
5656
name: releases
5757
path: dist
58-
- uses: pypa/gh-action-pypi-publish@v1.10.2
58+
- uses: pypa/gh-action-pypi-publish@v1.10.3
5959
with:
6060
user: __token__
6161
password: ${{ secrets.pypi_password }}

docs/_static/custom.css

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ Nat Methods 8, 441 (2011). https://doi.org/10.1038/nmeth.1618
7474
.sd-card .sd-card-header {
7575
border: none;
7676
background-color: white;
77-
color: #150458 !important;
7877
font-size: var(--pst-font-size-h5);
7978
font-weight: bold;
8079
padding: 2.5rem 0rem 0.5rem 0rem;
@@ -107,7 +106,6 @@ html[data-theme=dark] .sd-shadow-sm {
107106

108107
html[data-theme=dark] .sd-card .sd-card-header {
109108
background-color:var(--pst-color-background);
110-
color: #150458 !important;
111109
}
112110

113111
html[data-theme=dark] .sd-card .sd-card-footer {

docs/_static/custom.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
"highlights": "getting_started.html#highlights",
77
"contributing": "contributing.html",
88
"projects-using-zarr": "getting_started.html#projects-using-zarr",
9-
"acknowledgments": "acknowledgments.html",
109
"contents": "getting_started.html#contents",
1110
"indices-and-tables": "api.html#indices-and-tables"
1211
}

docs/acknowledgments.rst

Lines changed: 0 additions & 76 deletions
This file was deleted.

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ def setup(app: sphinx.application.Sphinx) -> None:
255255
# Output file base name for HTML help builder.
256256
htmlhelp_basename = "zarrdoc"
257257

258+
maximum_signature_line_length = 80
259+
258260
# -- Options for LaTeX output ---------------------------------------------
259261

260262
latex_elements = {

docs/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ Zarr-Python
1515
spec
1616
release
1717
license
18-
acknowledgments
1918
contributing
2019

2120
**Version**: |version|

src/zarr/abc/codec.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from zarr.core.indexing import SelectorTuple
2121

2222
__all__ = [
23+
"BaseCodec",
2324
"ArrayArrayCodec",
2425
"ArrayBytesCodec",
2526
"ArrayBytesCodecPartialDecodeMixin",
@@ -34,11 +35,15 @@
3435
CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer)
3536

3637

37-
class _Codec(Metadata, Generic[CodecInput, CodecOutput]):
38+
class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]):
3839
"""Generic base class for codecs.
39-
Please use ArrayArrayCodec, ArrayBytesCodec or BytesBytesCodec for subclassing.
4040
4141
Codecs can be registered via zarr.codecs.registry.
42+
43+
Warnings
44+
--------
45+
This class is not intended to be used directly; please use
46+
ArrayArrayCodec, ArrayBytesCodec or BytesBytesCodec for subclassing.
4247
"""
4348

4449
is_fixed_size: bool
@@ -148,19 +153,19 @@ async def encode(
148153
return await _batching_helper(self._encode_single, chunks_and_specs)
149154

150155

151-
class ArrayArrayCodec(_Codec[NDBuffer, NDBuffer]):
156+
class ArrayArrayCodec(BaseCodec[NDBuffer, NDBuffer]):
152157
"""Base class for array-to-array codecs."""
153158

154159
...
155160

156161

157-
class ArrayBytesCodec(_Codec[NDBuffer, Buffer]):
162+
class ArrayBytesCodec(BaseCodec[NDBuffer, Buffer]):
158163
"""Base class for array-to-bytes codecs."""
159164

160165
...
161166

162167

163-
class BytesBytesCodec(_Codec[Buffer, Buffer]):
168+
class BytesBytesCodec(BaseCodec[Buffer, Buffer]):
164169
"""Base class for bytes-to-bytes codecs."""
165170

166171
...

src/zarr/core/group.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import json
66
import logging
77
from collections import defaultdict
8-
from dataclasses import asdict, dataclass, field, replace
8+
from dataclasses import asdict, dataclass, field, fields, replace
99
from enum import Enum
1010
from typing import TYPE_CHECKING, Literal, cast, overload
1111

@@ -391,6 +391,15 @@ def from_dict(cls, data: dict[str, Any]) -> GroupMetadata:
391391
consolidated_metadata = data.pop("consolidated_metadata", None)
392392
if consolidated_metadata:
393393
data["consolidated_metadata"] = ConsolidatedMetadata.from_dict(consolidated_metadata)
394+
395+
zarr_format = data.get("zarr_format")
396+
if zarr_format == 2 or zarr_format is None:
397+
# zarr v2 allowed arbitrary keys here.
398+
# We don't want the GroupMetadata constructor to fail just because someone put an
399+
# extra key in the metadata.
400+
expected = {x.name for x in fields(cls)}
401+
data = {k: v for k, v in data.items() if k in expected}
402+
394403
return cls(**data)
395404

396405
def to_dict(self) -> dict[str, Any]:

src/zarr/core/metadata/v2.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from zarr.core.common import JSON, ChunkCoords
1414

1515
import json
16-
from dataclasses import dataclass, field, replace
16+
from dataclasses import dataclass, field, fields, replace
1717

1818
import numcodecs
1919
import numpy as np
@@ -140,6 +140,17 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
140140
_data = data.copy()
141141
# check that the zarr_format attribute is correct
142142
_ = parse_zarr_format(_data.pop("zarr_format"))
143+
144+
# zarr v2 allowed arbitrary keys here.
145+
# We don't want the ArrayV2Metadata constructor to fail just because someone put an
146+
# extra key in the metadata.
147+
expected = {x.name for x in fields(cls)}
148+
# https://github.com/zarr-developers/zarr-python/issues/2269
149+
# handle the renames
150+
expected |= {"dtype", "chunks"}
151+
152+
_data = {k: v for k, v in _data.items() if k in expected}
153+
143154
return cls(**_data)
144155

145156
def to_dict(self) -> dict[str, JSON]:

src/zarr/core/metadata/v3.py

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
if TYPE_CHECKING:
99
from typing import Self
1010

11-
import numpy.typing as npt
12-
1311
from zarr.core.buffer import Buffer, BufferPrototype
1412
from zarr.core.chunk_grids import ChunkGrid
1513
from zarr.core.common import JSON, ChunkCoords
@@ -22,6 +20,7 @@
2220

2321
import numcodecs.abc
2422
import numpy as np
23+
import numpy.typing as npt
2524

2625
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
2726
from zarr.core.array_spec import ArraySpec
@@ -38,6 +37,8 @@
3837
from zarr.core.metadata.common import ArrayMetadata, parse_attributes
3938
from zarr.registry import get_codec_class
4039

40+
DEFAULT_DTYPE = "float64"
41+
4142

4243
def parse_zarr_format(data: object) -> Literal[3]:
4344
if data == 3:
@@ -159,7 +160,7 @@ def _replace_special_floats(obj: object) -> Any:
159160
@dataclass(frozen=True, kw_only=True)
160161
class ArrayV3Metadata(ArrayMetadata):
161162
shape: ChunkCoords
162-
data_type: np.dtype[Any]
163+
data_type: DataType
163164
chunk_grid: ChunkGrid
164165
chunk_key_encoding: ChunkKeyEncoding
165166
fill_value: Any
@@ -174,7 +175,7 @@ def __init__(
174175
self,
175176
*,
176177
shape: Iterable[int],
177-
data_type: npt.DTypeLike,
178+
data_type: npt.DTypeLike | DataType,
178179
chunk_grid: dict[str, JSON] | ChunkGrid,
179180
chunk_key_encoding: dict[str, JSON] | ChunkKeyEncoding,
180181
fill_value: Any,
@@ -187,18 +188,18 @@ def __init__(
187188
Because the class is a frozen dataclass, we set attributes using object.__setattr__
188189
"""
189190
shape_parsed = parse_shapelike(shape)
190-
data_type_parsed = parse_dtype(data_type)
191+
data_type_parsed = DataType.parse(data_type)
191192
chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid)
192193
chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding)
193194
dimension_names_parsed = parse_dimension_names(dimension_names)
194-
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
195+
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed.to_numpy())
195196
attributes_parsed = parse_attributes(attributes)
196197
codecs_parsed_partial = parse_codecs(codecs)
197198
storage_transformers_parsed = parse_storage_transformers(storage_transformers)
198199

199200
array_spec = ArraySpec(
200201
shape=shape_parsed,
201-
dtype=data_type_parsed,
202+
dtype=data_type_parsed.to_numpy(),
202203
fill_value=fill_value_parsed,
203204
order="C", # TODO: order is not needed here.
204205
prototype=default_buffer_prototype(), # TODO: prototype is not needed here.
@@ -231,11 +232,14 @@ def _validate_metadata(self) -> None:
231232
if self.fill_value is None:
232233
raise ValueError("`fill_value` is required.")
233234
for codec in self.codecs:
234-
codec.validate(shape=self.shape, dtype=self.data_type, chunk_grid=self.chunk_grid)
235+
codec.validate(
236+
shape=self.shape, dtype=self.data_type.to_numpy(), chunk_grid=self.chunk_grid
237+
)
235238

236239
@property
237240
def dtype(self) -> np.dtype[Any]:
238-
return self.data_type
241+
"""Interpret Zarr dtype as NumPy dtype"""
242+
return self.data_type.to_numpy()
239243

240244
@property
241245
def ndim(self) -> int:
@@ -273,13 +277,13 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
273277
_ = parse_node_type_array(_data.pop("node_type"))
274278

275279
# check that the data_type attribute is valid
276-
_ = DataType(_data["data_type"])
280+
data_type = DataType.parse(_data.pop("data_type"))
277281

278282
# dimension_names key is optional, normalize missing to `None`
279283
_data["dimension_names"] = _data.pop("dimension_names", None)
280284
# attributes key is optional, normalize missing to `None`
281285
_data["attributes"] = _data.pop("attributes", None)
282-
return cls(**_data) # type: ignore[arg-type]
286+
return cls(**_data, data_type=data_type) # type: ignore[arg-type]
283287

284288
def to_dict(self) -> dict[str, JSON]:
285289
out_dict = super().to_dict()
@@ -497,8 +501,11 @@ def to_numpy_shortname(self) -> str:
497501
}
498502
return data_type_to_numpy[self]
499503

504+
def to_numpy(self) -> np.dtype[Any]:
505+
return np.dtype(self.to_numpy_shortname())
506+
500507
@classmethod
501-
def from_dtype(cls, dtype: np.dtype[Any]) -> DataType:
508+
def from_numpy(cls, dtype: np.dtype[Any]) -> DataType:
502509
dtype_to_data_type = {
503510
"|b1": "bool",
504511
"bool": "bool",
@@ -518,16 +525,21 @@ def from_dtype(cls, dtype: np.dtype[Any]) -> DataType:
518525
}
519526
return DataType[dtype_to_data_type[dtype.str]]
520527

521-
522-
def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
523-
try:
524-
dtype = np.dtype(data)
525-
except (ValueError, TypeError) as e:
526-
raise ValueError(f"Invalid V3 data_type: {data}") from e
527-
# check that this is a valid v3 data_type
528-
try:
529-
_ = DataType.from_dtype(dtype)
530-
except KeyError as e:
531-
raise ValueError(f"Invalid V3 data_type: {dtype}") from e
532-
533-
return dtype
528+
@classmethod
529+
def parse(cls, dtype: None | DataType | Any) -> DataType:
530+
if dtype is None:
531+
# the default dtype
532+
return DataType[DEFAULT_DTYPE]
533+
if isinstance(dtype, DataType):
534+
return dtype
535+
else:
536+
try:
537+
dtype = np.dtype(dtype)
538+
except (ValueError, TypeError) as e:
539+
raise ValueError(f"Invalid V3 data_type: {dtype}") from e
540+
# check that this is a valid v3 data_type
541+
try:
542+
data_type = DataType.from_numpy(dtype)
543+
except KeyError as e:
544+
raise ValueError(f"Invalid V3 data_type: {dtype}") from e
545+
return data_type

0 commit comments

Comments
 (0)