Skip to content

Commit 9f82cd6

Browse files
authored
Merge branch 'main' into fix/unbreak-chunks-initialized
2 parents c57e862 + bb55f0c commit 9f82cd6

File tree

10 files changed

+58
-17
lines changed

10 files changed

+58
-17
lines changed

changes/2991.doc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Updated the 3.0 migration guide to include the removal of "." syntax for getting group members.

changes/2996.bugfix.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be
2+
consistent with the behavior of `ArrayMetadata`.
3+
4+

docs/user-guide/v3_migration.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ The Group class
117117

118118
- Use :func:`zarr.Group.create_array` in place of :func:`zarr.Group.create_dataset`
119119
- Use :func:`zarr.Group.require_array` in place of :func:`zarr.Group.require_dataset`
120+
3. Disallow "." syntax for getting group members. To get a group member named ``foo``,
121+
use ``group["foo"]`` in place of ``group.foo``.
120122

121123
The Store class
122124
~~~~~~~~~~~~~~~

src/zarr/codecs/bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ async def _encode_single(
114114

115115
nd_array = chunk_array.as_ndarray_like()
116116
# Flatten the nd-array (only copy if needed) and reinterpret as bytes
117-
nd_array = nd_array.ravel().view(dtype="b")
117+
nd_array = nd_array.ravel().view(dtype="B")
118118
return chunk_spec.prototype.buffer.from_array_like(nd_array)
119119

120120
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:

src/zarr/codecs/crc32c_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ async def _encode_single(
5757
# Calculate the checksum and "cast" it to a numpy array
5858
checksum = np.array([crc32c(cast(typing_extensions.Buffer, data))], dtype=np.uint32)
5959
# Append the checksum (as bytes) to the data
60-
return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("b")))
60+
return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B")))
6161

6262
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
6363
return input_byte_length + 4

src/zarr/core/buffer/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class Buffer(ABC):
143143
def __init__(self, array_like: ArrayLike) -> None:
144144
if array_like.ndim != 1:
145145
raise ValueError("array_like: only 1-dim allowed")
146-
if array_like.dtype != np.dtype("b"):
146+
if array_like.dtype != np.dtype("B"):
147147
raise ValueError("array_like: only byte dtype allowed")
148148
self._data = array_like
149149

@@ -306,7 +306,7 @@ class NDBuffer:
306306
Notes
307307
-----
308308
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
309-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
309+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
310310
in order to use Python's type system to differentiate between the contiguous
311311
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
312312
two classes separate.

src/zarr/core/buffer/cpu.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:
4949

5050
@classmethod
5151
def create_zero_length(cls) -> Self:
52-
return cls(np.array([], dtype="b"))
52+
return cls(np.array([], dtype="B"))
5353

5454
@classmethod
5555
def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
9292
-------
9393
New buffer representing `bytes_like`
9494
"""
95-
return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
95+
return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))
9696

9797
def as_numpy_array(self) -> npt.NDArray[Any]:
9898
"""Returns the buffer as a NumPy array (host memory).
@@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
111111
"""Concatenate two buffers"""
112112

113113
other_array = other.as_array_like()
114-
assert other_array.dtype == np.dtype("b")
114+
assert other_array.dtype == np.dtype("B")
115115
return self.__class__(
116116
np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
117117
)
@@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
131131
Notes
132132
-----
133133
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
134-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
134+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
135135
in order to use Python's type system to differentiate between the contiguous
136136
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
137137
two classes separate.

src/zarr/core/buffer/gpu.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def __init__(self, array_like: ArrayLike) -> None:
5959

6060
if array_like.ndim != 1:
6161
raise ValueError("array_like: only 1-dim allowed")
62-
if array_like.dtype != np.dtype("b"):
62+
if array_like.dtype != np.dtype("B"):
6363
raise ValueError("array_like: only byte dtype allowed")
6464

6565
if not hasattr(array_like, "__cuda_array_interface__"):
@@ -84,7 +84,7 @@ def create_zero_length(cls) -> Self:
8484
-------
8585
New empty 0-length buffer
8686
"""
87-
return cls(cp.array([], dtype="b"))
87+
return cls(cp.array([], dtype="B"))
8888

8989
@classmethod
9090
def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -100,14 +100,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:
100100

101101
@classmethod
102102
def from_bytes(cls, bytes_like: BytesLike) -> Self:
103-
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
103+
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))
104104

105105
def as_numpy_array(self) -> npt.NDArray[Any]:
106106
return cast(npt.NDArray[Any], cp.asnumpy(self._data))
107107

108108
def __add__(self, other: core.Buffer) -> Self:
109109
other_array = other.as_array_like()
110-
assert other_array.dtype == np.dtype("b")
110+
assert other_array.dtype == np.dtype("B")
111111
gpu_other = Buffer(other_array)
112112
gpu_other_array = gpu_other.as_array_like()
113113
return self.__class__(
@@ -129,7 +129,7 @@ class NDBuffer(core.NDBuffer):
129129
Notes
130130
-----
131131
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
132-
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
132+
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
133133
in order to use Python's type system to differentiate between the contiguous
134134
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
135135
two classes separate.

src/zarr/core/group.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
)
5050
from zarr.core.config import config
5151
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
52-
from zarr.core.metadata.v3 import V3JsonEncoder
52+
from zarr.core.metadata.v3 import V3JsonEncoder, _replace_special_floats
5353
from zarr.core.sync import SyncMixin, sync
5454
from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError
5555
from zarr.storage import StoreLike, StorePath
@@ -334,7 +334,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
334334
if self.zarr_format == 3:
335335
return {
336336
ZARR_JSON: prototype.buffer.from_bytes(
337-
json.dumps(self.to_dict(), cls=V3JsonEncoder).encode()
337+
json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode()
338338
)
339339
}
340340
else:
@@ -355,10 +355,10 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
355355
assert isinstance(consolidated_metadata, dict)
356356
for k, v in consolidated_metadata.items():
357357
attrs = v.pop("attributes", None)
358-
d[f"{k}/{ZATTRS_JSON}"] = attrs
358+
d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
359359
if "shape" in v:
360360
# it's an array
361-
d[f"{k}/{ZARRAY_JSON}"] = v
361+
d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)
362362
else:
363363
d[f"{k}/{ZGROUP_JSON}"] = {
364364
"zarr_format": self.zarr_format,

tests/test_metadata/test_consolidated.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,37 @@ async def test_use_consolidated_false(
573573
assert len([x async for x in good.members()]) == 2
574574
assert good.metadata.consolidated_metadata
575575
assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"]
576+
577+
578+
@pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf])
579+
async def test_consolidated_metadata_encodes_special_chars(
580+
memory_store: Store, zarr_format: ZarrFormat, fill_value: float
581+
):
582+
root = await group(store=memory_store, zarr_format=zarr_format)
583+
_child = await root.create_group("child", attributes={"test": fill_value})
584+
_time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value)
585+
await zarr.api.asynchronous.consolidate_metadata(memory_store)
586+
587+
root = await group(store=memory_store, zarr_format=zarr_format)
588+
root_buffer = root.metadata.to_buffer_dict(default_buffer_prototype())
589+
590+
if zarr_format == 2:
591+
root_metadata = json.loads(root_buffer[".zmetadata"].to_bytes().decode("utf-8"))["metadata"]
592+
elif zarr_format == 3:
593+
root_metadata = json.loads(root_buffer["zarr.json"].to_bytes().decode("utf-8"))[
594+
"consolidated_metadata"
595+
]["metadata"]
596+
597+
if np.isnan(fill_value):
598+
expected_fill_value = "NaN"
599+
elif np.isneginf(fill_value):
600+
expected_fill_value = "-Infinity"
601+
elif np.isinf(fill_value):
602+
expected_fill_value = "Infinity"
603+
604+
if zarr_format == 2:
605+
assert root_metadata["child/.zattrs"]["test"] == expected_fill_value
606+
assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value
607+
elif zarr_format == 3:
608+
assert root_metadata["child"]["attributes"]["test"] == expected_fill_value
609+
assert root_metadata["time"]["fill_value"] == expected_fill_value

0 commit comments

Comments
 (0)