Skip to content

Commit 5f2c925

Browse files
authored
Merge branch 'main' into mode-argument-on-zarr-save
2 parents 961eb60 + bc588a7 commit 5f2c925

File tree

4 files changed

+347
-23
lines changed

4 files changed

+347
-23
lines changed

src/zarr/core/array.py

Lines changed: 112 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import json
44
from asyncio import gather
5-
from dataclasses import dataclass, field, replace
5+
from dataclasses import dataclass, field
66
from itertools import starmap
77
from logging import getLogger
88
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
@@ -1104,15 +1104,15 @@ async def setitem(
11041104
)
11051105
return await self._set_selection(indexer, value, prototype=prototype)
11061106

1107-
async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self:
1107+
async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None:
1108+
new_shape = parse_shapelike(new_shape)
11081109
assert len(new_shape) == len(self.metadata.shape)
11091110
new_metadata = self.metadata.update_shape(new_shape)
11101111

1111-
# Remove all chunks outside of the new shape
1112-
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1113-
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
1114-
11151112
if delete_outside_chunks:
1113+
# Remove all chunks outside of the new shape
1114+
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1115+
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
11161116

11171117
async def _delete_key(key: str) -> None:
11181118
await (self.store_path / key).delete()
@@ -1128,7 +1128,63 @@ async def _delete_key(key: str) -> None:
11281128

11291129
# Write new metadata
11301130
await self._save_metadata(new_metadata)
1131-
return replace(self, metadata=new_metadata)
1131+
1132+
# Update metadata (in place)
1133+
object.__setattr__(self, "metadata", new_metadata)
1134+
1135+
async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
1136+
"""Append `data` to `axis`.
1137+
1138+
Parameters
1139+
----------
1140+
data : array-like
1141+
Data to be appended.
1142+
axis : int
1143+
Axis along which to append.
1144+
1145+
Returns
1146+
-------
1147+
new_shape : tuple
    The new shape of the array after the data has been appended.
1148+
1149+
Notes
1150+
-----
1151+
The size of all dimensions other than `axis` must match between this
1152+
array and `data`.
1153+
"""
1154+
# ensure data is array-like
1155+
if not hasattr(data, "shape"):
1156+
data = np.asanyarray(data)
1157+
1158+
self_shape_preserved = tuple(s for i, s in enumerate(self.shape) if i != axis)
1159+
data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis)
1160+
if self_shape_preserved != data_shape_preserved:
1161+
raise ValueError(
1162+
f"shape of data to append is not compatible with the array. "
1163+
f"The shape of the data is ({data_shape_preserved})"
1164+
f"and the shape of the array is ({self_shape_preserved})."
1165+
"All dimensions must match except for the dimension being "
1166+
"appended."
1167+
)
1168+
# remember old shape
1169+
old_shape = self.shape
1170+
1171+
# determine new shape
1172+
new_shape = tuple(
1173+
self.shape[i] if i != axis else self.shape[i] + data.shape[i]
1174+
for i in range(len(self.shape))
1175+
)
1176+
1177+
# resize
1178+
await self.resize(new_shape)
1179+
1180+
# store data
1181+
append_selection = tuple(
1182+
slice(None) if i != axis else slice(old_shape[i], new_shape[i])
1183+
for i in range(len(self.shape))
1184+
)
1185+
await self.setitem(append_selection, data)
1186+
1187+
return new_shape
11321188

11331189
async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
11341190
# metadata.attributes is "frozen" so we simply clear and update the dict
@@ -1147,7 +1203,8 @@ async def info(self) -> None:
11471203
raise NotImplementedError
11481204

11491205

1150-
@dataclass(frozen=True)
1206+
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
1207+
@dataclass(frozen=False)
11511208
class Array:
11521209
"""Instantiate an array from an initialized store."""
11531210

@@ -1297,6 +1354,11 @@ def shape(self) -> ChunkCoords:
12971354
"""
12981355
return self._async_array.shape
12991356

1357+
@shape.setter
1358+
def shape(self, value: ChunkCoords) -> None:
1359+
"""Sets the shape of the array by calling resize."""
1360+
self.resize(value)
1361+
13001362
@property
13011363
def chunks(self) -> ChunkCoords:
13021364
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
@@ -2754,18 +2816,18 @@ def blocks(self) -> BlockIndex:
27542816
:func:`set_block_selection` for documentation and examples."""
27552817
return BlockIndex(self)
27562818

2757-
def resize(self, new_shape: ChunkCoords) -> Array:
2819+
def resize(self, new_shape: ShapeLike) -> None:
27582820
"""
27592821
Change the shape of the array by growing or shrinking one or more
27602822
dimensions.
27612823
2762-
This method does not modify the original Array object. Instead, it returns a new Array
2763-
with the specified shape.
2824+
Parameters
2825+
----------
2826+
new_shape : tuple
2827+
New shape of the array.
27642828
27652829
Notes
27662830
-----
2767-
When resizing an array, the data are not rearranged in any way.
2768-
27692831
If one or more dimensions are shrunk, any chunks falling outside the
27702832
new array shape will be deleted from the underlying store.
27712833
However, it is noteworthy that the chunks partially falling inside the new array
@@ -2778,7 +2840,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27782840
>>> import zarr
27792841
>>> z = zarr.zeros(shape=(10000, 10000),
27802842
>>> chunk_shape=(1000, 1000),
2781-
>>> store=StorePath(MemoryStore(mode="w")),
27822843
>>> dtype="i4",)
27832844
>>> z.shape
27842845
(10000, 10000)
@@ -2791,10 +2852,43 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27912852
>>> z2.shape
27922853
(50, 50)
27932854
"""
2794-
resized = sync(self._async_array.resize(new_shape))
2795-
# TODO: remove this cast when type inference improves
2796-
_resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized)
2797-
return type(self)(_resized)
2855+
sync(self._async_array.resize(new_shape))
2856+
2857+
def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
2858+
"""Append `data` to `axis`.
2859+
2860+
Parameters
2861+
----------
2862+
data : array-like
2863+
Data to be appended.
2864+
axis : int
2865+
Axis along which to append.
2866+
2867+
Returns
2868+
-------
2869+
new_shape : tuple
    The new shape of the array after the data has been appended.
2870+
2871+
Notes
2872+
-----
2873+
The size of all dimensions other than `axis` must match between this
2874+
array and `data`.
2875+
2876+
Examples
2877+
--------
2878+
>>> import numpy as np
2879+
>>> import zarr
2880+
>>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
2881+
>>> z = zarr.array(a, chunks=(1000, 100))
2882+
>>> z.shape
2883+
(10000, 1000)
2884+
>>> z.append(a)
2885+
(20000, 1000)
2886+
>>> z.append(np.vstack([a, a]), axis=1)
2887+
(20000, 2000)
2888+
>>> z.shape
2889+
(20000, 2000)
2890+
"""
2891+
return sync(self._async_array.append(data, axis=axis))
27982892

27992893
def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
28002894
# TODO: remove this cast when type inference improves

src/zarr/core/metadata/v3.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@
4343

4444
DEFAULT_DTYPE = "float64"
4545

46+
# Keep in sync with _replace_special_floats
47+
SPECIAL_FLOATS_ENCODED = {
48+
"Infinity": np.inf,
49+
"-Infinity": -np.inf,
50+
"NaN": np.nan,
51+
}
52+
4653

4754
def parse_zarr_format(data: object) -> Literal[3]:
4855
if data == 3:
@@ -149,7 +156,7 @@ def default(self, o: object) -> Any:
149156
if isinstance(out, complex):
150157
# python complex types are not JSON serializable, so we use the
151158
# serialization defined in the zarr v3 spec
152-
return [out.real, out.imag]
159+
return _replace_special_floats([out.real, out.imag])
153160
elif np.isnan(out):
154161
return "NaN"
155162
elif np.isinf(out):
@@ -447,8 +454,11 @@ def parse_fill_value(
447454
if isinstance(fill_value, Sequence) and not isinstance(fill_value, str):
448455
if data_type in (DataType.complex64, DataType.complex128):
449456
if len(fill_value) == 2:
457+
decoded_fill_value = tuple(
458+
SPECIAL_FLOATS_ENCODED.get(value, value) for value in fill_value
459+
)
450460
# complex datatypes serialize to JSON arrays with two elements
451-
return np_dtype.type(complex(*fill_value))
461+
return np_dtype.type(complex(*decoded_fill_value))
452462
else:
453463
msg = (
454464
f"Got an invalid fill value for complex data type {data_type.value}."
@@ -475,12 +485,20 @@ def parse_fill_value(
475485
pass
476486
elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value):
477487
pass
478-
elif np_dtype.kind in "cf":
488+
elif np_dtype.kind == "f":
479489
# float comparison is not exact, especially when dtype <float64
480-
# so we us np.isclose for this comparison.
490+
# so we use np.isclose for this comparison.
481491
# this also allows us to compare nan fill_values
482492
if not np.isclose(fill_value, casted_value, equal_nan=True):
483493
raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}")
494+
elif np_dtype.kind == "c":
495+
# confusingly np.isclose(np.inf, np.inf + 0j) is False on numpy<2, so compare real and imag parts
496+
# explicitly.
497+
if not (
498+
np.isclose(np.real(fill_value), np.real(casted_value), equal_nan=True)
499+
and np.isclose(np.imag(fill_value), np.imag(casted_value), equal_nan=True)
500+
):
501+
raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}")
484502
else:
485503
if fill_value != casted_value:
486504
raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}")

0 commit comments

Comments
 (0)