Skip to content

Commit a0fb41f

Browse files
authored
Merge branch 'main' into fix-complex-fill-value
2 parents 7e2b28b + 6ce0526 commit a0fb41f

File tree

9 files changed

+376
-35
lines changed

9 files changed

+376
-35
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_language_version:
77
python: python3
88
repos:
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.6.9
10+
rev: v0.7.0
1111
hooks:
1212
- id: ruff
1313
args: ["--fix", "--show-fixes"]
@@ -22,7 +22,7 @@ repos:
2222
hooks:
2323
- id: check-yaml
2424
- repo: https://github.com/pre-commit/mirrors-mypy
25-
rev: v1.11.2
25+
rev: v1.12.1
2626
hooks:
2727
- id: mypy
2828
files: src|tests

docs/guide/storage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ that implements the `AbstractFileSystem` API,
7272
.. code-block:: python
7373
7474
>>> import zarr
75-
>>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r")
75+
>>> store = zarr.storage.RemoteStore.from_url("gs://foo/bar", mode="r")
7676
>>> zarr.open(store=store)
7777
<Array <RemoteStore(GCSFileSystem, foo/bar)> shape=(10, 20) dtype=float32>
7878

src/zarr/abc/store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self:
168168
169169
Returns
170170
-------
171-
store:
171+
store
172172
A new store of the same type with the new mode.
173173
174174
Examples

src/zarr/core/array.py

Lines changed: 112 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import json
44
from asyncio import gather
5-
from dataclasses import dataclass, field, replace
5+
from dataclasses import dataclass, field
66
from itertools import starmap
77
from logging import getLogger
88
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
@@ -1104,15 +1104,15 @@ async def setitem(
11041104
)
11051105
return await self._set_selection(indexer, value, prototype=prototype)
11061106

1107-
async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self:
1107+
async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None:
1108+
new_shape = parse_shapelike(new_shape)
11081109
assert len(new_shape) == len(self.metadata.shape)
11091110
new_metadata = self.metadata.update_shape(new_shape)
11101111

1111-
# Remove all chunks outside of the new shape
1112-
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1113-
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
1114-
11151112
if delete_outside_chunks:
1113+
# Remove all chunks outside of the new shape
1114+
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
1115+
new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
11161116

11171117
async def _delete_key(key: str) -> None:
11181118
await (self.store_path / key).delete()
@@ -1128,7 +1128,63 @@ async def _delete_key(key: str) -> None:
11281128

11291129
# Write new metadata
11301130
await self._save_metadata(new_metadata)
1131-
return replace(self, metadata=new_metadata)
1131+
1132+
# Update metadata (in place)
1133+
object.__setattr__(self, "metadata", new_metadata)
1134+
1135+
async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
1136+
"""Append `data` to `axis`.
1137+
1138+
Parameters
1139+
----------
1140+
data : array-like
1141+
Data to be appended.
1142+
axis : int
1143+
Axis along which to append.
1144+
1145+
Returns
1146+
-------
1147+
new_shape : tuple
1148+
1149+
Notes
1150+
-----
1151+
The size of all dimensions other than `axis` must match between this
1152+
array and `data`.
1153+
"""
1154+
# ensure data is array-like
1155+
if not hasattr(data, "shape"):
1156+
data = np.asanyarray(data)
1157+
1158+
self_shape_preserved = tuple(s for i, s in enumerate(self.shape) if i != axis)
1159+
data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis)
1160+
if self_shape_preserved != data_shape_preserved:
1161+
raise ValueError(
1162+
f"shape of data to append is not compatible with the array. "
1163+
f"The shape of the data is ({data_shape_preserved})"
1164+
f"and the shape of the array is ({self_shape_preserved})."
1165+
"All dimensions must match except for the dimension being "
1166+
"appended."
1167+
)
1168+
# remember old shape
1169+
old_shape = self.shape
1170+
1171+
# determine new shape
1172+
new_shape = tuple(
1173+
self.shape[i] if i != axis else self.shape[i] + data.shape[i]
1174+
for i in range(len(self.shape))
1175+
)
1176+
1177+
# resize
1178+
await self.resize(new_shape)
1179+
1180+
# store data
1181+
append_selection = tuple(
1182+
slice(None) if i != axis else slice(old_shape[i], new_shape[i])
1183+
for i in range(len(self.shape))
1184+
)
1185+
await self.setitem(append_selection, data)
1186+
1187+
return new_shape
11321188

11331189
async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
11341190
# metadata.attributes is "frozen" so we simply clear and update the dict
@@ -1147,7 +1203,8 @@ async def info(self) -> None:
11471203
raise NotImplementedError
11481204

11491205

1150-
@dataclass(frozen=True)
1206+
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
1207+
@dataclass(frozen=False)
11511208
class Array:
11521209
"""Instantiate an array from an initialized store."""
11531210

@@ -1297,6 +1354,11 @@ def shape(self) -> ChunkCoords:
12971354
"""
12981355
return self._async_array.shape
12991356

1357+
@shape.setter
1358+
def shape(self, value: ChunkCoords) -> None:
1359+
"""Sets the shape of the array by calling resize."""
1360+
self.resize(value)
1361+
13001362
@property
13011363
def chunks(self) -> ChunkCoords:
13021364
"""Returns a tuple of integers describing the length of each dimension of a chunk of the array.
@@ -2754,18 +2816,18 @@ def blocks(self) -> BlockIndex:
27542816
:func:`set_block_selection` for documentation and examples."""
27552817
return BlockIndex(self)
27562818

2757-
def resize(self, new_shape: ChunkCoords) -> Array:
2819+
def resize(self, new_shape: ShapeLike) -> None:
27582820
"""
27592821
Change the shape of the array by growing or shrinking one or more
27602822
dimensions.
27612823
2762-
This method does not modify the original Array object. Instead, it returns a new Array
2763-
with the specified shape.
2824+
Parameters
2825+
----------
2826+
new_shape : tuple
2827+
New shape of the array.
27642828
27652829
Notes
27662830
-----
2767-
When resizing an array, the data are not rearranged in any way.
2768-
27692831
If one or more dimensions are shrunk, any chunks falling outside the
27702832
new array shape will be deleted from the underlying store.
27712833
However, it is noteworthy that the chunks partially falling inside the new array
@@ -2778,7 +2840,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27782840
>>> import zarr
27792841
>>> z = zarr.zeros(shape=(10000, 10000),
27802842
>>> chunk_shape=(1000, 1000),
2781-
>>> store=StorePath(MemoryStore(mode="w")),
27822843
>>> dtype="i4",)
27832844
>>> z.shape
27842845
(10000, 10000)
@@ -2791,10 +2852,43 @@ def resize(self, new_shape: ChunkCoords) -> Array:
27912852
>>> z2.shape
27922853
(50, 50)
27932854
"""
2794-
resized = sync(self._async_array.resize(new_shape))
2795-
# TODO: remove this cast when type inference improves
2796-
_resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized)
2797-
return type(self)(_resized)
2855+
sync(self._async_array.resize(new_shape))
2856+
2857+
def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
2858+
"""Append `data` to `axis`.
2859+
2860+
Parameters
2861+
----------
2862+
data : array-like
2863+
Data to be appended.
2864+
axis : int
2865+
Axis along which to append.
2866+
2867+
Returns
2868+
-------
2869+
new_shape : tuple
2870+
2871+
Notes
2872+
-----
2873+
The size of all dimensions other than `axis` must match between this
2874+
array and `data`.
2875+
2876+
Examples
2877+
--------
2878+
>>> import numpy as np
2879+
>>> import zarr
2880+
>>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
2881+
>>> z = zarr.array(a, chunks=(1000, 100))
2882+
>>> z.shape
2883+
(10000, 1000)
2884+
>>> z.append(a)
2885+
(20000, 1000)
2886+
>>> z.append(np.vstack([a, a]), axis=1)
2887+
(20000, 2000)
2888+
>>> z.shape
2889+
(20000, 2000)
2890+
"""
2891+
return sync(self._async_array.append(data, axis=axis))
27982892

27992893
def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
28002894
# TODO: remove this cast when type inference improves

src/zarr/storage/remote.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from typing import TYPE_CHECKING, Any, Self
45

56
from zarr.abc.store import ByteRangeRequest, Store
@@ -32,7 +33,8 @@ class RemoteStore(Store):
3233
mode : AccessModeLiteral
3334
The access mode to use.
3435
path : str
35-
The root path of the store.
36+
The root path of the store. This should be a relative path and must not include the
37+
filesystem scheme.
3638
allowed_exceptions : tuple[type[Exception], ...]
3739
When fetching data, these cases will be deemed to correspond to missing keys.
3840
@@ -44,6 +46,23 @@ class RemoteStore(Store):
4446
supports_deletes
4547
supports_partial_writes
4648
supports_listing
49+
50+
Raises
51+
------
52+
TypeError
53+
If the Filesystem does not support async operations.
54+
ValueError
55+
If the path argument includes a scheme.
56+
57+
Warns
58+
-----
59+
UserWarning
60+
If the file system (fs) was not created with `asynchronous=True`.
61+
62+
See Also
63+
--------
64+
RemoteStore.from_upath
65+
RemoteStore.from_url
4766
"""
4867

4968
# based on FSSpec
@@ -69,6 +88,15 @@ def __init__(
6988

7089
if not self.fs.async_impl:
7190
raise TypeError("Filesystem needs to support async operations.")
91+
if not self.fs.asynchronous:
92+
warnings.warn(
93+
f"fs ({fs}) was not created with `asynchronous=True`, this may lead to surprising behavior",
94+
stacklevel=2,
95+
)
96+
if "://" in path and not path.startswith("http"):
97+
# `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯)
98+
scheme, _ = path.split("://", maxsplit=1)
99+
raise ValueError(f"path argument to RemoteStore must not include scheme ({scheme}://)")
72100

73101
@classmethod
74102
def from_upath(
@@ -134,7 +162,17 @@ def from_url(
134162
# before fsspec==2024.3.1
135163
from fsspec.core import url_to_fs
136164

137-
fs, path = url_to_fs(url, **storage_options)
165+
opts = storage_options or {}
166+
opts = {"asynchronous": True, **opts}
167+
168+
fs, path = url_to_fs(url, **opts)
169+
170+
# fsspec is not consistent about removing the scheme from the path, so check and strip it here
171+
# https://github.com/fsspec/filesystem_spec/issues/1722
172+
if "://" in path and not path.startswith("http"):
173+
# `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯)
174+
path = fs._strip_protocol(path)
175+
138176
return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions)
139177

140178
async def clear(self) -> None:

0 commit comments

Comments (0)