Skip to content

Commit 3265abd

Browse files
committed
Merge remote-tracking branch 'upstream/v3' into user/tom/feature/consolidated-metadata
2 parents ae02bb5 + f3a2e0a commit 3265abd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+258
-137
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_language_version:
77
python: python3
88
repos:
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.6.7
10+
rev: v0.6.8
1111
hooks:
1212
- id: ruff
1313
args: ["--fix", "--show-fixes"]

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ gpu = [
7878
"cupy-cuda12x",
7979
]
8080
docs = [
81-
'sphinx==7.4.7',
81+
'sphinx==8.0.2',
8282
'sphinx-autobuild>=2021.3.14',
83-
'sphinx-autoapi==3.3.1',
83+
'sphinx-autoapi==3.3.2',
8484
'sphinx_design',
8585
'sphinx-issues',
8686
'sphinx-copybutton',

src/zarr/api/asynchronous.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
import numpy.typing as npt
1010

11+
from zarr.abc.store import Store
1112
from zarr.core.array import Array, AsyncArray, get_array_metadata
1213
from zarr.core.buffer import NDArrayLike
1314
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
@@ -21,8 +22,9 @@
2122
from zarr.core.config import config
2223
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2324
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
24-
from zarr.store import (
25+
from zarr.storage import (
2526
StoreLike,
27+
StorePath,
2628
make_store_path,
2729
)
2830

@@ -286,6 +288,7 @@ async def open(
286288
Return type depends on what exists in the given store.
287289
"""
288290
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
291+
289292
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
290293

291294
if path is not None:
@@ -304,9 +307,9 @@ async def open(
304307
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
305308

306309
try:
307-
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
310+
return await open_array(store=store_path, zarr_format=zarr_format, **kwargs)
308311
except KeyError:
309-
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
312+
return await open_group(store=store_path, zarr_format=zarr_format, **kwargs)
310313

311314

312315
async def open_consolidated(*args: Any, use_consolidated: bool = True, **kwargs: Any) -> AsyncGroup:
@@ -383,7 +386,8 @@ async def save_array(
383386
or _default_zarr_version()
384387
)
385388

386-
store_path = await make_store_path(store, mode="w", storage_options=storage_options)
389+
mode = kwargs.pop("mode", None)
390+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
387391
if path is not None:
388392
store_path = store_path / path
389393
new = await AsyncArray.create(
@@ -560,7 +564,9 @@ async def group(
560564

561565
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
562566

563-
store_path = await make_store_path(store, storage_options=storage_options)
567+
mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a")
568+
569+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
564570
if path is not None:
565571
store_path = store_path / path
566572

@@ -854,7 +860,11 @@ async def create(
854860
if meta_array is not None:
855861
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
856862

857-
mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
863+
mode = kwargs.pop("mode", None)
864+
if mode is None:
865+
if not isinstance(store, Store | StorePath):
866+
mode = "a"
867+
858868
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
859869
if path is not None:
860870
store_path = store_path / path
@@ -1030,7 +1040,8 @@ async def open_array(
10301040
The opened array.
10311041
"""
10321042

1033-
store_path = await make_store_path(store, storage_options=storage_options)
1043+
mode = kwargs.pop("mode", None)
1044+
store_path = await make_store_path(store, mode=mode)
10341045
if path is not None:
10351046
store_path = store_path / path
10361047

src/zarr/api/synchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
if TYPE_CHECKING:
1212
from zarr.core.buffer import NDArrayLike
1313
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat
14-
from zarr.store import StoreLike
14+
from zarr.storage import StoreLike
1515

1616
__all__ = [
1717
"array",

src/zarr/core/array.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import json
44
from asyncio import gather
55
from dataclasses import dataclass, field, replace
6+
from logging import getLogger
67
from typing import TYPE_CHECKING, Any, Literal, cast
78

89
import numpy as np
910
import numpy.typing as npt
1011

1112
from zarr._compat import _deprecate_positional_args
12-
from zarr.abc.store import set_or_delete
13+
from zarr.abc.store import Store, set_or_delete
1314
from zarr.codecs import BytesCodec
1415
from zarr.codecs._v2 import V2Compressor, V2Filters
1516
from zarr.core.attributes import Attributes
@@ -19,7 +20,7 @@
1920
NDBuffer,
2021
default_buffer_prototype,
2122
)
22-
from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks
23+
from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks
2324
from zarr.core.chunk_key_encodings import (
2425
ChunkKeyEncoding,
2526
DefaultChunkKeyEncoding,
@@ -67,10 +68,8 @@
6768
from zarr.core.metadata.v3 import ArrayV3Metadata
6869
from zarr.core.sync import collect_aiterator, sync
6970
from zarr.registry import get_pipeline_class
70-
from zarr.store import StoreLike, StorePath, make_store_path
71-
from zarr.store.common import (
72-
ensure_no_existing_node,
73-
)
71+
from zarr.storage import StoreLike, make_store_path
72+
from zarr.storage.common import StorePath, ensure_no_existing_node
7473

7574
if TYPE_CHECKING:
7675
from collections.abc import Iterable, Iterator, Sequence
@@ -82,6 +81,8 @@
8281
# Array and AsyncArray are defined in the base ``zarr`` namespace
8382
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
8483

84+
logger = getLogger(__name__)
85+
8586

8687
def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
8788
if isinstance(data, ArrayV2Metadata | ArrayV3Metadata):
@@ -228,15 +229,14 @@ async def create(
228229

229230
shape = parse_shapelike(shape)
230231

231-
if chunk_shape is None:
232-
if chunks is None:
233-
chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize)
234-
else:
235-
chunks = parse_shapelike(chunks)
232+
if chunks is not None and chunk_shape is not None:
233+
raise ValueError("Only one of chunk_shape or chunks can be provided.")
236234

237-
chunk_shape = chunks
238-
elif chunks is not None:
239-
raise ValueError("Only one of chunk_shape or chunks must be provided.")
235+
dtype = np.dtype(dtype)
236+
if chunks:
237+
_chunks = normalize_chunks(chunks, shape, dtype.itemsize)
238+
else:
239+
_chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize)
240240

241241
if zarr_format == 3:
242242
if dimension_separator is not None:
@@ -259,7 +259,7 @@ async def create(
259259
store_path,
260260
shape=shape,
261261
dtype=dtype,
262-
chunk_shape=chunk_shape,
262+
chunk_shape=_chunks,
263263
fill_value=fill_value,
264264
chunk_key_encoding=chunk_key_encoding,
265265
codecs=codecs,
@@ -282,7 +282,7 @@ async def create(
282282
store_path,
283283
shape=shape,
284284
dtype=dtype,
285-
chunks=chunk_shape,
285+
chunks=_chunks,
286286
dimension_separator=dimension_separator,
287287
fill_value=fill_value,
288288
order=order,
@@ -410,6 +410,10 @@ async def open(
410410
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
411411
return cls(store_path=store_path, metadata=metadata_dict)
412412

413+
@property
414+
def store(self) -> Store:
415+
return self.store_path.store
416+
413417
@property
414418
def ndim(self) -> int:
415419
return len(self.metadata.shape)
@@ -837,6 +841,10 @@ def open(
837841
async_array = sync(AsyncArray.open(store))
838842
return cls(async_array)
839843

844+
@property
845+
def store(self) -> Store:
846+
return self._async_array.store
847+
840848
@property
841849
def ndim(self) -> int:
842850
return self._async_array.ndim
@@ -2386,15 +2394,26 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]:
23862394
def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]:
23872395
from zarr.core.group import AsyncGroup, GroupMetadata
23882396

2389-
required_parts = node.store_path.path.split("/")[:-1]
2390-
parents = []
2397+
store = node.store_path.store
2398+
path = node.store_path.path
2399+
if not path:
2400+
return []
2401+
2402+
required_parts = path.split("/")[:-1]
2403+
parents = [
2404+
# the root group
2405+
AsyncGroup(
2406+
metadata=GroupMetadata(zarr_format=node.metadata.zarr_format),
2407+
store_path=StorePath(store=store, path=""),
2408+
)
2409+
]
23912410

23922411
for i, part in enumerate(required_parts):
2393-
path = "/".join(required_parts[:i] + [part])
2412+
p = "/".join(required_parts[:i] + [part])
23942413
parents.append(
23952414
AsyncGroup(
23962415
metadata=GroupMetadata(zarr_format=node.metadata.zarr_format),
2397-
store_path=StorePath(store=node.store_path.store, path=path),
2416+
store_path=StorePath(store=store, path=p),
23982417
)
23992418
)
24002419

src/zarr/core/chunk_grids.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
import itertools
44
import math
5+
import numbers
56
import operator
67
from abc import abstractmethod
78
from dataclasses import dataclass
89
from functools import reduce
9-
from typing import TYPE_CHECKING
10+
from typing import TYPE_CHECKING, Any
1011

1112
import numpy as np
1213

@@ -97,6 +98,49 @@ def _guess_chunks(
9798
return tuple(int(x) for x in chunks)
9899

99100

101+
def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]:
102+
"""Convenience function to normalize the `chunks` argument for an array
103+
with the given `shape`."""
104+
105+
# N.B., expect shape already normalized
106+
107+
# handle auto-chunking
108+
if chunks is None or chunks is True:
109+
return _guess_chunks(shape, typesize)
110+
111+
# handle no chunking
112+
if chunks is False:
113+
return shape
114+
115+
# handle 1D convenience form
116+
if isinstance(chunks, numbers.Integral):
117+
chunks = tuple(int(chunks) for _ in shape)
118+
119+
# handle dask-style chunks (iterable of iterables)
120+
if all(isinstance(c, (tuple | list)) for c in chunks):
121+
# take first chunk size for each dimension
122+
chunks = tuple(
123+
c[0] for c in chunks
124+
) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1])
125+
126+
# handle bad dimensionality
127+
if len(chunks) > len(shape):
128+
raise ValueError("too many dimensions in chunks")
129+
130+
# handle underspecified chunks
131+
if len(chunks) < len(shape):
132+
# assume chunks across remaining dimensions
133+
chunks += shape[len(chunks) :]
134+
135+
# handle None or -1 in chunks
136+
if -1 in chunks or None in chunks:
137+
chunks = tuple(
138+
s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False)
139+
)
140+
141+
return tuple(int(c) for c in chunks)
142+
143+
100144
@dataclass(frozen=True)
101145
class ChunkGrid(Metadata):
102146
@classmethod

src/zarr/core/group.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
3737
from zarr.core.metadata.common import ArrayMetadata
3838
from zarr.core.sync import SyncMixin, sync
39-
from zarr.store import StoreLike, StorePath, make_store_path
40-
from zarr.store.common import ensure_no_existing_node
39+
from zarr.storage import StoreLike, make_store_path
40+
from zarr.storage.common import StorePath, ensure_no_existing_node
4141

4242
if TYPE_CHECKING:
4343
from collections.abc import AsyncGenerator, Generator, Iterable, Iterator
@@ -501,7 +501,9 @@ async def open(
501501
# alternatively, we could warn and favor v3
502502
raise ValueError("Both zarr.json and .zgroup objects exist")
503503
if zarr_json_bytes is None and zgroup_bytes is None:
504-
raise FileNotFoundError(store_path)
504+
raise FileNotFoundError(
505+
f"could not find zarr.json or .zgroup objects in {store_path}"
506+
)
505507
# set zarr_format based on which keys were found
506508
if zarr_json_bytes is not None:
507509
zarr_format = 3
@@ -1169,6 +1171,10 @@ def _members_consolidated(
11691171
):
11701172
yield from obj._members_consolidated(max_depth, current_depth + 1, prefix=key)
11711173

1174+
async def keys(self) -> AsyncGenerator[str, None]:
1175+
async for key, _ in self.members():
1176+
yield key
1177+
11721178
async def contains(self, member: str) -> bool:
11731179
# TODO: this can be made more efficient.
11741180
try:
@@ -1294,15 +1300,18 @@ def __delitem__(self, key: str) -> None:
12941300
self._sync(self._async_group.delitem(key))
12951301

12961302
def __iter__(self) -> Iterator[str]:
1297-
raise NotImplementedError
1303+
yield from self.keys()
12981304

12991305
def __len__(self) -> int:
1300-
raise NotImplementedError
1306+
return self.nmembers()
13011307

13021308
def __setitem__(self, key: str, value: Any) -> None:
13031309
"""__setitem__ is not supported in v3"""
13041310
raise NotImplementedError
13051311

1312+
def __repr__(self) -> str:
1313+
return f"<Group {self.store_path}>"
1314+
13061315
async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group:
13071316
new_metadata = replace(self.metadata, attributes=new_attributes)
13081317

@@ -1377,6 +1386,9 @@ def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group],
13771386

13781387
return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members)
13791388

1389+
def keys(self) -> Generator[str, None]:
1390+
yield from self._sync_iter(self._async_group.keys())
1391+
13801392
def __contains__(self, member: str) -> bool:
13811393
return self._sync(self._async_group.contains(member))
13821394

src/zarr/storage/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from zarr.storage.common import StoreLike, StorePath, make_store_path
2+
from zarr.storage.local import LocalStore
3+
from zarr.storage.memory import MemoryStore
4+
from zarr.storage.remote import RemoteStore
5+
from zarr.storage.zip import ZipStore
6+
7+
__all__ = [
8+
"LocalStore",
9+
"MemoryStore",
10+
"RemoteStore",
11+
"StoreLike",
12+
"StorePath",
13+
"ZipStore",
14+
"make_store_path",
15+
]
File renamed without changes.

0 commit comments

Comments
 (0)