Skip to content

Commit 2296c3f

Browse files
committed
Merge branch 'main' of https://github.com/zarr-developers/zarr-python into refactor/store-mode
2 parents 62939b6 + 2fa0082 commit 2296c3f

File tree

10 files changed

+776
-22
lines changed

10 files changed

+776
-22
lines changed

.github/workflows/releases.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ jobs:
5555
with:
5656
name: releases
5757
path: dist
58-
- uses: pypa/gh-action-pypi-publish@v1.11.0
58+
- uses: pypa/gh-action-pypi-publish@v1.12.2
5959
with:
6060
user: __token__
6161
password: ${{ secrets.pypi_password }}

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_language_version:
77
python: python3
88
repos:
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.7.2
10+
rev: v0.7.3
1111
hooks:
1212
- id: ruff
1313
args: ["--fix", "--show-fixes"]

src/zarr/core/_info.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import dataclasses
2+
import textwrap
3+
from typing import Any, Literal
4+
5+
import numcodecs.abc
6+
import numpy as np
7+
8+
from zarr.abc.codec import Codec
9+
from zarr.core.metadata.v3 import DataType
10+
11+
12+
@dataclasses.dataclass(kw_only=True)
13+
class GroupInfo:
14+
"""
15+
Visual summary for a Group.
16+
17+
Note that this method and its properties is not part of
18+
Zarr's public API.
19+
"""
20+
21+
_name: str
22+
_type: Literal["Group"] = "Group"
23+
_zarr_format: Literal[2, 3]
24+
_read_only: bool
25+
_store_type: str
26+
_count_members: int | None = None
27+
_count_arrays: int | None = None
28+
_count_groups: int | None = None
29+
30+
def __repr__(self) -> str:
31+
template = textwrap.dedent("""\
32+
Name : {_name}
33+
Type : {_type}
34+
Zarr format : {_zarr_format}
35+
Read-only : {_read_only}
36+
Store type : {_store_type}""")
37+
38+
if self._count_members is not None:
39+
template += "\nNo. members : {_count_members}"
40+
if self._count_arrays is not None:
41+
template += "\nNo. arrays : {_count_arrays}"
42+
if self._count_groups is not None:
43+
template += "\nNo. groups : {_count_groups}"
44+
return template.format(**dataclasses.asdict(self))
45+
46+
47+
def human_readable_size(size: int) -> str:
    """
    Format a byte count using binary (power-of-1024) unit suffixes.

    Values below 1024 are returned as the plain number. Larger values are
    scaled to K, M, G, T or P and shown with one decimal place; anything
    at or above 2**50 is expressed in P.
    """
    if size < 2**10:
        return f"{size}"

    # (exponent of the unit, suffix); a unit applies while the size is
    # below the next unit's threshold.
    for exponent, suffix in ((10, "K"), (20, "M"), (30, "G"), (40, "T")):
        if size < 2 ** (exponent + 10):
            return f"{size / float(2**exponent):.1f}{suffix}"

    return f"{size / float(2**50):.1f}P"
60+
61+
62+
def byte_info(size: int) -> str:
    """
    Describe a byte count for display.

    Sizes below 1024 are shown as the bare number; larger sizes are shown
    as the exact number followed by the human-readable form in parentheses.
    """
    text = str(size)
    if not size < 2**10:
        text = f"{size} ({human_readable_size(size)})"
    return text
67+
68+
69+
@dataclasses.dataclass(kw_only=True)
70+
class ArrayInfo:
71+
"""
72+
Visual summary for an Array.
73+
74+
Note that this method and its properties is not part of
75+
Zarr's public API.
76+
"""
77+
78+
_type: Literal["Array"] = "Array"
79+
_zarr_format: Literal[2, 3]
80+
_data_type: np.dtype[Any] | DataType
81+
_shape: tuple[int, ...]
82+
_chunk_shape: tuple[int, ...] | None = None
83+
_order: Literal["C", "F"]
84+
_read_only: bool
85+
_store_type: str
86+
_compressor: numcodecs.abc.Codec | None = None
87+
_filters: tuple[numcodecs.abc.Codec, ...] | None = None
88+
_codecs: list[Codec] | None = None
89+
_count_bytes: int | None = None
90+
_count_bytes_stored: int | None = None
91+
_count_chunks_initialized: int | None = None
92+
93+
def __repr__(self) -> str:
94+
template = textwrap.dedent("""\
95+
Type : {_type}
96+
Zarr format : {_zarr_format}
97+
Data type : {_data_type}
98+
Shape : {_shape}
99+
Chunk shape : {_chunk_shape}
100+
Order : {_order}
101+
Read-only : {_read_only}
102+
Store type : {_store_type}""")
103+
104+
kwargs = dataclasses.asdict(self)
105+
if self._chunk_shape is None:
106+
# for non-regular chunk grids
107+
kwargs["chunk_shape"] = "<variable>"
108+
if self._compressor is not None:
109+
template += "\nCompressor : {_compressor}"
110+
111+
if self._filters is not None:
112+
template += "\nFilters : {_filters}"
113+
114+
if self._codecs is not None:
115+
template += "\nCodecs : {_codecs}"
116+
117+
if self._count_bytes is not None:
118+
template += "\nNo. bytes : {_count_bytes}"
119+
kwargs["_count_bytes"] = byte_info(self._count_bytes)
120+
121+
if self._count_bytes_stored is not None:
122+
template += "\nNo. bytes stored : {_count_bytes_stored}"
123+
kwargs["_count_stored"] = byte_info(self._count_bytes_stored)
124+
125+
if (
126+
self._count_bytes is not None
127+
and self._count_bytes_stored is not None
128+
and self._count_bytes_stored > 0
129+
):
130+
template += "\nStorage ratio : {_storage_ratio}"
131+
kwargs["_storage_ratio"] = f"{self._count_bytes / self._count_bytes_stored:.1f}"
132+
133+
if self._count_chunks_initialized is not None:
134+
template += "\nChunks Initialized : {_count_chunks_initialized}"
135+
return template.format(**kwargs)

src/zarr/core/array.py

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from zarr.abc.store import Store, set_or_delete
1515
from zarr.codecs import _get_default_array_bytes_codec
1616
from zarr.codecs._v2 import V2Codec
17+
from zarr.core._info import ArrayInfo
1718
from zarr.core.attributes import Attributes
1819
from zarr.core.buffer import (
1920
BufferPrototype,
@@ -1332,9 +1333,65 @@ async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
13321333
def __repr__(self) -> str:
13331334
return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>"
13341335

1335-
async def info(self) -> None:
1336+
@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    This delegates to ``self._info()`` without passing any extra values.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._info()
1352+
1353+
async def info_complete(self) -> Any:
    """
    Return the array information, including the number of initialized chunks.

    The chunk count is awaited from ``self.nchunks_initialized()`` and
    handed to ``self._info`` as an extra value.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info
        The statically known subset of metadata about an array.
    """
    # TODO: get the size of the object from the store.
    extra = {
        # Keyed by the ArrayInfo field name (leading underscore included).
        "_count_chunks_initialized": await self.nchunks_initialized(),
        # count_bytes_stored isn't yet implemented.
    }
    return self._info(extra=extra)
13371362

1363+
def _info(self, extra: dict[str, int] | None = None) -> Any:
    """
    Build the ``ArrayInfo`` summary for this array.

    Parameters
    ----------
    extra : dict[str, int], optional
        Additional values to include in the summary. Keys are ``ArrayInfo``
        field names; a key given without the leading underscore has it added.

    Returns
    -------
    ArrayInfo

    Raises
    ------
    NotImplementedError
        If the array is not Zarr format 2 and its chunk grid is not a
        ``RegularChunkGrid``.
    """
    kwargs: dict[str, Any] = {}
    if self.metadata.zarr_format == 2:
        assert isinstance(self.metadata, ArrayV2Metadata)
        if self.metadata.compressor is not None:
            kwargs["_compressor"] = self.metadata.compressor
        if self.metadata.filters is not None:
            kwargs["_filters"] = self.metadata.filters
        kwargs["_data_type"] = self.metadata.dtype
        kwargs["_chunk_shape"] = self.metadata.chunks
    else:
        kwargs["_codecs"] = self.metadata.codecs
        kwargs["_data_type"] = self.metadata.data_type
        # just regular?
        chunk_grid = self.metadata.chunk_grid
        if isinstance(chunk_grid, RegularChunkGrid):
            kwargs["_chunk_shape"] = chunk_grid.chunk_shape
        else:
            raise NotImplementedError(
                f"'info' is not yet implemented for chunk grids of type {type(chunk_grid)}"
            )

    kwargs["_count_bytes"] = self.dtype.itemsize * self.size

    # Merge caller-supplied values last so they are not silently dropped.
    # Both "count_x" and "_count_x" spellings are accepted.
    for key, value in (extra or {}).items():
        field_name = key if key.startswith("_") else f"_{key}"
        kwargs[field_name] = value

    return ArrayInfo(
        _zarr_format=self.metadata.zarr_format,
        _shape=self.shape,
        _order=self.order,
        _read_only=self.read_only,
        _store_type=type(self.store_path.store).__name__,
        **kwargs,
    )
1394+
13381395

13391396
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
13401397
@dataclass(frozen=False)
@@ -3099,10 +3156,58 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
30993156
def __repr__(self) -> str:
31003157
return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>"
31013158

3102-
def info(self) -> None:
3103-
return sync(
3104-
self._async_array.info(),
3105-
)
3159+
@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    This forwards to the ``info`` property of the wrapped async array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.

    Examples
    --------
    >>> arr = zarr.create(shape=(10,), chunks=(2,), dtype="float32")
    >>> arr.info
    Type : Array
    Zarr format : 3
    Data type : DataType.float32
    Shape : (10,)
    Chunk shape : (2,)
    Order : C
    Read-only : False
    Store type : MemoryStore
    Codecs : [BytesCodec(endian=<Endian.little: 'little'>)]
    No. bytes : 40
    """
    return self._async_array.info
3190+
3191+
def info_complete(self) -> Any:
    """
    Return all the information about an array, including information from the Store.

    This is the blocking counterpart of the wrapped async array's
    ``info_complete``: the coroutine is created and then run to completion
    with ``sync``. Besides the statically known information, the result
    carries dynamic details such as the number of chunks written.

    Note that this method will need to read from the store.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info
        The statically known subset of metadata about an array.
    """
    pending = self._async_array.info_complete()
    return sync(pending)
31063211

31073212

31083213
async def chunks_initialized(

0 commit comments

Comments
 (0)