Skip to content

Commit b94bff2

Browse files
committed
Basic array
1 parent a6ef792 commit b94bff2

File tree

5 files changed

+211
-43
lines changed

5 files changed

+211
-43
lines changed

src/zarr/_info.py

Lines changed: 86 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
import textwrap
33
from typing import Literal
44

5-
import zarr.abc.store
6-
75
# Group
86
# Name : /
97
# Type : zarr.hierarchy.Group
@@ -14,26 +12,11 @@
1412
# No. groups : 0
1513

1614

17-
# In [19]: z.info
18-
# Out[19]:
19-
# Type : zarr.core.Array
20-
# Data type : int32
21-
# Shape : (1000000,)
22-
# Chunk shape : (100000,)
23-
# Order : C
24-
# Read-only : False
25-
# Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
26-
# Store type : zarr.storage.KVStore
27-
# No. bytes : 4000000 (3.8M)
28-
# No. bytes stored : 320
29-
# Storage ratio : 12500.0
30-
# Chunks initialized : 0/10
31-
32-
3315
@dataclasses.dataclass(kw_only=True)
3416
class GroupInfo:
3517
name: str
3618
type: Literal["Group"] = "Group"
19+
zarr_format: Literal[2, 3]
3720
read_only: bool
3821
store_type: str
3922
count_members: int | None = None
@@ -44,17 +27,95 @@ def __repr__(self) -> str:
4427
template = textwrap.dedent("""\
4528
Name : {name}
4629
Type : {type}
30+
Zarr format : {zarr_format}
4731
Read-only : {read_only}
4832
Store type : {store_type}""")
4933

5034
if self.count_members is not None:
51-
template += ("\nNo. members : {count_members}")
35+
template += "\nNo. members : {count_members}"
5236
if self.count_arrays is not None:
53-
template += ("\nNo. arrays : {count_arrays}")
37+
template += "\nNo. arrays : {count_arrays}"
5438
if self.count_groups is not None:
55-
template += ("\nNo. groups : {count_groups}")
56-
return template.format(
57-
**dataclasses.asdict(self)
58-
)
39+
template += "\nNo. groups : {count_groups}"
40+
return template.format(**dataclasses.asdict(self))
41+
42+
43+
def human_readable_size(size: int) -> str:
    """Format a byte count using binary (power-of-1024) unit suffixes.

    Sizes below 1024 are returned as the plain number with no suffix.
    Larger sizes are divided by the largest fitting unit and shown with
    one decimal place followed by ``K``, ``M``, ``G``, ``T`` or ``P``.
    ``P`` is the largest unit, so anything from 2**50 upwards is
    expressed in ``P`` regardless of magnitude.

    Parameters
    ----------
    size : int
        Number of bytes.

    Returns
    -------
    str
        The formatted size, e.g. ``"512"``, ``"976.6K"`` or ``"1.0G"``.
    """
    if size < 2**10:
        return f"{size}"

    # Each unit covers the range [2**exponent, 2**(exponent + 10)).
    for exponent, suffix in ((10, "K"), (20, "M"), (30, "G"), (40, "T")):
        if size < 2 ** (exponent + 10):
            return f"{size / float(2**exponent):.1f}{suffix}"

    # Everything at or above 2**50 falls through to the largest unit.
    return f"{size / float(2**50):.1f}P"
56+
57+
58+
def byte_info(size: int) -> str:
    """Describe a byte count, adding a human-readable form when it helps.

    Parameters
    ----------
    size : int
        Number of bytes.

    Returns
    -------
    str
        For sizes below 1024, just the number as text (e.g. ``"320"``).
        Otherwise the exact count followed by the abbreviated size in
        parentheses, e.g. ``"4000000 (3.8M)"``.
    """
    if size < 2**10:
        # Always hand back text, as the annotation promises; returning the
        # bare integer here would make the result type depend on the value.
        return str(size)
    return f"{size} ({human_readable_size(size)})"
63+
64+
65+
@dataclasses.dataclass(kw_only=True)
66+
class ArrayInfo:
67+
type: Literal["Array"] = "Array"
68+
zarr_format: Literal[2, 3]
69+
data_type: str
70+
shape: tuple[int,]
71+
chunk_shape: tuple[int,]
72+
order: Literal["C", "F"]
73+
read_only: bool
74+
store_type: str
75+
compressor: str | None = None
76+
filters: list[str] | None = None
77+
codecs: list[str] | None = None
78+
count_bytes: int | None = None
79+
count_bytes_stored: int | None = None
80+
count_chunks_initialized: int | None = None
81+
82+
def __repr__(self) -> str:
83+
template = textwrap.dedent("""\
84+
Type : {type}
85+
Zarr format : {zarr_format}
86+
Data type : {data_type}
87+
Shape : {shape}
88+
Chunk shape : {chunk_shape}
89+
Order : {order}
90+
Read-only : {read_only}
91+
Store type : {store_type}""")
92+
93+
kwargs = dataclasses.asdict(self)
94+
if self.compressor is not None:
95+
template += "\nCompressor : {compressor}"
96+
97+
if self.filters is not None:
98+
template += "\nFilters : {filters}"
99+
100+
if self.codecs is not None:
101+
template += "\nCodecs : {codecs}"
102+
103+
if self.count_bytes is not None:
104+
template += "\nNo. bytes : {count_bytes}"
105+
kwargs["count_bytes"] = byte_info(self.count_bytes)
106+
107+
if self.count_bytes_stored is not None:
108+
template += "\nNo. bytes stored : {count_bytes_stored}"
109+
kwargs["count_stored"] = byte_info(self.count_bytes_stored)
110+
111+
if (
112+
self.count_bytes is not None
113+
and self.count_bytes_stored is not None
114+
and self.count_bytes_stored > 0
115+
):
116+
template += "\nStorage ratio : {storage_ratio}"
117+
kwargs["storage_ratio"] = f"{self.count_bytes / self.count_bytes_stored:.1f}"
59118

60-
# def _repr_html_(self): ...
119+
if self.count_chunks_initialized is not None:
120+
template += "\nChunks Initialized : {count_chunks_initialized}"
121+
return template.format(**kwargs)

src/zarr/core/array.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy.typing as npt
1111

1212
from zarr._compat import _deprecate_positional_args
13+
from zarr._info import ArrayInfo
1314
from zarr.abc.store import Store, set_or_delete
1415
from zarr.codecs import _get_default_array_bytes_codec
1516
from zarr.codecs._v2 import V2Compressor, V2Filters
@@ -1146,11 +1147,27 @@ def __repr__(self) -> str:
11461147
return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>"
11471148

11481149
@property
1149-
def info(self) -> ...:
1150-
...
1150+
def info(self) -> ArrayInfo:
1151+
kwargs = {}
1152+
if self.metadata.zarr_format == 2:
1153+
kwargs["compressor"] = self.metadata.compressor
1154+
kwargs["filters"] = self.metadata.filters
1155+
else:
1156+
kwargs["codecs"] = self.metadata.codecs
1157+
1158+
return ArrayInfo(
1159+
zarr_format=self.metadata.zarr_format,
1160+
data_type=self.metadata.data_type,
1161+
shape=self.shape,
1162+
chunk_shape=self.metadata.chunk_grid.chunk_shape,
1163+
order=self.order,
1164+
read_only=self.store_path.store.mode.readonly,
1165+
store_type=type(self.store_path.store).__name__,
1166+
**kwargs
1167+
)
11511168

11521169
async def info_full(self) -> None:
1153-
raise NotImplementedError
1170+
return ArrayInfo()
11541171

11551172

11561173
@dataclass(frozen=True)
@@ -2823,11 +2840,11 @@ def __repr__(self) -> str:
28232840
return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>"
28242841

28252842
@property
2826-
def info(self) -> None:
2843+
def info(self) -> ArrayInfo:
28272844
return self._async_array.info
28282845

2829-
def info_full(self) -> None:
2830-
...
2846+
def info_complete(self) -> ArrayInfo:
2847+
return sync(self._async_array.info_complete)
28312848

28322849

28332850
def nchunks_initialized(

src/zarr/core/group.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
import numpy.typing as npt
1414
from typing_extensions import deprecated
1515

16-
from zarr._info import GroupInfo
1716
import zarr.api.asynchronous as async_api
17+
from zarr._info import GroupInfo
1818
from zarr.abc.metadata import Metadata
1919
from zarr.abc.store import Store, set_or_delete
2020
from zarr.core.array import Array, AsyncArray, _build_parents
@@ -804,7 +804,9 @@ async def info_complete(self) -> GroupInfo:
804804
members = [x[1].metadata async for x in self.members(max_depth=None)]
805805
return self._info(members=members)
806806

807-
def _info(self, members: list[ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] | None = None) -> GroupInfo:
807+
def _info(
808+
self, members: list[ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] | None = None
809+
) -> GroupInfo:
808810
kwargs = {}
809811
if members is not None:
810812
kwargs["count_members"] = len(members)
@@ -822,7 +824,8 @@ def _info(self, members: list[ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]
822824
name=self.store_path.path,
823825
read_only=self.store_path.store.mode.readonly,
824826
store_type=type(self.store_path.store).__name__,
825-
**kwargs
827+
zarr_format=self.metadata.zarr_format,
828+
**kwargs,
826829
)
827830

828831
@property

tests/v3/test_group.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@
99
import pytest
1010

1111
import zarr
12-
from zarr._info import GroupInfo
1312
import zarr.api.asynchronous
1413
import zarr.api.synchronous
14+
import zarr.storage
1515
from zarr import Array, AsyncArray, AsyncGroup, Group
16+
from zarr._info import GroupInfo
1617
from zarr.abc.store import Store
1718
from zarr.core.buffer import default_buffer_prototype
1819
from zarr.core.group import ConsolidatedMetadata, GroupMetadata
1920
from zarr.core.sync import sync
2021
from zarr.errors import ContainsArrayError, ContainsGroupError
2122
from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore
22-
import zarr.storage
2323
from zarr.storage.common import make_store_path
2424

2525
from .conftest import parse_store
@@ -1343,6 +1343,7 @@ def test_info(self):
13431343
)
13441344
assert result == expected
13451345

1346+
13461347
def test_update_attrs() -> None:
13471348
# regression test for https://github.com/zarr-developers/zarr-python/issues/2328
13481349
root = Group.from_store(

tests/v3/test_info.py

Lines changed: 93 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,115 @@
11
import textwrap
2+
from typing import Literal
23

3-
from zarr._info import GroupInfo
4+
import pytest
45

6+
from zarr.core.common import ZarrFormat
7+
from zarr._info import ArrayInfo, GroupInfo
58

6-
def test_group_info_repr() -> None:
7-
info = GroupInfo(name="a", store_type="MemoryStore", read_only=False)
9+
10+
ZARR_FORMATS = [2, 3]
11+
12+
13+
@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
14+
def test_group_info_repr(zarr_format: ZarrFormat) -> None:
15+
info = GroupInfo(name="a", store_type="MemoryStore", read_only=False, zarr_format=zarr_format)
816
result = repr(info)
9-
expected = textwrap.dedent("""\
17+
expected = textwrap.dedent(f"""\
1018
Name : a
1119
Type : Group
20+
Zarr format : {zarr_format}
1221
Read-only : False
1322
Store type : MemoryStore""")
1423
assert result == expected
1524

1625

17-
def test_group_info_complete() -> None:
18-
info = GroupInfo(name="a", store_type="MemoryStore", read_only=False, count_arrays=10, count_groups=4, count_members=14)
26+
@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
27+
def test_group_info_complete(zarr_format: ZarrFormat) -> None:
28+
info = GroupInfo(
29+
name="a",
30+
store_type="MemoryStore",
31+
zarr_format=zarr_format,
32+
read_only=False,
33+
count_arrays=10,
34+
count_groups=4,
35+
count_members=14,
36+
)
1937
result = repr(info)
20-
expected = textwrap.dedent("""\
38+
expected = textwrap.dedent(f"""\
2139
Name : a
2240
Type : Group
41+
Zarr format : {zarr_format}
2342
Read-only : False
2443
Store type : MemoryStore
2544
No. members : 14
2645
No. arrays : 10
2746
No. groups : 4""")
2847
assert result == expected
2948

49+
50+
@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
51+
def test_array_info(zarr_format: ZarrFormat):
52+
info = ArrayInfo(
53+
zarr_format=zarr_format,
54+
data_type="int32",
55+
shape=(100, 100),
56+
chunk_shape=(10, 100),
57+
order="C",
58+
read_only=True,
59+
store_type="MemoryStore",
60+
codecs=["BytesCodec(endian=<Endian.little: 'little'>"],
61+
)
62+
result = repr(info)
63+
assert result == textwrap.dedent(f"""\
64+
Type : Array
65+
Zarr format : {zarr_format}
66+
Data type : int32
67+
Shape : (100, 100)
68+
Chunk shape : (10, 100)
69+
Order : C
70+
Read-only : True
71+
Store type : MemoryStore
72+
Codecs : ["BytesCodec(endian=<Endian.little: 'little'>"]""")
73+
74+
75+
@pytest.mark.parametrize("zarr_format", ZARR_FORMATS)
76+
@pytest.mark.parametrize("bytes_things", [(1_000_000, "976.6K", 500_000, "5", "2.0", 5)])
77+
def test_array_info_complete(
78+
zarr_format: ZarrFormat, bytes_things: tuple[int, str, int, str, str, int]
79+
):
80+
(
81+
count_bytes,
82+
count_bytes_formatted,
83+
count_bytes_stored,
84+
count_bytes_stored_formatted,
85+
storage_ratio_formatted,
86+
count_chunks_initialized,
87+
) = bytes_things
88+
info = ArrayInfo(
89+
zarr_format=zarr_format,
90+
data_type="int32",
91+
shape=(100, 100),
92+
chunk_shape=(10, 100),
93+
order="C",
94+
read_only=True,
95+
store_type="MemoryStore",
96+
codecs=["BytesCodec(endian=<Endian.little: 'little'>"],
97+
count_bytes=count_bytes,
98+
count_bytes_stored=count_bytes_stored,
99+
count_chunks_initialized=count_chunks_initialized,
100+
)
101+
result = repr(info)
102+
assert result == textwrap.dedent(f"""\
103+
Type : Array
104+
Zarr format : {zarr_format}
105+
Data type : int32
106+
Shape : (100, 100)
107+
Chunk shape : (10, 100)
108+
Order : C
109+
Read-only : True
110+
Store type : MemoryStore
111+
Codecs : ["BytesCodec(endian=<Endian.little: 'little'>"]
112+
No. bytes : 1000000 (976.6K)
113+
No. bytes stored : 500000
114+
Storage ratio : 2.0
115+
Chunks Initialized : 5""")

0 commit comments

Comments
 (0)