Skip to content

Commit 63bafe9

Browse files
authored
Merge branch 'main' into gh-complete
2 parents a23c453 + f8e3432 commit 63bafe9

File tree

6 files changed

+185
-11
lines changed

6 files changed

+185
-11
lines changed

src/zarr/core/array.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,18 +1346,53 @@ def info(self) -> Any:
13461346
AsyncArray.info_complete
13471347
All information about an array, including dynamic information
13481348
like the number of bytes and chunks written.
1349+
1350+
Examples
1351+
--------
1352+
1353+
>>> arr = await zarr.api.asynchronous.create(
1354+
... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
1355+
... )
1356+
>>> arr.info
1357+
Type : Array
1358+
Zarr format : 3
1359+
Data type : DataType.float64
1360+
Shape : (3, 4, 5)
1361+
Chunk shape : (2, 2, 2)
1362+
Order : C
1363+
Read-only : False
1364+
Store type : MemoryStore
1365+
Codecs : [{'endian': <Endian.little: 'little'>}]
1366+
No. bytes : 480
13491367
"""
13501368
return self._info()
13511369

13521370
async def info_complete(self) -> Any:
    """
    Gather the full set of information about this array, dynamic values included.

    On top of what the static ``info`` property reports, the result carries

    - the number of chunks that have been initialized
    - the number of bytes stored, as reported by the store's
      ``getsize_prefix`` for this array's path

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info
        A property giving just the statically known information about an array.
    """
    # The two lookups are awaited one after the other, in the same order the
    # values are handed to ``_info``.
    chunks_initialized = await self.nchunks_initialized()
    location = self.store_path
    bytes_stored = await location.store.getsize_prefix(location.path)
    return self._info(chunks_initialized, bytes_stored)
1392+
1393+
def _info(
1394+
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
1395+
) -> Any:
13611396
kwargs: dict[str, Any] = {}
13621397
if self.metadata.zarr_format == 2:
13631398
assert isinstance(self.metadata, ArrayV2Metadata)
@@ -1386,6 +1421,8 @@ def _info(self, extra: dict[str, int] | None = None) -> Any:
13861421
_read_only=self.read_only,
13871422
_store_type=type(self.store_path.store).__name__,
13881423
_count_bytes=self.dtype.itemsize * self.size,
1424+
_count_bytes_stored=count_bytes_stored,
1425+
_count_chunks_initialized=count_chunks_initialized,
13891426
**kwargs,
13901427
)
13911428

src/zarr/core/indexing.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,8 +1346,15 @@ def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords:
13461346

13471347

13481348
def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]:
    """
    Iterate over the coordinates of a grid of extent ``chunk_shape`` in Morton order.

    Successive Morton codes ``0, 1, 2, ...`` are decoded with ``decode_morton``.
    A decoded coordinate is skipped when it was already produced or when any of
    its components is not strictly below the matching entry of ``chunk_shape``.
    Iteration stops once ``product(chunk_shape)`` coordinates have been yielded,
    so each yielded coordinate is unique and in bounds.

    Parameters
    ----------
    chunk_shape : ChunkCoords
        Extent of the grid along each dimension.

    Yields
    ------
    ChunkCoords
        The next not-yet-seen, in-bounds coordinate in decode order.
    """
    # Loop-invariant: how many coordinates have to be produced in total.
    total = product(chunk_shape)
    # A set gives O(1) "already produced?" checks; the previous list-based
    # membership test made the whole iteration quadratic in ``total``.
    seen: set[ChunkCoords] = set()
    code = 0
    while len(seen) < total:
        coords = decode_morton(code, chunk_shape)
        code += 1
        if coords in seen:
            continue
        if all(c < extent for c, extent in zip(coords, chunk_shape, strict=False)):
            seen.add(coords)
            # Yield as soon as a coordinate is accepted instead of first
            # materialising the complete order; the sequence is unchanged.
            yield coords
13511358

13521359

13531360
def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:

tests/test_array.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import dataclasses
12
import json
23
import math
34
import pickle
@@ -474,6 +475,87 @@ def test_info_v3(self) -> None:
474475
)
475476
assert result == expected
476477

478+
def test_info_complete(self) -> None:
    """``info_complete`` on a v3 array reports chunk and stored-byte counts."""
    array = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # State right after creation: nothing has been written yet.
    observed = array.info_complete()
    untouched = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,  # 4 * 4 float64 values
        _count_chunks_initialized=0,
        # Presumably the size of the stored metadata document - TODO confirm.
        _count_bytes_stored=373,
    )
    assert observed == untouched

    # Fill exactly one 2x2 chunk; the expected stored size grows by 32 bytes,
    # which matches four float64 values.
    array[:2, :2] = 10
    observed = array.info_complete()
    one_chunk_written = dataclasses.replace(
        untouched, _count_chunks_initialized=1, _count_bytes_stored=405
    )
    assert observed == one_chunk_written
502+
503+
async def test_info_v2_async(self) -> None:
    """The static ``info`` of an async Zarr v2 array matches the expected ``ArrayInfo``."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=2)
    observed = array.info

    # No codecs entry is expected for the v2 case; the data type is a NumPy dtype.
    wanted = ArrayInfo(
        _zarr_format=2,
        _data_type=np.dtype("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _count_bytes=128,  # 4 * 4 float64 values
    )
    assert observed == wanted
517+
518+
async def test_info_v3_async(self) -> None:
    """The static ``info`` of an async Zarr v3 array matches the expected ``ArrayInfo``."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
    observed = array.info

    # The v3 expectation carries a codecs list and a ``DataType`` value.
    wanted = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,  # 4 * 4 float64 values
    )
    assert observed == wanted
533+
534+
async def test_info_complete_async(self) -> None:
    """Async ``info_complete`` on a v3 array reports chunk and stored-byte counts."""
    array = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)

    # State right after creation: nothing has been written yet.
    observed = await array.info_complete()
    untouched = ArrayInfo(
        _zarr_format=3,
        _data_type=DataType.parse("float64"),
        _shape=(4, 4),
        _chunk_shape=(2, 2),
        _order="C",
        _read_only=False,
        _store_type="MemoryStore",
        _codecs=[BytesCodec()],
        _count_bytes=128,  # 4 * 4 float64 values
        _count_chunks_initialized=0,
        # Presumably the size of the stored metadata document - TODO confirm.
        _count_bytes_stored=373,
    )
    assert observed == untouched

    # Fill exactly one 2x2 chunk; the expected stored size grows by 32 bytes,
    # which matches four float64 values.
    first_chunk = (slice(2), slice(2))
    await array.setitem(first_chunk, 10)
    observed = await array.info_complete()
    one_chunk_written = dataclasses.replace(
        untouched, _count_chunks_initialized=1, _count_bytes_stored=405
    )
    assert observed == one_chunk_written
558+
477559

478560
@pytest.mark.parametrize("store", ["memory"], indirect=True)
479561
@pytest.mark.parametrize("zarr_format", [2, 3])

tests/test_codecs/test_codecs.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,26 @@ def test_morton() -> None:
204204
]
205205

206206

207+
@pytest.mark.parametrize(
    "shape",
    [
        [2, 2, 2],
        [5, 2],
        [2, 5],
        [2, 9, 2],
        [3, 2, 12],
        [2, 5, 1],
        [4, 3, 6, 2, 7],
        [3, 2, 1, 6, 4, 5, 2],
    ],
)
def test_morton2(shape: list[int]) -> None:
    """
    ``morton_order_iter`` yields every coordinate of ``shape`` exactly once.

    Checks three properties of the produced order: it has one entry per grid
    cell, it contains no duplicates, and every index lies inside the grid.
    """
    order = list(morton_order_iter(tuple(shape)))

    # Without a length check an iterator that yields nothing would pass the
    # per-element assertions below vacuously.
    assert len(order) == int(np.prod(shape))

    # Coordinates are tuples, so a set detects duplicates in linear time
    # (the earlier ``x not in order[:i]`` scan was quadratic).
    assert len(set(order)) == len(order)  # no duplicates

    for coords in order:
        # all indices are within bounds (and of the same rank as ``shape``)
        assert all(0 <= c < extent for c, extent in zip(coords, shape, strict=True))
225+
226+
207227
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
208228
def test_write_partial_chunks(store: Store) -> None:
209229
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))

tests/test_codecs/test_sharding.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,32 @@ async def test_sharding_with_empty_inner_chunk(
393393
print("read data")
394394
data_read = await a.getitem(...)
395395
assert np.array_equal(data_read, data)
396+
397+
398+
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
@pytest.mark.parametrize(
    "index_location",
    [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end],
)
@pytest.mark.parametrize("chunks_per_shard", [(5, 2), (2, 5), (5, 5)])
async def test_sharding_with_chunks_per_shard(
    store: Store,
    index_location: ShardingCodecIndexLocation,
    chunks_per_shard: tuple[int, ...],
) -> None:
    """
    Round-trip data through a sharded array for several chunks-per-shard layouts.

    The array shape is ``chunks_per_shard`` times the inner chunk shape and the
    outer chunk shape equals the array shape, so the sharding codec is
    exercised with ``chunks_per_shard`` inner chunks along each dimension.
    """
    chunk_shape = (2, 1)
    # strict=True: a rank mismatch between the parametrized value and the
    # inner chunk shape should fail loudly instead of being truncated.
    shape = [n * extent for n, extent in zip(chunks_per_shard, chunk_shape, strict=True)]
    data = np.ones(shape, dtype="int32")
    # Differs from the written value (1), so unwritten cells would show up
    # in the comparison below.
    fill_value = 42

    path = f"test_sharding_with_chunks_per_shard_{index_location}"
    spath = StorePath(store, path)
    a = Array.create(
        spath,
        shape=shape,
        chunk_shape=shape,
        dtype="int32",
        fill_value=fill_value,
        codecs=[ShardingCodec(chunk_shape=chunk_shape, index_location=index_location)],
    )
    a[...] = data
    data_read = a[...]
    assert np.array_equal(data_read, data)

tests/test_store/test_logging.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66

77
import zarr
8-
import zarr.storage
98
from zarr.core.buffer import default_buffer_prototype
109
from zarr.storage.logging import LoggingStore
1110

0 commit comments

Comments
 (0)