Skip to content

Commit 238622c

Browse files
authored
Merge branch 'main' into fix-indexing
2 parents 9245140 + 501ae9e commit 238622c

File tree

14 files changed

+261
-41
lines changed

14 files changed

+261
-41
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_language_version:
77
python: python3
88
repos:
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.7.4
10+
rev: v0.8.1
1111
hooks:
1212
- id: ruff
1313
args: ["--fix", "--show-fixes"]

pyproject.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -294,19 +294,14 @@ extend-select = [
294294
"W", # pycodestyle warnings
295295
]
296296
ignore = [
297-
"ANN101", # deprecated
298-
"ANN102", # deprecated
299297
"ANN401",
300-
"PT004", # deprecated
301-
"PT005", # deprecated
302298
"PT011", # TODO: apply this rule
303299
"PT012", # TODO: apply this rule
304300
"RET505",
305301
"RET506",
306302
"RUF005",
307303
"SIM108",
308304
"TRY003",
309-
"UP027", # deprecated
310305
"UP038", # https://github.com/astral-sh/ruff/issues/7871
311306
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
312307
"W191",

src/zarr/codecs/_v2.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import TYPE_CHECKING
66

77
import numcodecs
8-
from numcodecs.compat import ensure_ndarray_like
8+
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
99

1010
from zarr.abc.codec import ArrayBytesCodec
1111
from zarr.registry import get_ndbuffer_class
@@ -68,6 +68,9 @@ async def _encode_single(
6868
) -> Buffer | None:
6969
chunk = chunk_array.as_ndarray_like()
7070

71+
# ensure contiguous and correct order
72+
chunk = chunk.astype(chunk_spec.dtype, order=chunk_spec.order, copy=False)
73+
7174
# apply filters
7275
if self.filters:
7376
for f in self.filters:
@@ -83,6 +86,7 @@ async def _encode_single(
8386
else:
8487
cdata = chunk
8588

89+
cdata = ensure_bytes(cdata)
8690
return chunk_spec.prototype.buffer.from_bytes(cdata)
8791

8892
def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:

src/zarr/core/array.py

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -810,34 +810,30 @@ def path(self) -> str:
810810
return self.store_path.path
811811

812812
@property
813-
def name(self) -> str | None:
813+
def name(self) -> str:
814814
"""Array name following h5py convention.
815815
816816
Returns
817817
-------
818818
str
819819
The name of the array.
820820
"""
821-
if self.path:
822-
# follow h5py convention: add leading slash
823-
name = self.path
824-
if name[0] != "/":
825-
name = "/" + name
826-
return name
827-
return None
821+
# follow h5py convention: add leading slash
822+
name = self.path
823+
if not name.startswith("/"):
824+
name = "/" + name
825+
return name
828826

829827
@property
830-
def basename(self) -> str | None:
828+
def basename(self) -> str:
831829
"""Final component of name.
832830
833831
Returns
834832
-------
835833
str
836834
The basename or final component of the array name.
837835
"""
838-
if self.name is not None:
839-
return self.name.split("/")[-1]
840-
return None
836+
return self.name.split("/")[-1]
841837

842838
@property
843839
def cdata_shape(self) -> ChunkCoords:
@@ -1350,18 +1346,53 @@ def info(self) -> Any:
13501346
AsyncArray.info_complete
13511347
All information about an array, including dynamic information
13521348
like the number of bytes and chunks written.
1349+
1350+
Examples
1351+
--------
1352+
1353+
>>> arr = await zarr.api.asynchronous.create(
1354+
... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
1355+
... )
1356+
>>> arr.info
1357+
Type : Array
1358+
Zarr format : 3
1359+
Data type : DataType.float64
1360+
Shape : (3, 4, 5)
1361+
Chunk shape : (2, 2, 2)
1362+
Order : C
1363+
Read-only : False
1364+
Store type : MemoryStore
1365+
Codecs : [{'endian': <Endian.little: 'little'>}]
1366+
No. bytes : 480
13531367
"""
13541368
return self._info()
13551369

13561370
async def info_complete(self) -> Any:
1357-
# TODO: get the size of the object from the store.
1358-
extra = {
1359-
"count_chunks_initialized": await self.nchunks_initialized(),
1360-
# count_bytes_stored isn't yet implemented.
1361-
}
1362-
return self._info(extra=extra)
1363-
1364-
def _info(self, extra: dict[str, int] | None = None) -> Any:
1371+
"""
1372+
Return all the information for an array, including dynamic information such as the storage size.
1373+
1374+
In addition to the static information, this provides
1375+
1376+
- The count of chunks initialized
1377+
- The sum of the bytes written
1378+
1379+
Returns
1380+
-------
1381+
ArrayInfo
1382+
1383+
See Also
1384+
--------
1385+
AsyncArray.info
1386+
A property giving just the statically known information about an array.
1387+
"""
1388+
return self._info(
1389+
await self.nchunks_initialized(),
1390+
await self.store_path.store.getsize_prefix(self.store_path.path),
1391+
)
1392+
1393+
def _info(
1394+
self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
1395+
) -> Any:
13651396
kwargs: dict[str, Any] = {}
13661397
if self.metadata.zarr_format == 2:
13671398
assert isinstance(self.metadata, ArrayV2Metadata)
@@ -1390,6 +1421,8 @@ def _info(self, extra: dict[str, int] | None = None) -> Any:
13901421
_read_only=self.read_only,
13911422
_store_type=type(self.store_path.store).__name__,
13921423
_count_bytes=self.dtype.itemsize * self.size,
1424+
_count_bytes_stored=count_bytes_stored,
1425+
_count_chunks_initialized=count_chunks_initialized,
13931426
**kwargs,
13941427
)
13951428

@@ -1626,8 +1659,7 @@ def path(self) -> str:
16261659
return self._async_array.path
16271660

16281661
@property
1629-
def name(self) -> str | None:
1630-
"""Array name following h5py convention."""
1662+
def name(self) -> str:
16311663
return self._async_array.name
16321664

16331665
@property

src/zarr/core/buffer/core.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,13 @@ def reshape(
8080

8181
def view(self, dtype: npt.DTypeLike) -> Self: ...
8282

83-
def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = ...) -> Self: ...
83+
def astype(
84+
self,
85+
dtype: npt.DTypeLike,
86+
order: Literal["K", "A", "C", "F"] = ...,
87+
*,
88+
copy: bool = ...,
89+
) -> Self: ...
8490

8591
def fill(self, value: Any) -> None: ...
8692

src/zarr/core/group.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ async def from_store(
434434
async def open(
435435
cls,
436436
store: StoreLike,
437-
zarr_format: Literal[2, 3, None] = 3,
437+
zarr_format: Literal[2, 3] | None = 3,
438438
use_consolidated: bool | str | None = None,
439439
) -> AsyncGroup:
440440
"""Open a new AsyncGroup
@@ -1691,7 +1691,7 @@ def from_store(
16911691
def open(
16921692
cls,
16931693
store: StoreLike,
1694-
zarr_format: Literal[2, 3, None] = 3,
1694+
zarr_format: Literal[2, 3] | None = 3,
16951695
) -> Group:
16961696
"""Open a group from an initialized store.
16971697

src/zarr/core/indexing.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,8 +1346,15 @@ def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords:
13461346

13471347

13481348
def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]:
1349-
for i in range(product(chunk_shape)):
1350-
yield decode_morton(i, chunk_shape)
1349+
i = 0
1350+
order: list[ChunkCoords] = []
1351+
while len(order) < product(chunk_shape):
1352+
m = decode_morton(i, chunk_shape)
1353+
if m not in order and all(x < y for x, y in zip(m, chunk_shape, strict=False)):
1354+
order.append(m)
1355+
i += 1
1356+
for j in range(product(chunk_shape)):
1357+
yield order[j]
13511358

13521359

13531360
def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:

src/zarr/testing/strategies.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ def arrays(
153153
assert isinstance(a, Array)
154154
if a.metadata.zarr_format == 3:
155155
assert a.fill_value is not None
156+
assert a.name is not None
156157
assert isinstance(root[array_path], Array)
157158
assert nparray.shape == a.shape
158159
assert chunks == a.chunks

tests/test_array.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import dataclasses
12
import json
23
import math
34
import pickle
@@ -122,8 +123,8 @@ def test_array_name_properties_no_group(
122123
) -> None:
123124
arr = Array.create(store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4")
124125
assert arr.path == ""
125-
assert arr.name is None
126-
assert arr.basename is None
126+
assert arr.name == "/"
127+
assert arr.basename == ""
127128

128129

129130
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@@ -474,6 +475,87 @@ def test_info_v3(self) -> None:
474475
)
475476
assert result == expected
476477

478+
def test_info_complete(self) -> None:
479+
arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
480+
result = arr.info_complete()
481+
expected = ArrayInfo(
482+
_zarr_format=3,
483+
_data_type=DataType.parse("float64"),
484+
_shape=(4, 4),
485+
_chunk_shape=(2, 2),
486+
_order="C",
487+
_read_only=False,
488+
_store_type="MemoryStore",
489+
_codecs=[BytesCodec()],
490+
_count_bytes=128,
491+
_count_chunks_initialized=0,
492+
_count_bytes_stored=373, # metadata only; no chunks have been written yet
493+
)
494+
assert result == expected
495+
496+
arr[:2, :2] = 10
497+
result = arr.info_complete()
498+
expected = dataclasses.replace(
499+
expected, _count_chunks_initialized=1, _count_bytes_stored=405
500+
)
501+
assert result == expected
502+
503+
async def test_info_v2_async(self) -> None:
504+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=2)
505+
result = arr.info
506+
expected = ArrayInfo(
507+
_zarr_format=2,
508+
_data_type=np.dtype("float64"),
509+
_shape=(4, 4),
510+
_chunk_shape=(2, 2),
511+
_order="C",
512+
_read_only=False,
513+
_store_type="MemoryStore",
514+
_count_bytes=128,
515+
)
516+
assert result == expected
517+
518+
async def test_info_v3_async(self) -> None:
519+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
520+
result = arr.info
521+
expected = ArrayInfo(
522+
_zarr_format=3,
523+
_data_type=DataType.parse("float64"),
524+
_shape=(4, 4),
525+
_chunk_shape=(2, 2),
526+
_order="C",
527+
_read_only=False,
528+
_store_type="MemoryStore",
529+
_codecs=[BytesCodec()],
530+
_count_bytes=128,
531+
)
532+
assert result == expected
533+
534+
async def test_info_complete_async(self) -> None:
535+
arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
536+
result = await arr.info_complete()
537+
expected = ArrayInfo(
538+
_zarr_format=3,
539+
_data_type=DataType.parse("float64"),
540+
_shape=(4, 4),
541+
_chunk_shape=(2, 2),
542+
_order="C",
543+
_read_only=False,
544+
_store_type="MemoryStore",
545+
_codecs=[BytesCodec()],
546+
_count_bytes=128,
547+
_count_chunks_initialized=0,
548+
_count_bytes_stored=373, # metadata only; no chunks have been written yet
549+
)
550+
assert result == expected
551+
552+
await arr.setitem((slice(2), slice(2)), 10)
553+
result = await arr.info_complete()
554+
expected = dataclasses.replace(
555+
expected, _count_chunks_initialized=1, _count_bytes_stored=405
556+
)
557+
assert result == expected
558+
477559

478560
@pytest.mark.parametrize("store", ["memory"], indirect=True)
479561
@pytest.mark.parametrize("zarr_format", [2, 3])

tests/test_codecs/test_codecs.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,26 @@ def test_morton() -> None:
204204
]
205205

206206

207+
@pytest.mark.parametrize(
208+
"shape",
209+
[
210+
[2, 2, 2],
211+
[5, 2],
212+
[2, 5],
213+
[2, 9, 2],
214+
[3, 2, 12],
215+
[2, 5, 1],
216+
[4, 3, 6, 2, 7],
217+
[3, 2, 1, 6, 4, 5, 2],
218+
],
219+
)
220+
def test_morton2(shape) -> None:
221+
order = list(morton_order_iter(shape))
222+
for i, x in enumerate(order):
223+
assert x not in order[:i] # no duplicates
224+
assert all(x[j] < shape[j] for j in range(len(shape))) # all indices are within bounds
225+
226+
207227
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
208228
def test_write_partial_chunks(store: Store) -> None:
209229
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))

0 commit comments

Comments
 (0)