Skip to content

Commit 16bad38

Browse files
committed
Merge remote-tracking branch 'upstream/v3' into user/tom/fix/v2-compat
2 parents 559399e + 8c5038a commit 16bad38

File tree

19 files changed: +264 −164 lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ default_language_version:
77
python: python3
88
repos:
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.6.4
10+
rev: v0.6.5
1111
hooks:
1212
- id: ruff
1313
args: ["--fix", "--show-fixes"]

src/zarr/abc/codec.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from zarr.abc.store import ByteGetter, ByteSetter
1818
from zarr.core.array_spec import ArraySpec
1919
from zarr.core.chunk_grids import ChunkGrid
20-
from zarr.core.common import JSON
2120
from zarr.core.indexing import SelectorTuple
2221

2322
__all__ = [
@@ -242,7 +241,7 @@ async def encode_partial(
242241
)
243242

244243

245-
class CodecPipeline(Metadata):
244+
class CodecPipeline:
246245
"""Base class for implementing CodecPipeline.
247246
A CodecPipeline implements the read and write paths for chunk data.
248247
On the read path, it is responsible for fetching chunks from a store (via ByteGetter),
@@ -266,12 +265,12 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
266265

267266
@classmethod
268267
@abstractmethod
269-
def from_list(cls, codecs: Iterable[Codec]) -> Self:
270-
"""Creates a codec pipeline from a list of codecs.
268+
def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
269+
"""Creates a codec pipeline from an iterable of codecs.
271270
272271
Parameters
273272
----------
274-
codecs : list[Codec]
273+
codecs : Iterable[Codec]
275274
276275
Returns
277276
-------
@@ -402,15 +401,6 @@ async def write(
402401
"""
403402
...
404403

405-
@classmethod
406-
def from_dict(cls, data: Iterable[JSON | Codec]) -> Self:
407-
"""
408-
Create an instance of the model from a dictionary
409-
"""
410-
...
411-
412-
return cls(**data)
413-
414404

415405
async def _batching_helper(
416406
func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]],

src/zarr/abc/metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
@dataclass(frozen=True)
1717
class Metadata:
18-
def to_dict(self) -> JSON:
18+
def to_dict(self) -> dict[str, JSON]:
1919
"""
2020
Recursively serialize this model to a dictionary.
2121
This method inspects the fields of self and calls `x.to_dict()` for any fields that

src/zarr/codecs/_v2.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec)
6767

6868
@dataclass(frozen=True)
6969
class V2Filters(ArrayArrayCodec):
70-
filters: list[dict[str, JSON]]
70+
filters: tuple[numcodecs.abc.Codec, ...] | None
7171

7272
is_fixed_size = False
7373

@@ -79,8 +79,7 @@ async def _decode_single(
7979
chunk_ndarray = chunk_array.as_ndarray_like()
8080
# apply filters in reverse order
8181
if self.filters is not None:
82-
for filter_metadata in self.filters[::-1]:
83-
filter = numcodecs.get_codec(filter_metadata)
82+
for filter in self.filters[::-1]:
8483
chunk_ndarray = await to_thread(filter.decode, chunk_ndarray)
8584

8685
# ensure correct chunk shape
@@ -99,9 +98,9 @@ async def _encode_single(
9998
) -> NDBuffer | None:
10099
chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
101100

102-
for filter_metadata in self.filters:
103-
filter = numcodecs.get_codec(filter_metadata)
104-
chunk_ndarray = await to_thread(filter.encode, chunk_ndarray)
101+
if self.filters is not None:
102+
for filter in self.filters:
103+
chunk_ndarray = await to_thread(filter.encode, chunk_ndarray)
105104

106105
return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
107106

src/zarr/codecs/blosc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ def to_dict(self) -> dict[str, JSON]:
127127
"name": "blosc",
128128
"configuration": {
129129
"typesize": self.typesize,
130-
"cname": self.cname,
130+
"cname": self.cname.value,
131131
"clevel": self.clevel,
132-
"shuffle": self.shuffle,
132+
"shuffle": self.shuffle.value,
133133
"blocksize": self.blocksize,
134134
},
135135
}

src/zarr/codecs/bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def to_dict(self) -> dict[str, JSON]:
5353
if self.endian is None:
5454
return {"name": "bytes"}
5555
else:
56-
return {"name": "bytes", "configuration": {"endian": self.endian}}
56+
return {"name": "bytes", "configuration": {"endian": self.endian.value}}
5757

5858
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
5959
if array_spec.dtype.itemsize == 0:

src/zarr/codecs/pipeline.py

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from collections.abc import Iterable, Iterator
43
from dataclasses import dataclass
54
from itertools import islice, pairwise
65
from typing import TYPE_CHECKING, Any, TypeVar
@@ -15,12 +14,14 @@
1514
Codec,
1615
CodecPipeline,
1716
)
18-
from zarr.core.common import JSON, ChunkCoords, concurrent_map, parse_named_configuration
17+
from zarr.core.common import ChunkCoords, concurrent_map
1918
from zarr.core.config import config
2019
from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
21-
from zarr.registry import get_codec_class, register_pipeline
20+
from zarr.registry import register_pipeline
2221

2322
if TYPE_CHECKING:
23+
from collections.abc import Iterable, Iterator
24+
2425
import numpy as np
2526
from typing_extensions import Self
2627

@@ -68,30 +69,11 @@ class BatchedCodecPipeline(CodecPipeline):
6869
bytes_bytes_codecs: tuple[BytesBytesCodec, ...]
6970
batch_size: int
7071

71-
@classmethod
72-
def from_dict(cls, data: Iterable[JSON | Codec], *, batch_size: int | None = None) -> Self:
73-
out: list[Codec] = []
74-
if not isinstance(data, Iterable):
75-
raise TypeError(f"Expected iterable, got {type(data)}")
76-
77-
for c in data:
78-
if isinstance(
79-
c, ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec
80-
): # Can't use Codec here because of mypy limitation
81-
out.append(c)
82-
else:
83-
name_parsed, _ = parse_named_configuration(c, require_configuration=False)
84-
out.append(get_codec_class(name_parsed).from_dict(c)) # type: ignore[arg-type]
85-
return cls.from_list(out, batch_size=batch_size)
86-
87-
def to_dict(self) -> JSON:
88-
return [c.to_dict() for c in self]
89-
9072
def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
91-
return type(self).from_list([c.evolve_from_array_spec(array_spec=array_spec) for c in self])
73+
return type(self).from_codecs(c.evolve_from_array_spec(array_spec=array_spec) for c in self)
9274

9375
@classmethod
94-
def from_list(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self:
76+
def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self:
9577
array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(codecs)
9678

9779
return cls(

src/zarr/codecs/sharding.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ class ShardingCodecIndexLocation(Enum):
6868
end = "end"
6969

7070

71-
def parse_index_location(data: JSON) -> ShardingCodecIndexLocation:
71+
def parse_index_location(data: object) -> ShardingCodecIndexLocation:
7272
return parse_enum(data, ShardingCodecIndexLocation)
7373

7474

@@ -333,7 +333,7 @@ def __init__(
333333
chunk_shape: ChunkCoordsLike,
334334
codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),),
335335
index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()),
336-
index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end,
336+
index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end,
337337
) -> None:
338338
chunk_shape_parsed = parse_shapelike(chunk_shape)
339339
codecs_parsed = parse_codecs(codecs)
@@ -373,16 +373,16 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
373373

374374
@property
375375
def codec_pipeline(self) -> CodecPipeline:
376-
return get_pipeline_class().from_list(self.codecs)
376+
return get_pipeline_class().from_codecs(self.codecs)
377377

378378
def to_dict(self) -> dict[str, JSON]:
379379
return {
380380
"name": "sharding_indexed",
381381
"configuration": {
382-
"chunk_shape": list(self.chunk_shape),
383-
"codecs": [s.to_dict() for s in self.codecs],
384-
"index_codecs": [s.to_dict() for s in self.index_codecs],
385-
"index_location": self.index_location,
382+
"chunk_shape": self.chunk_shape,
383+
"codecs": tuple([s.to_dict() for s in self.codecs]),
384+
"index_codecs": tuple([s.to_dict() for s in self.index_codecs]),
385+
"index_location": self.index_location.value,
386386
},
387387
}
388388

@@ -620,7 +620,7 @@ async def _decode_shard_index(
620620
index_array = next(
621621
iter(
622622
await get_pipeline_class()
623-
.from_list(self.index_codecs)
623+
.from_codecs(self.index_codecs)
624624
.decode(
625625
[(index_bytes, self._get_index_chunk_spec(chunks_per_shard))],
626626
)
@@ -633,7 +633,7 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
633633
index_bytes = next(
634634
iter(
635635
await get_pipeline_class()
636-
.from_list(self.index_codecs)
636+
.from_codecs(self.index_codecs)
637637
.encode(
638638
[
639639
(
@@ -651,7 +651,7 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
651651
def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int:
652652
return (
653653
get_pipeline_class()
654-
.from_list(self.index_codecs)
654+
.from_codecs(self.index_codecs)
655655
.compute_encoded_size(
656656
16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard)
657657
)

src/zarr/codecs/transpose.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
4545
return cls(**configuration_parsed) # type: ignore[arg-type]
4646

4747
def to_dict(self) -> dict[str, JSON]:
48-
return {"name": "transpose", "configuration": {"order": list(self.order)}}
48+
return {"name": "transpose", "configuration": {"order": tuple(self.order)}}
4949

5050
def validate(self, shape: tuple[int, ...], dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None:
5151
if len(self.order) != len(shape):

src/zarr/core/array.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
8989

9090
def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecPipeline:
9191
if isinstance(metadata, ArrayV3Metadata):
92-
return get_pipeline_class().from_list(metadata.codecs)
92+
return get_pipeline_class().from_codecs(metadata.codecs)
9393
elif isinstance(metadata, ArrayV2Metadata):
94-
return get_pipeline_class().from_list(
95-
[V2Filters(metadata.filters or []), V2Compressor(metadata.compressor)]
94+
return get_pipeline_class().from_codecs(
95+
[V2Filters(metadata.filters), V2Compressor(metadata.compressor)]
9696
)
9797
else:
9898
raise TypeError
@@ -301,8 +301,6 @@ async def _create_v2(
301301
attributes: dict[str, JSON] | None = None,
302302
exists_ok: bool = False,
303303
) -> AsyncArray:
304-
import numcodecs
305-
306304
if not exists_ok:
307305
await ensure_no_existing_node(store_path, zarr_format=2)
308306
if order is None:
@@ -317,15 +315,9 @@ async def _create_v2(
317315
chunks=chunks,
318316
order=order,
319317
dimension_separator=dimension_separator,
320-
fill_value=0 if fill_value is None else fill_value,
321-
compressor=(
322-
numcodecs.get_codec(compressor).get_config() if compressor is not None else None
323-
),
324-
filters=(
325-
[numcodecs.get_codec(filter).get_config() for filter in filters]
326-
if filters is not None
327-
else None
328-
),
318+
fill_value=fill_value,
319+
compressor=compressor,
320+
filters=filters,
329321
attributes=attributes,
330322
)
331323
array = cls(metadata=metadata, store_path=store_path)

0 commit comments

Comments (0)