
Commit 9ecbbd1

Merge remote-tracking branch 'upstream/main' into tom/fix/info

2 parents: 0aef240 + 4c3081c

File tree

18 files changed: +178 −110 lines

.github/workflows/gpu_test.yml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['2.0']
+        numpy-version: ['2.1']
         dependency-set: ["minimal"]

     steps:

.github/workflows/hypothesis.yaml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['1.26']
+        numpy-version: ['2.1']
         dependency-set: ["optional"]

     steps:

.github/workflows/test.yml

Lines changed: 22 additions & 3 deletions
@@ -16,14 +16,33 @@ concurrency:

 jobs:
   test:
-    name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
+    name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}

-    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ['3.11', '3.12', '3.13']
-        numpy-version: ['1.25', '1.26', '2.0']
+        numpy-version: ['1.25', '2.1']
         dependency-set: ["minimal", "optional"]
+        os: ["ubuntu-latest"]
+        include:
+          - python-version: '3.11'
+            numpy-version: '1.25'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          - python-version: '3.13'
+            numpy-version: '2.1'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          # https://github.com/zarr-developers/zarr-python/issues/2438
+          # - python-version: '3.11'
+          #   numpy-version: '1.25'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+          # - python-version: '3.13'
+          #   numpy-version: '2.1'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+    runs-on: ${{ matrix.os }}

     steps:
       - uses: actions/checkout@v4

.pre-commit-config.yaml

Lines changed: 2 additions & 4 deletions
@@ -7,7 +7,7 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
@@ -22,7 +22,7 @@ repos:
     hooks:
       - id: check-yaml
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.12.1
+    rev: v1.13.0
     hooks:
       - id: mypy
         files: src|tests
@@ -37,8 +37,6 @@ repos:
         - universal-pathlib
         # Tests
         - pytest
-        # Zarr v2
-        - types-redis
   - repo: https://github.com/scientific-python/cookie
     rev: 2024.08.19
     hooks:

pyproject.toml

Lines changed: 5 additions & 8 deletions
@@ -61,7 +61,6 @@ test = [
     "pytest",
     "pytest-cov",
     "msgpack",
-    "lmdb",
     "s3fs",
     "pytest-asyncio",
     "moto[s3]",
@@ -84,21 +83,19 @@ gpu = [
 docs = [
     'sphinx==8.1.3',
     'sphinx-autobuild>=2021.3.14',
-    'sphinx-autoapi==3.3.2',
+    'sphinx-autoapi==3.3.3',
     'sphinx_design',
     'sphinx-issues',
     'sphinx-copybutton',
     'pydata-sphinx-theme',
     'numpydoc',
     'numcodecs[msgpack]',
     'msgpack',
-    'lmdb',
 ]
 extra = [
     'msgpack',
 ]
 optional = [
-    'lmdb',
     'universal-pathlib>=0.0.22',
 ]

@@ -135,17 +132,17 @@ features = ["test", "extra"]

 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]

 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["optional"]

 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["gpu"]

 [tool.hatch.envs.test.scripts]
@@ -166,7 +163,7 @@ features = ["test", "extra", "gpu"]

 [[tool.hatch.envs.gputest.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]

 [tool.hatch.envs.gputest.scripts]

src/zarr/api/asynchronous.py

Lines changed: 6 additions & 2 deletions
@@ -396,12 +396,16 @@ async def save_array(

     mode = kwargs.pop("mode", None)
     store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
+    if np.isscalar(arr):
+        arr = np.array(arr)
+    shape = arr.shape
+    chunks = getattr(arr, "chunks", None)  # for array-likes with chunks attribute
     new = await AsyncArray.create(
         store_path,
         zarr_format=zarr_format,
-        shape=arr.shape,
+        shape=shape,
         dtype=arr.dtype,
-        chunks=arr.shape,
+        chunks=chunks,
         **kwargs,
     )
     await new.setitem(slice(None), arr)
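
As I read this change, save_array now accepts scalars (wrapping them in a 0-d array before touching arr.shape) and takes chunking from a chunks attribute when the input has one, rather than always using one chunk equal to the full shape. A minimal sketch of the new behavior through the synchronous wrapper — local paths and default settings assumed, not verified against this exact revision:

    import numpy as np
    import zarr

    # scalars no longer fail on `arr.shape`; they are wrapped in a 0-d array
    zarr.save_array("scalar.zarr", np.float64(42.0))

    # plain numpy arrays have no `chunks` attribute, so chunks=None is passed
    # and zarr falls back to its default chunk selection; chunked array-likes
    # (e.g. dask arrays) contribute their own `chunks`
    zarr.save_array("grid.zarr", np.arange(10_000).reshape(100, 100))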

src/zarr/codecs/_v2.py

Lines changed: 48 additions & 67 deletions
@@ -5,20 +5,21 @@
 from typing import TYPE_CHECKING

 import numcodecs
-from numcodecs.compat import ensure_bytes, ensure_ndarray
+from numcodecs.compat import ensure_ndarray_like

-from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
-from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
+from zarr.abc.codec import ArrayBytesCodec
 from zarr.registry import get_ndbuffer_class

 if TYPE_CHECKING:
     import numcodecs.abc

     from zarr.core.array_spec import ArraySpec
+    from zarr.core.buffer import Buffer, NDBuffer


 @dataclass(frozen=True)
-class V2Compressor(ArrayBytesCodec):
+class V2Codec(ArrayBytesCodec):
+    filters: tuple[numcodecs.abc.Codec, ...] | None
     compressor: numcodecs.abc.Codec | None

     is_fixed_size = False
@@ -28,81 +29,61 @@ async def _decode_single(
         chunk_bytes: Buffer,
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
-        if self.compressor is not None:
-            chunk_numpy_array = ensure_ndarray(
-                await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like())
-            )
+        cdata = chunk_bytes.as_array_like()
+        # decompress
+        if self.compressor:
+            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
         else:
-            chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())
+            chunk = cdata
+
+        # apply filters
+        if self.filters:
+            for f in reversed(self.filters):
+                chunk = await asyncio.to_thread(f.decode, chunk)
+
+        # view as numpy array with correct dtype
+        chunk = ensure_ndarray_like(chunk)
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if chunk_spec.dtype != object:
+            chunk = chunk.view(chunk_spec.dtype)
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError("cannot read object array without object codec")

-        # ensure correct dtype
-        if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject:
-            chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype)
+        # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order="A")
+        chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order)

-        return get_ndbuffer_class().from_numpy_array(chunk_numpy_array)
+        return get_ndbuffer_class().from_ndarray_like(chunk)

     async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        _chunk_spec: ArraySpec,
-    ) -> Buffer | None:
-        chunk_numpy_array = chunk_array.as_numpy_array()
-        if self.compressor is not None:
-            if (
-                not chunk_numpy_array.flags.c_contiguous
-                and not chunk_numpy_array.flags.f_contiguous
-            ):
-                chunk_numpy_array = chunk_numpy_array.copy(order="A")
-            encoded_chunk_bytes = ensure_bytes(
-                await asyncio.to_thread(self.compressor.encode, chunk_numpy_array)
-            )
-        else:
-            encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
-
-        return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes)
-
-    def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
-        raise NotImplementedError
-
-
-@dataclass(frozen=True)
-class V2Filters(ArrayArrayCodec):
-    filters: tuple[numcodecs.abc.Codec, ...] | None
-
-    is_fixed_size = False
-
-    async def _decode_single(
         self,
         chunk_array: NDBuffer,
         chunk_spec: ArraySpec,
-    ) -> NDBuffer:
-        chunk_ndarray = chunk_array.as_ndarray_like()
-        # apply filters in reverse order
-        if self.filters is not None:
-            for filter in self.filters[::-1]:
-                chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray)
-
-        # ensure correct chunk shape
-        if chunk_ndarray.shape != chunk_spec.shape:
-            chunk_ndarray = chunk_ndarray.reshape(
-                chunk_spec.shape,
-                order=chunk_spec.order,
-            )
+    ) -> Buffer | None:
+        chunk = chunk_array.as_ndarray_like()

-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
+        # apply filters
+        if self.filters:
+            for f in self.filters:
+                chunk = await asyncio.to_thread(f.encode, chunk)

-    async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer | None:
-        chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
+        # check object encoding
+        if ensure_ndarray_like(chunk).dtype == object:
+            raise RuntimeError("cannot write object array without object codec")

-        if self.filters is not None:
-            for filter in self.filters:
-                chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray)
+        # compress
+        if self.compressor:
+            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+        else:
+            cdata = chunk

-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
+        return chunk_spec.prototype.buffer.from_bytes(cdata)

     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
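
The merged V2Codec keeps the zarr v2 ordering in one place: on encode, filters run in declaration order and compression comes last; on decode, decompression comes first and filters run in reverse. A standalone sketch of that round trip using numcodecs directly — Delta and Blosc are illustrative choices of mine, not anything this commit pins:

    import numpy as np
    from numcodecs import Blosc, Delta

    filters = [Delta(dtype="i4")]
    compressor = Blosc(cname="zstd", clevel=5)
    data = np.arange(100, dtype="i4")

    # encode: filters in order, then compress (mirrors V2Codec._encode_single)
    chunk = data
    for f in filters:
        chunk = f.encode(chunk)
    cdata = compressor.encode(chunk)

    # decode: decompress, then filters in reverse (mirrors V2Codec._decode_single)
    chunk = compressor.decode(cdata)
    for f in reversed(filters):
        chunk = f.decode(chunk)
    out = np.asarray(chunk, dtype="i4").reshape(data.shape)
    assert (out == data).all()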

src/zarr/core/array.py

Lines changed: 3 additions & 4 deletions
@@ -13,7 +13,7 @@
 from zarr._compat import _deprecate_positional_args
 from zarr.abc.store import Store, set_or_delete
 from zarr.codecs import _get_default_array_bytes_codec
-from zarr.codecs._v2 import V2Compressor, V2Filters
+from zarr.codecs._v2 import V2Codec
 from zarr.core._info import ArrayInfo
 from zarr.core.attributes import Attributes
 from zarr.core.buffer import (
@@ -119,9 +119,8 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
     if isinstance(metadata, ArrayV3Metadata):
         return get_pipeline_class().from_codecs(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
-        return get_pipeline_class().from_codecs(
-            [V2Filters(metadata.filters), V2Compressor(metadata.compressor)]
-        )
+        v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
+        return get_pipeline_class().from_codecs([v2_codec])
     else:
         raise TypeError
src/zarr/core/group.py

Lines changed: 22 additions & 2 deletions
@@ -601,6 +601,23 @@ def from_dict(
             store_path=store_path,
         )

+    async def setitem(self, key: str, value: Any) -> None:
+        """Fastpath for creating a new array
+
+        New arrays will be created with default array settings for the array type.
+
+        Parameters
+        ----------
+        key : str
+            Array name
+        value : array-like
+            Array data
+        """
+        path = self.store_path / key
+        await async_api.save_array(
+            store=path, arr=value, zarr_format=self.metadata.zarr_format, exists_ok=True
+        )
+
     async def getitem(
         self,
         key: str,
@@ -1456,8 +1473,11 @@ def __len__(self) -> int:
         return self.nmembers()

     def __setitem__(self, key: str, value: Any) -> None:
-        """__setitem__ is not supported in v3"""
-        raise NotImplementedError
+        """Fastpath for creating a new array.
+
+        New arrays will be created using default settings for the array type.
+        """
+        self._sync(self._async_group.setitem(key, value))

     def __repr__(self) -> str:
         return f"<Group {self.store_path}>"

src/zarr/core/indexing.py

Lines changed: 3 additions & 1 deletion
@@ -94,6 +94,8 @@ def __iter__(self) -> Iterator[ChunkProjection]: ...


 def ceildiv(a: float, b: float) -> int:
+    if a == 0:
+        return 0
     return math.ceil(a / b)


@@ -374,7 +376,7 @@ def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None:

     def __iter__(self) -> Iterator[ChunkDimProjection]:
         # figure out the range of chunks we need to visit
-        dim_chunk_ix_from = self.start // self.dim_chunk_len
+        dim_chunk_ix_from = 0 if self.start == 0 else self.start // self.dim_chunk_len
         dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len)

         # iterate over chunks in range
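
Both guards appear to target zero-size dimensions, where dim_chunk_len is 0 and the old code divided by zero. A small sketch of the guarded helper in isolation:

    import math

    def ceildiv(a: float, b: float) -> int:
        # a zero-length dimension has zero chunks; short-circuit before
        # 0 / 0 can raise ZeroDivisionError
        if a == 0:
            return 0
        return math.ceil(a / b)

    assert ceildiv(0, 0) == 0   # previously: ZeroDivisionError
    assert ceildiv(10, 4) == 3  # unchanged for the normal case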
