
Commit 2f7c5e5

Merge remote-tracking branch 'origin/document-group' into document-group
2 parents: 1bbfc17 + a027f0d

File tree: 21 files changed (+525 −133 lines)

.github/workflows/gpu_test.yml

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['2.0']
+        numpy-version: ['2.1']
         dependency-set: ["minimal"]
 
     steps:

.github/workflows/hypothesis.yaml

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.11']
-        numpy-version: ['1.26']
+        numpy-version: ['2.1']
         dependency-set: ["optional"]
 
     steps:

.github/workflows/test.yml

Lines changed: 22 additions & 3 deletions

@@ -16,14 +16,33 @@ concurrency:
 
 jobs:
   test:
-    name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
+    name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
 
-    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ['3.11', '3.12', '3.13']
-        numpy-version: ['1.25', '1.26', '2.0']
+        numpy-version: ['1.25', '2.1']
         dependency-set: ["minimal", "optional"]
+        os: ["ubuntu-latest"]
+        include:
+          - python-version: '3.11'
+            numpy-version: '1.25'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          - python-version: '3.13'
+            numpy-version: '2.1'
+            dependency-set: 'optional'
+            os: 'macos-latest'
+          # https://github.com/zarr-developers/zarr-python/issues/2438
+          # - python-version: '3.11'
+          #   numpy-version: '1.25'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+          # - python-version: '3.13'
+          #   numpy-version: '2.1'
+          #   dependency-set: 'optional'
+          #   os: 'windows-latest'
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
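The include: entries extend rather than multiply the base matrix. A quick illustrative sketch of the resulting job count (plain Python, not part of the diff):

from itertools import product

# Base matrix: 3 Pythons x 2 numpy versions x 2 dependency sets x 1 OS.
base = list(product(["3.11", "3.12", "3.13"],
                    ["1.25", "2.1"],
                    ["minimal", "optional"],
                    ["ubuntu-latest"]))

# `include:` appends combinations that do not match an existing job;
# here, the oldest and newest supported corners are added on macOS.
extra = [("3.11", "1.25", "optional", "macos-latest"),
         ("3.13", "2.1", "optional", "macos-latest")]

assert len(base) == 12 and len(base) + len(extra) == 14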

.pre-commit-config.yaml

Lines changed: 2 additions & 4 deletions

@@ -7,7 +7,7 @@ default_language_version:
   python: python3
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.0
+    rev: v0.7.1
     hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
@@ -22,7 +22,7 @@ repos:
     hooks:
      - id: check-yaml
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.12.1
+    rev: v1.13.0
     hooks:
       - id: mypy
         files: src|tests
@@ -37,8 +37,6 @@ repos:
         - universal-pathlib
         # Tests
         - pytest
-        # Zarr v2
-        - types-redis
   - repo: https://github.com/scientific-python/cookie
     rev: 2024.08.19
     hooks:

pyproject.toml

Lines changed: 5 additions & 8 deletions

@@ -61,7 +61,6 @@ test = [
     "pytest",
     "pytest-cov",
     "msgpack",
-    "lmdb",
     "s3fs",
     "pytest-asyncio",
     "moto[s3]",
@@ -84,21 +83,19 @@ gpu = [
 docs = [
     'sphinx==8.1.3',
     'sphinx-autobuild>=2021.3.14',
-    'sphinx-autoapi==3.3.2',
+    'sphinx-autoapi==3.3.3',
     'sphinx_design',
     'sphinx-issues',
     'sphinx-copybutton',
     'pydata-sphinx-theme',
     'numpydoc',
     'numcodecs[msgpack]',
     'msgpack',
-    'lmdb',
 ]
 extra = [
     'msgpack',
 ]
 optional = [
-    'lmdb',
     'universal-pathlib>=0.0.22',
 ]
 
@@ -135,17 +132,17 @@ features = ["test", "extra"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["optional"]
 
 [[tool.hatch.envs.test.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 features = ["gpu"]
 
 [tool.hatch.envs.test.scripts]
@@ -166,7 +163,7 @@ features = ["test", "extra", "gpu"]
 
 [[tool.hatch.envs.gputest.matrix]]
 python = ["3.11", "3.12", "3.13"]
-numpy = ["1.25", "1.26", "2.0"]
+numpy = ["1.25", "2.1"]
 version = ["minimal"]
 
 [tool.hatch.envs.gputest.scripts]

src/zarr/api/asynchronous.py

Lines changed: 6 additions & 2 deletions

@@ -396,12 +396,16 @@ async def save_array(
 
     mode = kwargs.pop("mode", None)
     store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
+    if np.isscalar(arr):
+        arr = np.array(arr)
+    shape = arr.shape
+    chunks = getattr(arr, "chunks", None)  # for array-likes with chunks attribute
     new = await AsyncArray.create(
         store_path,
         zarr_format=zarr_format,
-        shape=arr.shape,
+        shape=shape,
         dtype=arr.dtype,
-        chunks=arr.shape,
+        chunks=chunks,
         **kwargs,
     )
     await new.setitem(slice(None), arr)
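A minimal sketch of the behavior this change enables, via the synchronous zarr.save_array wrapper around the function patched above (store path, group paths, and values are illustrative):

import numpy as np
import zarr

# Scalars are wrapped in a 0-d array before `shape` is read, so saving a
# bare Python number no longer fails when accessing `arr.shape`.
zarr.save_array("example.zarr", 42, path="answer")

# Plain numpy arrays have no `chunks` attribute, so `chunks` falls back to
# None (letting zarr choose a default chunking) rather than the old
# single-chunk `arr.shape`; array-likes that do expose `chunks` keep theirs.
zarr.save_array("example.zarr", np.arange(10), path="values")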

src/zarr/codecs/_v2.py

Lines changed: 48 additions & 67 deletions

@@ -5,20 +5,21 @@
 from typing import TYPE_CHECKING
 
 import numcodecs
-from numcodecs.compat import ensure_bytes, ensure_ndarray
+from numcodecs.compat import ensure_ndarray_like
 
-from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
-from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
+from zarr.abc.codec import ArrayBytesCodec
 from zarr.registry import get_ndbuffer_class
 
 if TYPE_CHECKING:
     import numcodecs.abc
 
     from zarr.core.array_spec import ArraySpec
+    from zarr.core.buffer import Buffer, NDBuffer
 
 
 @dataclass(frozen=True)
-class V2Compressor(ArrayBytesCodec):
+class V2Codec(ArrayBytesCodec):
+    filters: tuple[numcodecs.abc.Codec, ...] | None
     compressor: numcodecs.abc.Codec | None
 
     is_fixed_size = False
@@ -28,81 +29,61 @@ async def _decode_single(
         chunk_bytes: Buffer,
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
-        if self.compressor is not None:
-            chunk_numpy_array = ensure_ndarray(
-                await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like())
-            )
+        cdata = chunk_bytes.as_array_like()
+        # decompress
+        if self.compressor:
+            chunk = await asyncio.to_thread(self.compressor.decode, cdata)
         else:
-            chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())
+            chunk = cdata
+
+        # apply filters
+        if self.filters:
+            for f in reversed(self.filters):
+                chunk = await asyncio.to_thread(f.decode, chunk)
+
+        # view as numpy array with correct dtype
+        chunk = ensure_ndarray_like(chunk)
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if chunk_spec.dtype != object:
+            chunk = chunk.view(chunk_spec.dtype)
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError("cannot read object array without object codec")
 
-        # ensure correct dtype
-        if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject:
-            chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype)
+        # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order="A")
+        chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order)
 
-        return get_ndbuffer_class().from_numpy_array(chunk_numpy_array)
+        return get_ndbuffer_class().from_ndarray_like(chunk)
 
     async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        _chunk_spec: ArraySpec,
-    ) -> Buffer | None:
-        chunk_numpy_array = chunk_array.as_numpy_array()
-        if self.compressor is not None:
-            if (
-                not chunk_numpy_array.flags.c_contiguous
-                and not chunk_numpy_array.flags.f_contiguous
-            ):
-                chunk_numpy_array = chunk_numpy_array.copy(order="A")
-            encoded_chunk_bytes = ensure_bytes(
-                await asyncio.to_thread(self.compressor.encode, chunk_numpy_array)
-            )
-        else:
-            encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
-
-        return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes)
-
-    def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
-        raise NotImplementedError
-
-
-@dataclass(frozen=True)
-class V2Filters(ArrayArrayCodec):
-    filters: tuple[numcodecs.abc.Codec, ...] | None
-
-    is_fixed_size = False
-
-    async def _decode_single(
         self,
         chunk_array: NDBuffer,
         chunk_spec: ArraySpec,
-    ) -> NDBuffer:
-        chunk_ndarray = chunk_array.as_ndarray_like()
-        # apply filters in reverse order
-        if self.filters is not None:
-            for filter in self.filters[::-1]:
-                chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray)
-
-        # ensure correct chunk shape
-        if chunk_ndarray.shape != chunk_spec.shape:
-            chunk_ndarray = chunk_ndarray.reshape(
-                chunk_spec.shape,
-                order=chunk_spec.order,
-            )
+    ) -> Buffer | None:
+        chunk = chunk_array.as_ndarray_like()
 
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
+        # apply filters
+        if self.filters:
+            for f in self.filters:
+                chunk = await asyncio.to_thread(f.encode, chunk)
 
-    async def _encode_single(
-        self,
-        chunk_array: NDBuffer,
-        chunk_spec: ArraySpec,
-    ) -> NDBuffer | None:
-        chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
+        # check object encoding
+        if ensure_ndarray_like(chunk).dtype == object:
+            raise RuntimeError("cannot write object array without object codec")
 
-        if self.filters is not None:
-            for filter in self.filters:
-                chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray)
+        # compress
+        if self.compressor:
+            cdata = await asyncio.to_thread(self.compressor.encode, chunk)
+        else:
+            cdata = chunk
 
-        return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
+        return chunk_spec.prototype.buffer.from_bytes(cdata)
 
     def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         raise NotImplementedError
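The unified V2Codec applies filters and compression in one ordered pipeline. The same order of operations can be reproduced with plain numcodecs; a standalone sketch (the Delta/Blosc codec choices are illustrative, not taken from this diff):

import numpy as np
from numcodecs import Blosc, Delta
from numcodecs.compat import ensure_ndarray_like

compressor = Blosc()
filters = [Delta(dtype="i4")]
chunk = np.arange(16, dtype="i4")

# encode: apply filters in declaration order, then compress
data = chunk
for f in filters:
    data = f.encode(data)
cdata = compressor.encode(data)

# decode: decompress, apply filters in reverse order, then view the raw
# buffer with the target dtype and restore the chunk shape
data = compressor.decode(cdata)
for f in reversed(filters):
    data = f.decode(data)
data = ensure_ndarray_like(data).view(chunk.dtype).reshape(chunk.shape)

assert (data == chunk).all()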
