Skip to content

Commit 61b4477

Browse files
committed
Merge branch 'main' of https://github.com/zarr-developers/zarr-python into doc/3.0-updates
2 parents 7cfb8f8 + 6930fe8 commit 61b4477

File tree

17 files changed

+572
-204
lines changed

17 files changed

+572
-204
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ repos:
2828
files: src|tests
2929
additional_dependencies:
3030
# Package dependencies
31+
- packaging
3132
- donfig
3233
- numcodecs[crc32c]
33-
- numpy
34+
- numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved
3435
- typing_extensions
3536
- universal-pathlib
3637
# Tests

README-v3.md

Lines changed: 0 additions & 49 deletions
This file was deleted.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ maintainers = [
2626
requires-python = ">=3.11"
2727
# If you add a new dependency here, please also add it to .pre-commit-config.yaml
2828
dependencies = [
29+
'packaging>=22.0',
2930
'numpy>=1.25',
3031
'numcodecs[crc32c]>=0.14',
3132
'typing_extensions>=4.9',
@@ -177,6 +178,7 @@ serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0"
177178
[tool.hatch.envs.upstream]
178179
python = "3.13"
179180
dependencies = [
181+
'packaging @ git+https://github.com/pypa/packaging',
180182
'numpy', # from scientific-python-nightly-wheels
181183
'numcodecs @ git+https://github.com/zarr-developers/numcodecs',
182184
'fsspec @ git+https://github.com/fsspec/filesystem_spec',
@@ -210,6 +212,7 @@ See Spec 0000 for details and drop schedule: https://scientific-python.org/specs
210212
"""
211213
python = "3.11"
212214
dependencies = [
215+
'packaging==22.*',
213216
'numpy==1.25.*',
214217
'numcodecs==0.14.*', # 0.14 needed for zarr3 codecs
215218
'fsspec==2022.10.0',

src/zarr/api/asynchronous.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
ChunkCoords,
1818
MemoryOrder,
1919
ZarrFormat,
20+
parse_dtype,
2021
)
2122
from zarr.core.config import config
2223
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2324
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
25+
from zarr.core.metadata.v2 import _default_filters_and_compressor
2426
from zarr.errors import NodeTypeValidationError
2527
from zarr.storage import (
2628
StoreLike,
@@ -401,7 +403,7 @@ async def save_array(
401403
arr : ndarray
402404
NumPy array with data to save.
403405
zarr_format : {2, 3, None}, optional
404-
The zarr format to use when saving.
406+
The zarr format to use when saving (default is 3 if not specified).
405407
path : str or None, optional
406408
The path within the store where the array will be saved.
407409
storage_options : dict
@@ -817,19 +819,45 @@ async def create(
817819
shape : int or tuple of ints
818820
Array shape.
819821
chunks : int or tuple of ints, optional
820-
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
821-
False, will be set to `shape`, i.e., single chunk for the whole array.
822-
If an int, the chunk size in each dimension will be given by the value
823-
of `chunks`. Default is True.
822+
The shape of the array's chunks.
823+
V2 only. V3 arrays should use `chunk_shape` instead.
824+
If not specified, default values are guessed based on the shape and dtype.
824825
dtype : str or dtype, optional
825826
NumPy dtype.
827+
chunk_shape : int or tuple of ints, optional
828+
The shape of the Array's chunks (default is None).
829+
V3 only. V2 arrays should use `chunks` instead.
830+
chunk_key_encoding : ChunkKeyEncoding, optional
831+
A specification of how the chunk keys are represented in storage.
832+
V3 only. V2 arrays should use `dimension_separator` instead.
833+
Default is ``("default", "/")``.
834+
codecs : Sequence of Codecs or dicts, optional
835+
An iterable of Codec or dict serializations of Codecs. The elements of
836+
this collection specify the transformation from array values to stored bytes.
837+
V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
838+
839+
If no codecs are provided, default codecs will be used:
840+
841+
- For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
842+
- For Unicode strings, the default is ``VLenUTF8Codec``.
843+
- For bytes or objects, the default is ``VLenBytesCodec``.
844+
845+
These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
826846
compressor : Codec, optional
827-
Primary compressor.
828-
fill_value : object
847+
Primary compressor to compress chunk data.
848+
V2 only. V3 arrays should use ``codecs`` instead.
849+
850+
If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
851+
852+
- For numeric arrays, the default is ``ZstdCodec``.
853+
- For Unicode strings, the default is ``VLenUTF8Codec``.
854+
- For bytes or objects, the default is ``VLenBytesCodec``.
855+
856+
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
fill_value : object
829857
Default value to use for uninitialized portions of the array.
830858
order : {'C', 'F'}, optional
831859
Memory layout to be used within each chunk.
832-
Default is set in Zarr's config (`array.order`).
860+
If not specified, default is taken from the Zarr config ``array.order``.
833861
store : Store or str
834862
Store or path to directory in file system or name of zip file.
835863
synchronizer : object, optional
@@ -844,6 +872,8 @@ async def create(
844872
for storage of both chunks and metadata.
845873
filters : sequence of Codecs, optional
846874
Sequence of filters to use to encode chunk data prior to compression.
875+
V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
876+
compressor will be used (see ``compressor`` for details).
847877
cache_metadata : bool, optional
848878
If True, array configuration metadata will be cached for the
849879
lifetime of the object. If False, array metadata will be reloaded
@@ -859,7 +889,8 @@ async def create(
859889
A codec to encode object arrays, only needed if dtype=object.
860890
dimension_separator : {'.', '/'}, optional
861891
Separator placed between the dimensions of a chunk.
862-
892+
V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
893+
Default is ".".
863894
.. versionadded:: 2.8
864895
865896
write_empty_chunks : bool, optional
@@ -875,6 +906,7 @@ async def create(
875906
876907
zarr_format : {2, 3, None}, optional
877908
The zarr format to use when saving.
909+
Default is 3.
878910
meta_array : array-like, optional
879911
An array instance to use for determining arrays to create and return
880912
to users. Use `numpy.empty(())` by default.
@@ -894,9 +926,13 @@ async def create(
894926
or _default_zarr_version()
895927
)
896928

897-
if zarr_format == 2 and chunks is None:
898-
chunks = shape
899-
elif zarr_format == 3 and chunk_shape is None:
929+
if zarr_format == 2:
930+
if chunks is None:
931+
chunks = shape
932+
dtype = parse_dtype(dtype, zarr_format)
933+
if not filters and not compressor:
934+
filters, compressor = _default_filters_and_compressor(dtype)
935+
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
900936
if chunks is not None:
901937
chunk_shape = chunks
902938
chunks = None

src/zarr/codecs/__init__.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Any
4-
5-
if TYPE_CHECKING:
6-
import numpy as np
7-
83
from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
94
from zarr.codecs.bytes import BytesCodec, Endian
105
from zarr.codecs.crc32c_ import Crc32cCodec
@@ -13,7 +8,6 @@
138
from zarr.codecs.transpose import TransposeCodec
149
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
1510
from zarr.codecs.zstd import ZstdCodec
16-
from zarr.core.metadata.v3 import DataType
1711

1812
__all__ = [
1913
"BloscCname",
@@ -30,15 +24,3 @@
3024
"VLenUTF8Codec",
3125
"ZstdCodec",
3226
]
33-
34-
35-
def _get_default_array_bytes_codec(
36-
np_dtype: np.dtype[Any],
37-
) -> BytesCodec | VLenUTF8Codec | VLenBytesCodec:
38-
dtype = DataType.from_numpy(np_dtype)
39-
if dtype == DataType.string:
40-
return VLenUTF8Codec()
41-
elif dtype == DataType.bytes:
42-
return VLenBytesCodec()
43-
else:
44-
return BytesCodec()

src/zarr/codecs/_v2.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import TYPE_CHECKING
66

77
import numcodecs
8+
import numpy as np
89
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
910

1011
from zarr.abc.codec import ArrayBytesCodec
@@ -46,7 +47,17 @@ async def _decode_single(
4647
# special case object dtype, because incorrect handling can lead to
4748
# segfaults and other bad things happening
4849
if chunk_spec.dtype != object:
49-
chunk = chunk.view(chunk_spec.dtype)
50+
try:
51+
chunk = chunk.view(chunk_spec.dtype)
52+
except TypeError:
53+
# this will happen if the dtype of the chunk
54+
# does not match the dtype of the array spec, e.g. if
55+
# the dtype of the chunk_spec is a string dtype, but the chunk
56+
# is an object array. In this case, we need to convert the object
57+
# array to the correct dtype.
58+
59+
chunk = np.array(chunk).astype(chunk_spec.dtype)
60+
5061
elif chunk.dtype != object:
5162
# If we end up here, someone must have hacked around with the filters.
5263
# We cannot deal with object arrays unless there is an object

0 commit comments

Comments
 (0)