Skip to content

Commit ca21289

Browse files
committed
merge
2 parents 08a2d52 + 4cb8ddd commit ca21289

File tree

14 files changed

+573
-159
lines changed

14 files changed

+573
-159
lines changed

src/zarr/api/asynchronous.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
ZarrFormat,
2020
_warn_order_kwarg,
2121
_warn_write_empty_chunks_kwarg,
22+
parse_dtype,
2223
)
2324
from zarr.core.config import config
2425
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2526
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
27+
from zarr.core.metadata.v2 import _default_filters_and_compressor
2628
from zarr.errors import NodeTypeValidationError
2729
from zarr.storage import (
2830
StoreLike,
@@ -403,7 +405,7 @@ async def save_array(
403405
arr : ndarray
404406
NumPy array with data to save.
405407
zarr_format : {2, 3, None}, optional
406-
The zarr format to use when saving.
408+
The zarr format to use when saving (default is 3 if not specified).
407409
path : str or None, optional
408410
The path within the store where the array will be saved.
409411
storage_options : dict
@@ -819,20 +821,46 @@ async def create(
819821
shape : int or tuple of ints
820822
Array shape.
821823
chunks : int or tuple of ints, optional
822-
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
823-
False, will be set to `shape`, i.e., single chunk for the whole array.
824-
If an int, the chunk size in each dimension will be given by the value
825-
of `chunks`. Default is True.
824+
The shape of the array's chunks.
825+
V2 only. V3 arrays should use `chunk_shape` instead.
826+
If not specified, default values are guessed based on the shape and dtype.
826827
dtype : str or dtype, optional
827828
NumPy dtype.
829+
chunk_shape : int or tuple of ints, optional
830+
The shape of the Array's chunks (default is None).
831+
V3 only. V2 arrays should use `chunks` instead.
832+
chunk_key_encoding : ChunkKeyEncoding, optional
833+
A specification of how the chunk keys are represented in storage.
834+
V3 only. V2 arrays should use `dimension_separator` instead.
835+
Default is ``("default", "/")``.
836+
codecs : Sequence of Codecs or dicts, optional
837+
An iterable of Codec or dict serializations of Codecs. The elements of
838+
this collection specify the transformation from array values to stored bytes.
839+
V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
840+
841+
If no codecs are provided, default codecs will be used:
842+
843+
- For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
844+
- For Unicode strings, the default is ``VLenUTF8Codec``.
845+
- For bytes or objects, the default is ``VLenBytesCodec``.
846+
847+
These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
828848
compressor : Codec, optional
829-
Primary compressor.
830-
fill_value : object
849+
Primary compressor to compress chunk data.
850+
V2 only. V3 arrays should use ``codecs`` instead.
851+
852+
If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
853+
854+
- For numeric arrays, the default is ``ZstdCodec``.
855+
- For Unicode strings, the default is ``VLenUTF8Codec``.
856+
- For bytes or objects, the default is ``VLenBytesCodec``.
857+
858+
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
fill_value : object
831859
Default value to use for uninitialized portions of the array.
832860
order : {'C', 'F'}, optional
833861
Deprecated in favor of the `array.order` configuration variable.
834862
Memory layout to be used within each chunk.
835-
Default is set in Zarr's config (`array.order`).
863+
If not specified, default is taken from the Zarr config ``array.order``.
836864
store : Store or str
837865
Store or path to directory in file system or name of zip file.
838866
synchronizer : object, optional
@@ -847,6 +875,8 @@ async def create(
847875
for storage of both chunks and metadata.
848876
filters : sequence of Codecs, optional
849877
Sequence of filters to use to encode chunk data prior to compression.
878+
V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
879+
compressor will be used (see ``compressor`` for details).
850880
cache_metadata : bool, optional
851881
If True, array configuration metadata will be cached for the
852882
lifetime of the object. If False, array metadata will be reloaded
@@ -862,7 +892,8 @@ async def create(
862892
A codec to encode object arrays, only needed if dtype=object.
863893
dimension_separator : {'.', '/'}, optional
864894
Separator placed between the dimensions of a chunk.
865-
895+
V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
896+
Default is ".".
866897
.. versionadded:: 2.8
867898
868899
write_empty_chunks : bool, optional
@@ -880,6 +911,7 @@ async def create(
880911
881912
zarr_format : {2, 3, None}, optional
882913
The zarr format to use when saving.
914+
Default is 3.
883915
meta_array : array-like, optional
884916
An array instance to use for determining arrays to create and return
885917
to users. Use `numpy.empty(())` by default.
@@ -899,9 +931,13 @@ async def create(
899931
or _default_zarr_version()
900932
)
901933

902-
if zarr_format == 2 and chunks is None:
903-
chunks = shape
904-
elif zarr_format == 3 and chunk_shape is None:
934+
if zarr_format == 2:
935+
if chunks is None:
936+
chunks = shape
937+
dtype = parse_dtype(dtype, zarr_format)
938+
if not filters and not compressor:
939+
filters, compressor = _default_filters_and_compressor(dtype)
940+
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
905941
if chunks is not None:
906942
chunk_shape = chunks
907943
chunks = None

src/zarr/codecs/__init__.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Any
4-
5-
if TYPE_CHECKING:
6-
import numpy as np
7-
83
from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
94
from zarr.codecs.bytes import BytesCodec, Endian
105
from zarr.codecs.crc32c_ import Crc32cCodec
@@ -13,7 +8,6 @@
138
from zarr.codecs.transpose import TransposeCodec
149
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
1510
from zarr.codecs.zstd import ZstdCodec
16-
from zarr.core.metadata.v3 import DataType
1711

1812
__all__ = [
1913
"BloscCname",
@@ -30,15 +24,3 @@
3024
"VLenUTF8Codec",
3125
"ZstdCodec",
3226
]
33-
34-
35-
def _get_default_array_bytes_codec(
36-
np_dtype: np.dtype[Any],
37-
) -> BytesCodec | VLenUTF8Codec | VLenBytesCodec:
38-
dtype = DataType.from_numpy(np_dtype)
39-
if dtype == DataType.string:
40-
return VLenUTF8Codec()
41-
elif dtype == DataType.bytes:
42-
return VLenBytesCodec()
43-
else:
44-
return BytesCodec()

src/zarr/codecs/_v2.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import TYPE_CHECKING
66

77
import numcodecs
8+
import numpy as np
89
from numcodecs.compat import ensure_bytes, ensure_ndarray_like
910

1011
from zarr.abc.codec import ArrayBytesCodec
@@ -46,7 +47,17 @@ async def _decode_single(
4647
# special case object dtype, because incorrect handling can lead to
4748
# segfaults and other bad things happening
4849
if chunk_spec.dtype != object:
49-
chunk = chunk.view(chunk_spec.dtype)
50+
try:
51+
chunk = chunk.view(chunk_spec.dtype)
52+
except TypeError:
53+
# this will happen if the dtype of the chunk
54+
# does not match the dtype of the array spec, e.g. if
55+
# the dtype of the chunk_spec is a string dtype, but the chunk
56+
# is an object array. In this case, we need to convert the object
57+
# array to the correct dtype.
58+
59+
chunk = np.array(chunk).astype(chunk_spec.dtype)
60+
5061
elif chunk.dtype != object:
5162
# If we end up here, someone must have hacked around with the filters.
5263
# We cannot deal with object arrays unless there is an object

0 commit comments

Comments
 (0)