Skip to content

Commit a829fbb

Browse files
authored
Merge branch 'main' into doc/3.0-updates
2 parents 44e768a + 6dc6d07 commit a829fbb

24 files changed

+967
-309
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ repos:
2828
files: src|tests
2929
additional_dependencies:
3030
# Package dependencies
31+
- packaging
3132
- donfig
3233
- numcodecs[crc32c]
33-
- numpy
34+
- numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved
3435
- typing_extensions
3536
- universal-pathlib
3637
# Tests

README-v3.md

Lines changed: 0 additions & 49 deletions
This file was deleted.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ maintainers = [
2626
requires-python = ">=3.11"
2727
# If you add a new dependency here, please also add it to .pre-commit-config.yaml
2828
dependencies = [
29+
'packaging>=22.0',
2930
'numpy>=1.25',
3031
'numcodecs[crc32c]>=0.14',
3132
'typing_extensions>=4.9',
@@ -177,6 +178,7 @@ serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0"
177178
[tool.hatch.envs.upstream]
178179
python = "3.13"
179180
dependencies = [
181+
'packaging @ git+https://github.com/pypa/packaging',
180182
'numpy', # from scientific-python-nightly-wheels
181183
'numcodecs @ git+https://github.com/zarr-developers/numcodecs',
182184
'fsspec @ git+https://github.com/fsspec/filesystem_spec',
@@ -210,6 +212,7 @@ See Spec 0000 for details and drop schedule: https://scientific-python.org/specs
210212
"""
211213
python = "3.11"
212214
dependencies = [
215+
'packaging==22.*',
213216
'numpy==1.25.*',
214217
'numcodecs==0.14.*', # 0.14 needed for zarr3 codecs
215218
'fsspec==2022.10.0',

src/zarr/api/asynchronous.py

Lines changed: 102 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,22 @@
1010
from typing_extensions import deprecated
1111

1212
from zarr.core.array import Array, AsyncArray, get_array_metadata
13+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
1314
from zarr.core.buffer import NDArrayLike
1415
from zarr.core.common import (
1516
JSON,
1617
AccessModeLiteral,
1718
ChunkCoords,
1819
MemoryOrder,
1920
ZarrFormat,
21+
_warn_order_kwarg,
22+
_warn_write_empty_chunks_kwarg,
23+
parse_dtype,
2024
)
2125
from zarr.core.config import config
2226
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2327
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
28+
from zarr.core.metadata.v2 import _default_filters_and_compressor
2429
from zarr.errors import NodeTypeValidationError
2530
from zarr.storage import (
2631
StoreLike,
@@ -401,7 +406,7 @@ async def save_array(
401406
arr : ndarray
402407
NumPy array with data to save.
403408
zarr_format : {2, 3, None}, optional
404-
The zarr format to use when saving.
409+
The zarr format to use when saving (default is 3 if not specified).
405410
path : str or None, optional
406411
The path within the store where the array will be saved.
407412
storage_options : dict
@@ -792,7 +797,7 @@ async def create(
792797
read_only: bool | None = None,
793798
object_codec: Codec | None = None, # TODO: type has changed
794799
dimension_separator: Literal[".", "/"] | None = None,
795-
write_empty_chunks: bool = False, # TODO: default has changed
800+
write_empty_chunks: bool | None = None,
796801
zarr_version: ZarrFormat | None = None, # deprecated
797802
zarr_format: ZarrFormat | None = None,
798803
meta_array: Any | None = None, # TODO: need type
@@ -808,6 +813,7 @@ async def create(
808813
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
809814
dimension_names: Iterable[str] | None = None,
810815
storage_options: dict[str, Any] | None = None,
816+
config: ArrayConfig | ArrayConfigParams | None = None,
811817
**kwargs: Any,
812818
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
813819
"""Create an array.
@@ -817,19 +823,47 @@ async def create(
817823
shape : int or tuple of ints
818824
Array shape.
819825
chunks : int or tuple of ints, optional
820-
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
821-
False, will be set to `shape`, i.e., single chunk for the whole array.
822-
If an int, the chunk size in each dimension will be given by the value
823-
of `chunks`. Default is True.
826+
The shape of the array's chunks.
827+
V2 only. V3 arrays should use `chunk_shape` instead.
828+
If not specified, default values are guessed based on the shape and dtype.
824829
dtype : str or dtype, optional
825830
NumPy dtype.
831+
chunk_shape : int or tuple of ints, optional
832+
The shape of the Array's chunks (default is None).
833+
V3 only. V2 arrays should use `chunks` instead.
834+
chunk_key_encoding : ChunkKeyEncoding, optional
835+
A specification of how the chunk keys are represented in storage.
836+
V3 only. V2 arrays should use `dimension_separator` instead.
837+
Default is ``("default", "/")``.
838+
codecs : Sequence of Codecs or dicts, optional
839+
An iterable of Codec or dict serializations of Codecs. The elements of
840+
this collection specify the transformation from array values to stored bytes.
841+
V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
842+
843+
If no codecs are provided, default codecs will be used:
844+
845+
- For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
846+
- For Unicode strings, the default is ``VLenUTF8Codec``.
847+
- For bytes or objects, the default is ``VLenBytesCodec``.
848+
849+
These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
826850
compressor : Codec, optional
827-
Primary compressor.
828-
fill_value : object
851+
Primary compressor to compress chunk data.
852+
V2 only. V3 arrays should use ``codecs`` instead.
853+
854+
If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
855+
856+
- For numeric arrays, the default is ``ZstdCodec``.
857+
- For Unicode strings, the default is ``VLenUTF8Codec``.
858+
- For bytes or objects, the default is ``VLenBytesCodec``.
859+
860+
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
fill_value : object
829861
Default value to use for uninitialized portions of the array.
830862
order : {'C', 'F'}, optional
863+
Deprecated in favor of the ``config`` keyword argument.
864+
Pass ``{'order': <value>}`` to ``create`` instead of using this parameter.
831865
Memory layout to be used within each chunk.
832-
Default is set in Zarr's config (`array.order`).
866+
If not specified, the ``array.order`` parameter in the global config will be used.
833867
store : Store or str
834868
Store or path to directory in file system or name of zip file.
835869
synchronizer : object, optional
@@ -844,6 +878,8 @@ async def create(
844878
for storage of both chunks and metadata.
845879
filters : sequence of Codecs, optional
846880
Sequence of filters to use to encode chunk data prior to compression.
881+
V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
882+
compressor will be used (see ``compressor`` for details).
847883
cache_metadata : bool, optional
848884
If True, array configuration metadata will be cached for the
849885
lifetime of the object. If False, array metadata will be reloaded
@@ -859,30 +895,28 @@ async def create(
859895
A codec to encode object arrays, only needed if dtype=object.
860896
dimension_separator : {'.', '/'}, optional
861897
Separator placed between the dimensions of a chunk.
862-
863-
.. versionadded:: 2.8
864-
898+
V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
899+
Default is ".".
865900
write_empty_chunks : bool, optional
866-
If True (default), all chunks will be stored regardless of their
901+
Deprecated in favor of the ``config`` keyword argument.
902+
Pass ``{'write_empty_chunks': <value>}`` to ``create`` instead of using this parameter.
903+
If True, all chunks will be stored regardless of their
867904
contents. If False, each chunk is compared to the array's fill value
868905
prior to storing. If a chunk is uniformly equal to the fill value, then
869906
that chunk is not stored, and the store entry for that chunk's key
870-
is deleted. This setting enables sparser storage, as only chunks with
871-
non-fill-value data are stored, at the expense of overhead associated
872-
with checking the data of each chunk.
873-
874-
.. versionadded:: 2.11
875-
907+
is deleted.
876908
zarr_format : {2, 3, None}, optional
877909
The zarr format to use when saving.
910+
Default is 3.
878911
meta_array : array-like, optional
879912
An array instance to use for determining arrays to create and return
880913
to users. Use `numpy.empty(())` by default.
881-
882-
.. versionadded:: 2.13
883914
storage_options : dict
884915
If using an fsspec URL to create the store, these will be passed to
885916
the backend implementation. Ignored otherwise.
917+
config : ArrayConfig or ArrayConfigParams, optional
918+
Runtime configuration of the array. If provided, will override the
919+
default values from `zarr.config.array`.
886920
887921
Returns
888922
-------
@@ -894,9 +928,13 @@ async def create(
894928
or _default_zarr_version()
895929
)
896930

897-
if zarr_format == 2 and chunks is None:
898-
chunks = shape
899-
elif zarr_format == 3 and chunk_shape is None:
931+
if zarr_format == 2:
932+
if chunks is None:
933+
chunks = shape
934+
dtype = parse_dtype(dtype, zarr_format)
935+
if not filters and not compressor:
936+
filters, compressor = _default_filters_and_compressor(dtype)
937+
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
900938
if chunks is not None:
901939
chunk_shape = chunks
902940
chunks = None
@@ -915,26 +953,47 @@ async def create(
915953
warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2)
916954
if read_only is not None:
917955
warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2)
918-
if dimension_separator is not None:
919-
if zarr_format == 3:
920-
raise ValueError(
921-
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
922-
)
923-
else:
924-
warnings.warn(
925-
"dimension_separator is not yet implemented",
926-
RuntimeWarning,
927-
stacklevel=2,
928-
)
929-
if write_empty_chunks:
930-
warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2)
956+
if dimension_separator is not None and zarr_format == 3:
957+
raise ValueError(
958+
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
959+
)
960+
961+
if order is not None:
962+
_warn_order_kwarg()
963+
if write_empty_chunks is not None:
964+
_warn_write_empty_chunks_kwarg()
965+
931966
if meta_array is not None:
932967
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
933968

934969
mode = kwargs.pop("mode", None)
935970
if mode is None:
936971
mode = "a"
937972
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
973+
974+
config_dict: ArrayConfigParams = {}
975+
976+
if write_empty_chunks is not None:
977+
if config is not None:
978+
msg = (
979+
"Both write_empty_chunks and config keyword arguments are set. "
980+
"This is redundant. When both are set, write_empty_chunks will be ignored and "
981+
"config will be used."
982+
)
983+
warnings.warn(UserWarning(msg), stacklevel=1)
984+
config_dict["write_empty_chunks"] = write_empty_chunks
985+
if order is not None:
986+
if config is not None:
987+
msg = (
988+
"Both order and config keyword arguments are set. "
989+
"This is redundant. When both are set, order will be ignored and "
990+
"config will be used."
991+
)
992+
warnings.warn(UserWarning(msg), stacklevel=1)
993+
config_dict["order"] = order
994+
995+
config_parsed = ArrayConfig.from_dict(config_dict)
996+
938997
return await AsyncArray.create(
939998
store_path,
940999
shape=shape,
@@ -951,7 +1010,7 @@ async def create(
9511010
codecs=codecs,
9521011
dimension_names=dimension_names,
9531012
attributes=attributes,
954-
order=order,
1013+
config=config_parsed,
9551014
**kwargs,
9561015
)
9571016

@@ -1127,6 +1186,11 @@ async def open_array(
11271186

11281187
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
11291188

1189+
if "order" in kwargs:
1190+
_warn_order_kwarg()
1191+
if "write_empty_chunks" in kwargs:
1192+
_warn_write_empty_chunks_kwarg()
1193+
11301194
try:
11311195
return await AsyncArray.open(store_path, zarr_format=zarr_format)
11321196
except FileNotFoundError:

0 commit comments

Comments
 (0)