Skip to content

Commit e204a32

Browse files
committed
Merge branch 'main' of github.com:zarr-developers/zarr-python into feat/read-funcs
2 parents 5025ad6 + 6dc6d07 commit e204a32

File tree

22 files changed

+974
-314
lines changed

22 files changed

+974
-314
lines changed

README-v3.md

Lines changed: 0 additions & 49 deletions
This file was deleted.

src/zarr/api/asynchronous.py

Lines changed: 114 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing_extensions import deprecated
1111

1212
from zarr.core.array import Array, AsyncArray, get_array_metadata
13+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams, parse_array_config
1314
from zarr.core.buffer import NDArrayLike
1415
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition
1516
from zarr.core.common import (
@@ -18,11 +19,14 @@
1819
ChunkCoords,
1920
MemoryOrder,
2021
ZarrFormat,
22+
_warn_order_kwarg,
23+
_warn_write_empty_chunks_kwarg,
2124
parse_dtype,
2225
)
23-
from zarr.core.config import config
26+
from zarr.core.config import config as zarr_config
2427
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2528
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
29+
from zarr.core.metadata.v2 import _default_filters_and_compressor
2630
from zarr.errors import NodeTypeValidationError
2731
from zarr.storage import (
2832
StoreLike,
@@ -149,7 +153,7 @@ def _handle_zarr_version_or_format(
149153

150154
def _default_zarr_version() -> ZarrFormat:
151155
"""Return the default zarr_version"""
152-
return cast(ZarrFormat, int(config.get("default_zarr_version", 3)))
156+
return cast(ZarrFormat, int(zarr_config.get("default_zarr_version", 3)))
153157

154158

155159
async def consolidate_metadata(
@@ -444,7 +448,7 @@ async def save_array(
444448
arr : ndarray
445449
NumPy array with data to save.
446450
zarr_format : {2, 3, None}, optional
447-
The zarr format to use when saving.
451+
The zarr format to use when saving (default is 3 if not specified).
448452
path : str or None, optional
449453
The path within the store where the array will be saved.
450454
storage_options : dict
@@ -747,7 +751,7 @@ async def create_group(
747751
return await AsyncGroup.from_store(
748752
store=store_path,
749753
zarr_format=zarr_format,
750-
exists_ok=overwrite,
754+
overwrite=overwrite,
751755
attributes=attributes,
752756
)
753757

@@ -948,6 +952,7 @@ async def create_array(
948952
dimension_names: Iterable[str] | None = None,
949953
storage_options: dict[str, Any] | None = None,
950954
overwrite: bool = False,
955+
config: ArrayConfig | ArrayConfigParams | None = None
951956
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
952957
"""Create an array.
953958
@@ -956,7 +961,7 @@ async def create_array(
956961
store: str or Store
957962
Store or path to directory in file system or name of zip file.
958963
path: str or None, optional
959-
The name of the array within the store. If ``path`` is ``None``, the array will be located
964+
The name of the array within the store. If ``path`` is ``None``, the array will be located
960965
at the root of the store.
961966
shape: ChunkCoords
962967
Shape of the array.
@@ -1007,6 +1012,7 @@ async def create_array(
10071012
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
10081013
sub_codecs = (*filters, BytesCodec(), *compressors)
10091014
_dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
1015+
config_parsed = parse_array_config(config)
10101016
if zarr_format == 2:
10111017
if shard_shape is not None or shard_shape != "auto":
10121018
msg = (
@@ -1019,18 +1025,23 @@ async def create_array(
10191025
filters = (*filters, *rest)
10201026
if dimension_names is not None:
10211027
raise ValueError("Zarr v2 arrays do not support dimension names.")
1028+
if order is None:
1029+
order_parsed = zarr_config.get('array.order')
1030+
else:
1031+
order_parsed = order
10221032
return await AsyncArray._create_v2(
10231033
store_path=store_path,
10241034
shape=shape,
10251035
dtype=_dtype_parsed,
10261036
chunks=chunk_shape,
10271037
dimension_separator="/",
10281038
fill_value=fill_value,
1029-
order=order,
1039+
order=order_parsed,
10301040
filters=filters,
10311041
compressor=compressor,
10321042
attributes=attributes,
10331043
overwrite=overwrite,
1044+
config=config_parsed,
10341045
)
10351046
else:
10361047
shard_shape_parsed, chunk_shape_parsed = _auto_partition(
@@ -1059,8 +1070,8 @@ async def create_array(
10591070
chunk_key_encoding=chunk_key_encoding,
10601071
codecs=codecs,
10611072
dimension_names=dimension_names,
1062-
order=order,
10631073
overwrite=overwrite,
1074+
config=config_parsed
10641075
)
10651076

10661077

@@ -1083,7 +1094,7 @@ async def create(
10831094
read_only: bool | None = None,
10841095
object_codec: Codec | None = None, # TODO: type has changed
10851096
dimension_separator: Literal[".", "/"] | None = None,
1086-
write_empty_chunks: bool = False, # TODO: default has changed
1097+
write_empty_chunks: bool | None = None,
10871098
zarr_version: ZarrFormat | None = None, # deprecated
10881099
zarr_format: ZarrFormat | None = None,
10891100
meta_array: Any | None = None, # TODO: need type
@@ -1099,6 +1110,7 @@ async def create(
10991110
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
11001111
dimension_names: Iterable[str] | None = None,
11011112
storage_options: dict[str, Any] | None = None,
1113+
config: ArrayConfig | ArrayConfigParams | None = None,
11021114
**kwargs: Any,
11031115
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
11041116
"""Create an array.
@@ -1108,19 +1120,47 @@ async def create(
11081120
shape : int or tuple of ints
11091121
Array shape.
11101122
chunks : int or tuple of ints, optional
1111-
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
1112-
False, will be set to `shape`, i.e., single chunk for the whole array.
1113-
If an int, the chunk size in each dimension will be given by the value
1114-
of `chunks`. Default is True.
1123+
The shape of the array's chunks.
1124+
V2 only. V3 arrays should use `chunk_shape` instead.
1125+
If not specified, default values are guessed based on the shape and dtype.
11151126
dtype : str or dtype, optional
11161127
NumPy dtype.
1128+
chunk_shape : int or tuple of ints, optional
1129+
The shape of the Array's chunks (default is None).
1130+
V3 only. V2 arrays should use `chunks` instead.
1131+
chunk_key_encoding : ChunkKeyEncoding, optional
1132+
A specification of how the chunk keys are represented in storage.
1133+
V3 only. V2 arrays should use `dimension_separator` instead.
1134+
Default is ``("default", "/")``.
1135+
codecs : Sequence of Codecs or dicts, optional
1136+
An iterable of Codec or dict serializations of Codecs. The elements of
1137+
this collection specify the transformation from array values to stored bytes.
1138+
V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
1139+
1140+
If no codecs are provided, default codecs will be used:
1141+
1142+
- For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
1143+
- For Unicode strings, the default is ``VLenUTF8Codec``.
1144+
- For bytes or objects, the default is ``VLenBytesCodec``.
1145+
1146+
These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
11171147
compressor : Codec, optional
1118-
Primary compressor.
1119-
fill_value : object
1148+
Primary compressor to compress chunk data.
1149+
V2 only. V3 arrays should use ``codecs`` instead.
1150+
1151+
If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
1152+
1153+
- For numeric arrays, the default is ``ZstdCodec``.
1154+
- For Unicode strings, the default is ``VLenUTF8Codec``.
1155+
- For bytes or objects, the default is ``VLenBytesCodec``.
1156+
1157+
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
fill_value : object
11201158
Default value to use for uninitialized portions of the array.
11211159
order : {'C', 'F'}, optional
1160+
Deprecated in favor of the ``config`` keyword argument.
1161+
Pass ``{'order': <value>}`` to ``create`` instead of using this parameter.
11221162
Memory layout to be used within each chunk.
1123-
Default is set in Zarr's config (`array.order`).
1163+
If not specified, the ``array.order`` parameter in the global config will be used.
11241164
store : Store or str
11251165
Store or path to directory in file system or name of zip file.
11261166
synchronizer : object, optional
@@ -1135,6 +1175,8 @@ async def create(
11351175
for storage of both chunks and metadata.
11361176
filters : sequence of Codecs, optional
11371177
Sequence of filters to use to encode chunk data prior to compression.
1178+
V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
1179+
compressor will be used (see ``compressor`` for details).
11381180
cache_metadata : bool, optional
11391181
If True, array configuration metadata will be cached for the
11401182
lifetime of the object. If False, array metadata will be reloaded
@@ -1150,30 +1192,28 @@ async def create(
11501192
A codec to encode object arrays, only needed if dtype=object.
11511193
dimension_separator : {'.', '/'}, optional
11521194
Separator placed between the dimensions of a chunk.
1153-
1154-
.. versionadded:: 2.8
1155-
1195+
V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
1196+
Default is ".".
11561197
write_empty_chunks : bool, optional
1157-
If True (default), all chunks will be stored regardless of their
1198+
Deprecated in favor of the ``config`` keyword argument.
1199+
Pass ``{'write_empty_chunks': <value>}`` to ``create`` instead of using this parameter.
1200+
If True, all chunks will be stored regardless of their
11581201
contents. If False, each chunk is compared to the array's fill value
11591202
prior to storing. If a chunk is uniformly equal to the fill value, then
11601203
that chunk is not stored, and the store entry for that chunk's key
1161-
is deleted. This setting enables sparser storage, as only chunks with
1162-
non-fill-value data are stored, at the expense of overhead associated
1163-
with checking the data of each chunk.
1164-
1165-
.. versionadded:: 2.11
1166-
1204+
is deleted.
11671205
zarr_format : {2, 3, None}, optional
11681206
The zarr format to use when saving.
1207+
Default is 3.
11691208
meta_array : array-like, optional
11701209
An array instance to use for determining arrays to create and return
11711210
to users. Use `numpy.empty(())` by default.
1172-
1173-
.. versionadded:: 2.13
11741211
storage_options : dict
11751212
If using an fsspec URL to create the store, these will be passed to
11761213
the backend implementation. Ignored otherwise.
1214+
config : ArrayConfig or ArrayConfigParams, optional
1215+
Runtime configuration of the array. If provided, will override the
1216+
default values from `zarr.config.array`.
11771217
11781218
Returns
11791219
-------
@@ -1185,9 +1225,13 @@ async def create(
11851225
or _default_zarr_version()
11861226
)
11871227

1188-
if zarr_format == 2 and chunks is None:
1189-
chunks = shape
1190-
elif zarr_format == 3 and chunk_shape is None:
1228+
if zarr_format == 2:
1229+
if chunks is None:
1230+
chunks = shape
1231+
dtype = parse_dtype(dtype, zarr_format)
1232+
if not filters and not compressor:
1233+
filters, compressor = _default_filters_and_compressor(dtype)
1234+
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
11911235
if chunks is not None:
11921236
chunk_shape = chunks
11931237
chunks = None
@@ -1206,26 +1250,47 @@ async def create(
12061250
warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2)
12071251
if read_only is not None:
12081252
warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2)
1209-
if dimension_separator is not None:
1210-
if zarr_format == 3:
1211-
raise ValueError(
1212-
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
1213-
)
1214-
else:
1215-
warnings.warn(
1216-
"dimension_separator is not yet implemented",
1217-
RuntimeWarning,
1218-
stacklevel=2,
1219-
)
1220-
if write_empty_chunks:
1221-
warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2)
1253+
if dimension_separator is not None and zarr_format == 3:
1254+
raise ValueError(
1255+
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
1256+
)
1257+
1258+
if order is not None:
1259+
_warn_order_kwarg()
1260+
if write_empty_chunks is not None:
1261+
_warn_write_empty_chunks_kwarg()
1262+
12221263
if meta_array is not None:
12231264
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
12241265

12251266
mode = kwargs.pop("mode", None)
12261267
if mode is None:
12271268
mode = "a"
12281269
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
1270+
1271+
config_dict: ArrayConfigParams = {}
1272+
1273+
if write_empty_chunks is not None:
1274+
if config is not None:
1275+
msg = (
1276+
"Both write_empty_chunks and config keyword arguments are set. "
1277+
"This is redundant. When both are set, write_empty_chunks will be ignored and "
1278+
"config will be used."
1279+
)
1280+
warnings.warn(UserWarning(msg), stacklevel=1)
1281+
config_dict["write_empty_chunks"] = write_empty_chunks
1282+
if order is not None:
1283+
if config is not None:
1284+
msg = (
1285+
"Both order and config keyword arguments are set. "
1286+
"This is redundant. When both are set, order will be ignored and "
1287+
"config will be used."
1288+
)
1289+
warnings.warn(UserWarning(msg), stacklevel=1)
1290+
config_dict["order"] = order
1291+
1292+
config_parsed = ArrayConfig.from_dict(config_dict)
1293+
12291294
return await AsyncArray.create(
12301295
store_path,
12311296
shape=shape,
@@ -1242,7 +1307,7 @@ async def create(
12421307
codecs=codecs,
12431308
dimension_names=dimension_names,
12441309
attributes=attributes,
1245-
order=order,
1310+
config=config_parsed,
12461311
**kwargs,
12471312
)
12481313

@@ -1453,6 +1518,11 @@ async def open_array(
14531518

14541519
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
14551520

1521+
if "order" in kwargs:
1522+
_warn_order_kwarg()
1523+
if "write_empty_chunks" in kwargs:
1524+
_warn_write_empty_chunks_kwarg()
1525+
14561526
try:
14571527
return await AsyncArray.open(store_path, zarr_format=zarr_format)
14581528
except FileNotFoundError:

0 commit comments

Comments
 (0)