Skip to content

Commit a39457f

Browse files
committed
allow data in create_array
1 parent 99cc8f5 commit a39457f

File tree

4 files changed

+229
-242
lines changed

4 files changed

+229
-242
lines changed

src/zarr/api/asynchronous.py

Lines changed: 1 addition & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
from typing_extensions import deprecated
1111

1212
from zarr.core.array import Array, AsyncArray, get_array_metadata
13-
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams, parse_array_config
13+
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
1414
from zarr.core.buffer import NDArrayLike
15-
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition
1615
from zarr.core.common import (
1716
JSON,
1817
AccessModeLiteral,
@@ -929,158 +928,6 @@ async def read_group(
929928
)
930929

931930

932-
async def create_array(
    store: str | StoreLike,
    *,
    path: PathLike | None = None,
    shape: ChunkCoords,
    dtype: npt.DTypeLike,
    chunk_shape: ChunkCoords,
    shard_shape: ChunkCoords | None = None,
    filters: Iterable[dict[str, JSON] | Codec] = (),
    compressors: Iterable[dict[str, JSON] | Codec] = (),
    fill_value: Any | None = 0,
    order: MemoryOrder | None = "C",
    zarr_format: ZarrFormat | None = 3,
    attributes: dict[str, JSON] | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = ("default", "/"),
    dimension_names: Iterable[str] | None = None,
    storage_options: dict[str, Any] | None = None,
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigParams | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Create an array.

    Parameters
    ----------
    store : str or Store
        Store or path to directory in file system or name of zip file.
    path : str or None, optional
        The name of the array within the store. If ``path`` is ``None``, the array will be located
        at the root of the store.
    shape : ChunkCoords
        Shape of the array.
    dtype : npt.DTypeLike
        Data type of the array.
    chunk_shape : ChunkCoords
        Chunk shape of the array.
    shard_shape : ChunkCoords, optional
        Shard shape of the array. The default value of ``None`` results in no sharding at all.
        Must be ``None`` when ``zarr_format`` is 2.
    filters : Iterable[Codec], optional
        Filters to apply to the array. The iterable is consumed exactly once.
    compressors : Iterable[Codec], optional
        Compressors to apply to the array. The iterable is consumed exactly once.
        For Zarr v2 the first compressor becomes the array's compressor and any
        remaining ones are appended to the filters.
    fill_value : Any, optional
        Fill value for the array.
    order : {"C", "F"}, optional
        Memory layout of the array. Only forwarded when creating a Zarr v2 array here;
        ``None`` falls back to the ``array.order`` configuration value.
    zarr_format : {2, 3}, optional
        The zarr format to use when saving. ``None`` selects the default zarr version.
    attributes : dict, optional
        Attributes for the array.
    chunk_key_encoding : ChunkKeyEncoding, optional
        The chunk key encoding to use. Only forwarded when creating a Zarr v3 array;
        Zarr v2 arrays are created with ``"/"`` as the dimension separator.
    dimension_names : Iterable[str], optional
        Dimension names for the array. Must be ``None`` when ``zarr_format`` is 2.
    storage_options : dict, optional
        If using an fsspec URL to create the store, these will be passed to the backend implementation.
        Ignored otherwise.
    overwrite : bool, default False
        Whether to overwrite an array with the same name in the store, if one exists.
    config : ArrayConfig or ArrayConfigParams, optional
        Runtime configuration for the array.

    Returns
    -------
    z : array
        The array.

    Raises
    ------
    ValueError
        If ``zarr_format`` is 2 and ``shard_shape`` or ``dimension_names`` is not ``None``.
    """

    if zarr_format is None:
        zarr_format = _default_zarr_version()

    # TODO: figure out why putting these imports at top-level causes circular imports
    from zarr.codecs.bytes import BytesCodec
    from zarr.codecs.sharding import ShardingCodec

    # TODO: fix this when modes make sense. It should be `w` for overwriting, `w-` otherwise
    mode: Literal["a"] = "a"

    store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

    # Materialize once: both arguments are typed as Iterable and are read several
    # times below, which would silently yield nothing for a one-shot iterator.
    filters_parsed = tuple(filters)
    compressors_parsed = tuple(compressors)

    sub_codecs = (*filters_parsed, BytesCodec(), *compressors_parsed)
    _dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
    config_parsed = parse_array_config(config)

    if zarr_format == 2:
        if shard_shape is not None:
            msg = (
                'Zarr v2 arrays can only be created with `shard_shape` set to `None` or `"auto"`.'
                f" Got `shard_shape={shard_shape}` instead."
            )
            raise ValueError(msg)
        if dimension_names is not None:
            raise ValueError("Zarr v2 arrays do not support dimension names.")

        # Zarr v2 has a single compressor slot: the first compressor fills it
        # (including the case where exactly one was given) and the rest become filters.
        if compressors_parsed:
            compressor, *extra_compressors = compressors_parsed
        else:
            compressor = None
            extra_compressors = []
        filters_v2 = (*filters_parsed, *extra_compressors)

        order_parsed = zarr_config.get("array.order") if order is None else order

        return await AsyncArray._create_v2(
            store_path=store_path,
            shape=shape,
            dtype=_dtype_parsed,
            chunks=chunk_shape,
            dimension_separator="/",
            fill_value=fill_value,
            order=order_parsed,
            filters=filters_v2,
            compressor=compressor,
            attributes=attributes,
            overwrite=overwrite,
            config=config_parsed,
        )

    shard_shape_parsed, chunk_shape_parsed = _auto_partition(
        shape, dtype, shard_shape, chunk_shape
    )
    if shard_shape_parsed is not None:
        # With sharding, the user-facing codecs run inside each shard and the
        # array-level chunk grid is the shard grid.
        sharding_codec = ShardingCodec(chunk_shape=chunk_shape_parsed, codecs=sub_codecs)
        sharding_codec.validate(
            shape=chunk_shape_parsed,
            dtype=_dtype_parsed,
            chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed),
        )
        codecs = (sharding_codec,)
        chunks_out = shard_shape_parsed
    else:
        chunks_out = chunk_shape_parsed
        codecs = sub_codecs

    return await AsyncArray._create_v3(
        store_path=store_path,
        shape=shape,
        dtype=_dtype_parsed,
        fill_value=fill_value,
        attributes=attributes,
        chunk_shape=chunks_out,
        chunk_key_encoding=chunk_key_encoding,
        codecs=codecs,
        dimension_names=dimension_names,
        overwrite=overwrite,
        config=config_parsed,
    )
1082-
1083-
1084931
async def create(
1085932
shape: ChunkCoords | int,
1086933
*, # Note: this is a change from v2

src/zarr/api/synchronous.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing_extensions import deprecated
66

77
import zarr.api.asynchronous as async_api
8+
import zarr.core.array
89
from zarr._compat import _deprecate_positional_args
910
from zarr.core.array import Array, AsyncArray
1011
from zarr.core.group import Group
@@ -727,7 +728,7 @@ def create(
727728

728729

729730
def create_array(*args: Any, **kwargs: Any) -> Array:
    """Synchronously create an array.

    All positional and keyword arguments are forwarded unchanged to the
    coroutine ``zarr.core.array.create_array``; the coroutine is run through
    ``sync`` and its result is wrapped in ``Array``.
    """
    pending = zarr.core.array.create_array(*args, **kwargs)
    async_array = sync(pending)
    return Array(async_array)
731732

732733

733734
def read_array(*args: Any, **kwargs: Any) -> Array:

src/zarr/core/array.py

Lines changed: 165 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import warnings
55
from asyncio import gather
6+
from collections.abc import Iterable
67
from dataclasses import dataclass, field
78
from itertools import starmap
89
from logging import getLogger
@@ -14,6 +15,7 @@
1415

1516
from zarr._compat import _deprecate_positional_args
1617
from zarr.abc.store import Store, set_or_delete
18+
from zarr.api.asynchronous import PathLike, _default_zarr_version
1719
from zarr.codecs._v2 import V2Codec
1820
from zarr.core._info import ArrayInfo
1921
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams, parse_array_config
@@ -24,7 +26,7 @@
2426
NDBuffer,
2527
default_buffer_prototype,
2628
)
27-
from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks
29+
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks
2830
from zarr.core.chunk_key_encodings import (
2931
ChunkKeyEncoding,
3032
DefaultChunkKeyEncoding,
@@ -3450,3 +3452,165 @@ def _get_default_codecs(
34503452
dtype_key = "numeric"
34513453

34523454
return [{"name": codec_id, "configuration": {}} for codec_id in default_codecs[dtype_key]]
3455+
3456+
3457+
async def create_array(
    store: str | StoreLike,
    *,
    path: PathLike | None = None,
    shape: ChunkCoords,
    dtype: npt.DTypeLike,
    chunk_shape: ChunkCoords,
    shard_shape: ChunkCoords | None = None,
    filters: Iterable[dict[str, JSON] | Codec] = (),
    compressors: Iterable[dict[str, JSON] | Codec] = (),
    fill_value: Any | None = 0,
    order: MemoryOrder | None = "C",
    zarr_format: ZarrFormat | None = 3,
    attributes: dict[str, JSON] | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = ("default", "/"),
    dimension_names: Iterable[str] | None = None,
    storage_options: dict[str, Any] | None = None,
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigParams | None = None,
    data: np.ndarray | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Create an array.

    Parameters
    ----------
    store : str or Store
        Store or path to directory in file system or name of zip file.
    path : str or None, optional
        The name of the array within the store. If ``path`` is ``None``, the array will be located
        at the root of the store.
    shape : ChunkCoords
        Shape of the array.
    dtype : npt.DTypeLike
        Data type of the array.
    chunk_shape : ChunkCoords
        Chunk shape of the array.
    shard_shape : ChunkCoords, optional
        Shard shape of the array. The default value of ``None`` results in no sharding at all.
        Must be ``None`` when ``zarr_format`` is 2.
    filters : Iterable[Codec], optional
        Filters to apply to the array. The iterable is consumed exactly once.
    compressors : Iterable[Codec], optional
        Compressors to apply to the array. The iterable is consumed exactly once.
        For Zarr v2 the first compressor becomes the array's compressor and any
        remaining ones are appended to the filters.
    fill_value : Any, optional
        Fill value for the array.
    order : {"C", "F"}, optional
        Memory layout of the array. Only forwarded when creating a Zarr v2 array here;
        ``None`` falls back to the ``array.order`` configuration value.
    zarr_format : {2, 3}, optional
        The zarr format to use when saving. ``None`` selects the default zarr version.
    attributes : dict, optional
        Attributes for the array.
    chunk_key_encoding : ChunkKeyEncoding, optional
        The chunk key encoding to use. Only forwarded when creating a Zarr v3 array;
        Zarr v2 arrays are created with ``"/"`` as the dimension separator.
    dimension_names : Iterable[str], optional
        Dimension names for the array. Must be ``None`` when ``zarr_format`` is 2.
    storage_options : dict, optional
        If using an fsspec URL to create the store, these will be passed to the backend implementation.
        Ignored otherwise.
    overwrite : bool, default False
        Whether to overwrite an array with the same name in the store, if one exists.
    config : ArrayConfig or ArrayConfigParams, optional
        Runtime configuration for the array.
    data : np.ndarray, optional
        Initial data for the array. When given, it is written to the whole array
        (selection ``slice(None)``) after the array has been created.

    Returns
    -------
    z : array
        The array.

    Raises
    ------
    ValueError
        If ``zarr_format`` is 2 and ``shard_shape`` or ``dimension_names`` is not ``None``.
    """

    if zarr_format is None:
        zarr_format = _default_zarr_version()

    # TODO: figure out why putting these imports at top-level causes circular imports
    from zarr.codecs.bytes import BytesCodec
    from zarr.codecs.sharding import ShardingCodec

    # TODO: fix this when modes make sense. It should be `w` for overwriting, `w-` otherwise
    mode: Literal["a"] = "a"

    store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

    # Materialize once: both arguments are typed as Iterable and are read several
    # times below, which would silently yield nothing for a one-shot iterator.
    filters_parsed = tuple(filters)
    compressors_parsed = tuple(compressors)

    sub_codecs = (*filters_parsed, BytesCodec(), *compressors_parsed)
    _dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
    config_parsed = parse_array_config(config)

    result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]
    if zarr_format == 2:
        if shard_shape is not None:
            msg = (
                'Zarr v2 arrays can only be created with `shard_shape` set to `None` or `"auto"`.'
                f" Got `shard_shape={shard_shape}` instead."
            )
            raise ValueError(msg)
        if dimension_names is not None:
            raise ValueError("Zarr v2 arrays do not support dimension names.")

        # Zarr v2 has a single compressor slot: the first compressor fills it
        # (including the case where exactly one was given) and the rest become filters.
        if compressors_parsed:
            compressor, *extra_compressors = compressors_parsed
        else:
            compressor = None
            extra_compressors = []
        filters_v2 = (*filters_parsed, *extra_compressors)

        order_parsed = zarr_config.get("array.order") if order is None else order

        result = await AsyncArray._create_v2(
            store_path=store_path,
            shape=shape,
            dtype=_dtype_parsed,
            chunks=chunk_shape,
            dimension_separator="/",
            fill_value=fill_value,
            order=order_parsed,
            filters=filters_v2,
            compressor=compressor,
            attributes=attributes,
            overwrite=overwrite,
            config=config_parsed,
        )
    else:
        shard_shape_parsed, chunk_shape_parsed = _auto_partition(
            shape, dtype, shard_shape, chunk_shape
        )
        if shard_shape_parsed is not None:
            # With sharding, the user-facing codecs run inside each shard and the
            # array-level chunk grid is the shard grid.
            sharding_codec = ShardingCodec(chunk_shape=chunk_shape_parsed, codecs=sub_codecs)
            sharding_codec.validate(
                shape=chunk_shape_parsed,
                dtype=_dtype_parsed,
                chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed),
            )
            codecs = (sharding_codec,)
            chunks_out = shard_shape_parsed
        else:
            chunks_out = chunk_shape_parsed
            codecs = sub_codecs

        result = await AsyncArray._create_v3(
            store_path=store_path,
            shape=shape,
            dtype=_dtype_parsed,
            fill_value=fill_value,
            attributes=attributes,
            chunk_shape=chunks_out,
            chunk_key_encoding=chunk_key_encoding,
            codecs=codecs,
            dimension_names=dimension_names,
            overwrite=overwrite,
            config=config_parsed,
        )

    if data is not None:
        # Populate the freshly created array with the caller-supplied values.
        await result.setitem(
            selection=slice(None), value=data, prototype=default_buffer_prototype()
        )
    return result

0 commit comments

Comments
 (0)