|
3 | 3 | import json |
4 | 4 | import warnings |
5 | 5 | from asyncio import gather |
| 6 | +from collections.abc import Iterable |
6 | 7 | from dataclasses import dataclass, field |
7 | 8 | from itertools import starmap |
8 | 9 | from logging import getLogger |
|
14 | 15 |
|
15 | 16 | from zarr._compat import _deprecate_positional_args |
16 | 17 | from zarr.abc.store import Store, set_or_delete |
| 18 | +from zarr.api.asynchronous import PathLike, _default_zarr_version |
17 | 19 | from zarr.codecs._v2 import V2Codec |
18 | 20 | from zarr.core._info import ArrayInfo |
19 | 21 | from zarr.core.array_spec import ArrayConfig, ArrayConfigParams, parse_array_config |
|
24 | 26 | NDBuffer, |
25 | 27 | default_buffer_prototype, |
26 | 28 | ) |
27 | | -from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks |
| 29 | +from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks |
28 | 30 | from zarr.core.chunk_key_encodings import ( |
29 | 31 | ChunkKeyEncoding, |
30 | 32 | DefaultChunkKeyEncoding, |
@@ -3450,3 +3452,165 @@ def _get_default_codecs( |
3450 | 3452 | dtype_key = "numeric" |
3451 | 3453 |
|
3452 | 3454 | return [{"name": codec_id, "configuration": {}} for codec_id in default_codecs[dtype_key]] |
| 3455 | + |
| 3456 | + |
async def create_array(
    store: str | StoreLike,
    *,
    path: PathLike | None = None,
    shape: ChunkCoords,
    dtype: npt.DTypeLike,
    chunk_shape: ChunkCoords,
    shard_shape: ChunkCoords | None = None,
    filters: Iterable[dict[str, JSON] | Codec] = (),
    compressors: Iterable[dict[str, JSON] | Codec] = (),
    fill_value: Any | None = 0,
    order: MemoryOrder | None = "C",
    zarr_format: ZarrFormat | None = 3,
    attributes: dict[str, JSON] | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = ("default", "/"),
    dimension_names: Iterable[str] | None = None,
    storage_options: dict[str, Any] | None = None,
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigParams | None = None,
    data: np.ndarray | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Create an array.

    Parameters
    ----------
    store : str or StoreLike
        Store or path to directory in file system or name of zip file.
    path : str or None, optional
        The name of the array within the store. If ``path`` is ``None``, the array will be located
        at the root of the store.
    shape : ChunkCoords
        Shape of the array.
    dtype : npt.DTypeLike
        Data type of the array.
    chunk_shape : ChunkCoords
        Chunk shape of the array.
    shard_shape : ChunkCoords, optional
        Shard shape of the array. The default value of ``None`` results in no sharding at all.
        Must be ``None`` when ``zarr_format`` is 2.
    filters : Iterable[dict or Codec], optional
        Filters to apply to the array, in order. The iterable is consumed exactly once.
    compressors : Iterable[dict or Codec], optional
        Compressors to apply to the array, in order. The iterable is consumed exactly once.
        For ``zarr_format=2`` the first entry is used as the array's compressor and any
        remaining entries are appended to ``filters``.
    fill_value : Any, optional
        Fill value for the array.
    order : {"C", "F"}, optional
        Memory layout of the array. Only used for ``zarr_format=2``; if ``None``, the value of
        the ``array.order`` configuration setting is used.
    zarr_format : {2, 3}, optional
        The zarr format to use when saving. If ``None``, the library default is used.
    attributes : dict, optional
        Attributes for the array.
    chunk_key_encoding : ChunkKeyEncoding, optional
        The chunk key encoding to use. Only forwarded for ``zarr_format=3``.
    dimension_names : Iterable[str], optional
        Dimension names for the array. Not supported for ``zarr_format=2``.
    storage_options : dict, optional
        If using an fsspec URL to create the store, these will be passed to the backend implementation.
        Ignored otherwise.
    overwrite : bool, default False
        Whether to overwrite an array with the same name in the store, if one exists.
    config : ArrayConfig or ArrayConfigParams, optional
        Runtime configuration for the array.
    data : np.ndarray, optional
        Initial data for the array. If given, it is written to the whole array after creation.

    Returns
    -------
    z : AsyncArray
        The array.

    Raises
    ------
    ValueError
        If ``zarr_format`` is 2 and either ``shard_shape`` or ``dimension_names`` is not ``None``.
    """

    if zarr_format is None:
        zarr_format = _default_zarr_version()

    # TODO: figure out why putting these imports at top-level causes circular imports
    from zarr.codecs.bytes import BytesCodec
    from zarr.codecs.sharding import ShardingCodec

    # TODO: fix this when modes make sense. It should be `w` for overwriting, `w-` otherwise
    mode: Literal["a"] = "a"

    # Materialise the codec iterables up front: they are traversed more than once below,
    # and a one-shot iterable (e.g. a generator) would otherwise be empty on the second pass.
    filters_parsed = tuple(filters)
    compressors_parsed = tuple(compressors)

    store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
    _dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
    config_parsed = parse_array_config(config)
    result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]

    if zarr_format == 2:
        if shard_shape is not None:
            msg = (
                "Zarr v2 arrays can only be created with `shard_shape` set to `None`. "
                f"Got `shard_shape={shard_shape}` instead."
            )
            raise ValueError(msg)
        if dimension_names is not None:
            raise ValueError("Zarr v2 arrays do not support dimension names.")

        # The v2 creation call takes a single compressor: use the first one supplied (if any)
        # and run the remaining ones as trailing filters.
        if compressors_parsed:
            compressor, *extra_compressors = compressors_parsed
        else:
            compressor = None
            extra_compressors = []
        filters_v2 = (*filters_parsed, *extra_compressors)

        order_parsed = zarr_config.get("array.order") if order is None else order

        # NOTE(review): the dimension separator is hard-coded to "/" and `chunk_key_encoding`
        # is not consulted on this branch -- confirm whether that is intended.
        result = await AsyncArray._create_v2(
            store_path=store_path,
            shape=shape,
            dtype=_dtype_parsed,
            chunks=chunk_shape,
            dimension_separator="/",
            fill_value=fill_value,
            order=order_parsed,
            filters=filters_v2,
            compressor=compressor,
            attributes=attributes,
            overwrite=overwrite,
            config=config_parsed,
        )
    else:
        # Codec pipeline applied to each chunk: filters, then bytes serialisation, then compressors.
        sub_codecs = (*filters_parsed, BytesCodec(), *compressors_parsed)

        # Use the already-normalised dtype rather than the raw user input.
        shard_shape_parsed, chunk_shape_parsed = _auto_partition(
            shape, _dtype_parsed, shard_shape, chunk_shape
        )
        if shard_shape_parsed is not None:
            # With sharding, the array's chunk grid is made of shards and the per-chunk
            # codec pipeline is nested inside the sharding codec.
            sharding_codec = ShardingCodec(chunk_shape=chunk_shape_parsed, codecs=sub_codecs)
            sharding_codec.validate(
                shape=chunk_shape_parsed,
                dtype=_dtype_parsed,
                chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed),
            )
            codecs = (sharding_codec,)
            chunks_out = shard_shape_parsed
        else:
            chunks_out = chunk_shape_parsed
            codecs = sub_codecs

        result = await AsyncArray._create_v3(
            store_path=store_path,
            shape=shape,
            dtype=_dtype_parsed,
            fill_value=fill_value,
            attributes=attributes,
            chunk_shape=chunks_out,
            chunk_key_encoding=chunk_key_encoding,
            codecs=codecs,
            dimension_names=dimension_names,
            overwrite=overwrite,
            config=config_parsed,
        )

    if data is not None:
        await result.setitem(
            selection=slice(None), value=data, prototype=default_buffer_prototype()
        )
    return result
0 commit comments