Skip to content

Commit 03e2500

Browse files
committed
extract to zarr.from_array
1 parent 2df18a0 commit 03e2500

File tree

5 files changed

+352
-40
lines changed

5 files changed

+352
-40
lines changed

src/zarr/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
create_group,
1111
empty,
1212
empty_like,
13+
from_array,
1314
full,
1415
full_like,
1516
group,
@@ -52,6 +53,7 @@
5253
"create_group",
5354
"empty",
5455
"empty_like",
56+
"from_array",
5557
"full",
5658
"full_like",
5759
"group",

src/zarr/api/asynchronous.py

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
12+
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
1313
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
1414
from zarr.core.buffer import NDArrayLike
1515
from zarr.core.common import (
@@ -21,10 +21,8 @@
2121
_default_zarr_format,
2222
_warn_order_kwarg,
2323
_warn_write_empty_chunks_kwarg,
24-
concurrent_map,
2524
parse_dtype,
2625
)
27-
from zarr.core.config import config
2826
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2927
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
3028
from zarr.core.metadata.v2 import _default_compressor, _default_filters
@@ -52,6 +50,7 @@
5250
"create_array",
5351
"empty",
5452
"empty_like",
53+
"from_array",
5554
"full",
5655
"full_like",
5756
"group",
@@ -547,28 +546,7 @@ async def array(
547546
"""
548547

549548
if isinstance(data, Array):
550-
# fill missing arguments with metadata of data Array
551-
kwargs.setdefault("dtype", data.dtype)
552-
kwargs.setdefault("attributes", data.attrs)
553-
kwargs.setdefault("chunks", data.chunks)
554-
kwargs.setdefault("fill_value", data.fill_value)
555-
556-
new_array = await create(data.shape, **kwargs)
557-
558-
async def _copy_chunk(chunk_coords: ChunkCoords | slice, _data: Array) -> None:
559-
arr = await _data._async_array.getitem(chunk_coords)
560-
await new_array.setitem(chunk_coords, arr)
561-
562-
if new_array.chunks == data.chunks:
563-
# Stream data from the source array to the new array
564-
await concurrent_map(
565-
[(region, data) for region in data._iter_chunk_regions()],
566-
_copy_chunk,
567-
config.get("async.concurrency"),
568-
)
569-
else:
570-
await _copy_chunk(slice(None), data)
571-
return new_array
549+
return await from_array(data, **kwargs)
572550

573551
# ensure data is array-like
574552
if not hasattr(data, "shape") or not hasattr(data, "dtype"):

src/zarr/api/synchronous.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
"create_array",
4848
"empty",
4949
"empty_like",
50+
"from_array",
5051
"full",
5152
"full_like",
5253
"group",
@@ -893,6 +894,138 @@ def create_array(
893894
)
894895

895896

897+
def from_array(
898+
data: Array,
899+
store: str | StoreLike,
900+
*,
901+
name: str | None = None,
902+
chunks: ChunkCoords | Literal["auto"] = "auto",
903+
shards: ShardsLike | None = None,
904+
filters: FiltersLike = "auto",
905+
compressors: CompressorsLike = "auto",
906+
serializer: SerializerLike = "auto",
907+
fill_value: Any | None = None,
908+
order: MemoryOrder | None = None,
909+
zarr_format: ZarrFormat | None = 3,
910+
attributes: dict[str, JSON] | None = None,
911+
chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None,
912+
dimension_names: Iterable[str] | None = None,
913+
storage_options: dict[str, Any] | None = None,
914+
overwrite: bool = False,
915+
config: ArrayConfig | ArrayConfigLike | None = None,
916+
) -> Array:
917+
"""Create an array from an existing array.
918+
919+
Parameters
920+
----------
921+
data : Array
922+
The array to copy.
923+
store : str or Store
924+
Store or path to directory in file system or name of zip file for the new array.
925+
name : str or None, optional
926+
The name of the array within the store. If ``name`` is ``None``, the array will be located
927+
at the root of the store.
928+
chunks : ChunkCoords, optional
929+
Chunk shape of the array.
930+
If not specified, defaults to the chunk shape of the data array.
931+
shards : ChunkCoords, optional
932+
Shard shape of the array. The default value of ``None`` results in no sharding at all.
933+
filters : Iterable[Codec], optional
934+
Iterable of filters to apply to each chunk of the array, in order, before serializing that
935+
chunk to bytes.
936+
937+
For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
938+
and these values must be instances of ``ArrayArrayCodec``, or dict representations
939+
of ``ArrayArrayCodec``.
940+
941+
For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the
942+
order of your filters is consistent with the behavior of each filter.
943+
944+
If no ``filters`` are provided, defaults to the filters of the data array.
945+
compressors : Iterable[Codec], optional
946+
List of compressors to apply to the array. Compressors are applied in order, and after any
947+
filters are applied (if any are specified) and the data is serialized into bytes.
948+
949+
For Zarr format 3, a "compressor" is a codec that takes a bytestream, and
950+
returns another bytestream. Multiple compressors may be provided for Zarr format 3.
951+
952+
For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may
953+
be provided for Zarr format 2.
954+
955+
If no ``compressors`` are provided, defaults to the compressors of the data array.
956+
serializer : dict[str, JSON] | ArrayBytesCodec, optional
957+
Array-to-bytes codec to use for encoding the array data.
958+
Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion.
959+
960+
If no ``serializer`` is provided, defaults to the serializer of the input array.
961+
fill_value : Any, optional
962+
Fill value for the array.
963+
If not specified, defaults to the fill value of the data array.
964+
order : {"C", "F"}, optional
965+
The memory order of the array (default is "C").
966+
For Zarr format 2, this parameter sets the memory order of the array.
967+
For Zarr format 3, this parameter is deprecated, because memory order
968+
is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory
969+
order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``.
970+
If not specified, defaults to the memory order of the data array.
971+
zarr_format : {2, 3}, optional
972+
The zarr format to use when saving.
973+
If not specified, defaults to the zarr format of the data array.
974+
attributes : dict, optional
975+
Attributes for the array.
976+
If not specified, defaults to the attributes of the data array.
977+
chunk_key_encoding : ChunkKeyEncoding, optional
978+
A specification of how the chunk keys are represented in storage.
979+
For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``.
980+
For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``.
981+
If not specified and the data array has the same zarr format as the target array,
982+
the chunk key encoding of the data array is used.
983+
dimension_names : Iterable[str], optional
984+
The names of the dimensions (default is None).
985+
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
986+
If not specified, defaults to the dimension names of the data array.
987+
storage_options : dict, optional
988+
If using an fsspec URL to create the store, these will be passed to the backend implementation.
989+
Ignored otherwise.
990+
overwrite : bool, default False
991+
Whether to overwrite an array with the same name in the store, if one exists.
992+
config : ArrayConfig or ArrayConfigLike, optional
993+
Runtime configuration for the array.
994+
995+
Returns
996+
-------
997+
Array
998+
The array.
999+
1000+
Examples
1001+
--------
1002+
#TODO
1003+
"""
1004+
return Array(
1005+
sync(
1006+
zarr.core.array.from_array(
1007+
data,
1008+
store,
1009+
name=name,
1010+
chunks=chunks,
1011+
shards=shards,
1012+
filters=filters,
1013+
compressors=compressors,
1014+
serializer=serializer,
1015+
fill_value=fill_value,
1016+
order=order,
1017+
zarr_format=zarr_format,
1018+
attributes=attributes,
1019+
chunk_key_encoding=chunk_key_encoding,
1020+
dimension_names=dimension_names,
1021+
storage_options=storage_options,
1022+
overwrite=overwrite,
1023+
config=config,
1024+
)
1025+
)
1026+
)
1027+
1028+
8961029
# TODO: add type annotations for kwargs
8971030
def empty(shape: ChunkCoords, **kwargs: Any) -> Array:
8981031
"""Create an empty array.

0 commit comments

Comments
 (0)