33import json
44import warnings
55from asyncio import gather
6- from collections .abc import Iterable , Mapping
6+ from collections .abc import Iterable , Mapping , Sequence
77from dataclasses import dataclass , field , replace
88from itertools import starmap
99from logging import getLogger
4040 default_buffer_prototype ,
4141)
4242from zarr .core .buffer .cpu import buffer_prototype as cpu_buffer_prototype
43- from zarr .core .chunk_grids import RegularChunkGrid , _auto_partition , normalize_chunks
43+ from zarr .core .chunk_grids import ChunkGrid , RegularChunkGrid , _auto_partition , normalize_chunks
4444from zarr .core .chunk_key_encodings import (
4545 ChunkKeyEncoding ,
4646 ChunkKeyEncodingLike ,
@@ -737,15 +737,25 @@ async def _create(
737737 def _create_metadata_v3 (
738738 shape : ShapeLike ,
739739 dtype : ZDType [TBaseDType , TBaseScalar ],
740- chunk_shape : tuple [int , ...],
740+ chunk_shape : tuple [int , ...] | None = None ,
741741 fill_value : Any | None = DEFAULT_FILL_VALUE ,
742742 chunk_key_encoding : ChunkKeyEncodingLike | None = None ,
743743 codecs : Iterable [Codec | dict [str , JSON ]] | None = None ,
744744 dimension_names : DimensionNames = None ,
745745 attributes : dict [str , JSON ] | None = None ,
746+ chunk_grid : ChunkGrid | None = None ,
746747 ) -> ArrayV3Metadata :
747748 """
748749 Create an instance of ArrayV3Metadata.
750+
751+ Parameters
752+ ----------
753+ chunk_grid : ChunkGrid, optional
754+ Custom chunk grid to use. If provided, chunk_shape is ignored.
755+ If not provided, a RegularChunkGrid is created from chunk_shape.
756+ chunk_shape : tuple[int, ...], optional
757+ Shape of chunks for creating a RegularChunkGrid.
758+ Only used if chunk_grid is not provided.
749759 """
750760 filters : tuple [ArrayArrayCodec , ...]
751761 compressors : tuple [BytesBytesCodec , ...]
@@ -773,7 +783,14 @@ def _create_metadata_v3(
773783 else :
774784 fill_value_parsed = fill_value
775785
776- chunk_grid_parsed = RegularChunkGrid (chunk_shape = chunk_shape )
786+ # Use provided chunk_grid or create RegularChunkGrid from chunk_shape
787+ if chunk_grid is not None :
788+ chunk_grid_parsed = chunk_grid
789+ elif chunk_shape is not None :
790+ chunk_grid_parsed = RegularChunkGrid (chunk_shape = chunk_shape )
791+ else :
792+ raise ValueError ("Either chunk_grid or chunk_shape must be provided" )
793+
777794 return ArrayV3Metadata (
778795 shape = shape ,
779796 data_type = dtype ,
@@ -4564,6 +4581,7 @@ async def init_array(
45644581 dimension_names : DimensionNames = None ,
45654582 overwrite : bool = False ,
45664583 config : ArrayConfigLike | None = None ,
4584+ chunk_grid : ChunkGrid | None = None ,
45674585) -> AsyncArray [ArrayV3Metadata ] | AsyncArray [ArrayV2Metadata ]:
45684586 """Create and persist an array metadata document.
45694587
@@ -4641,6 +4659,10 @@ async def init_array(
46414659 Configuration for this array.
46424660 If ``None``, the default array runtime configuration will be used. This default
46434661 is stored in the global configuration object.
4662+ chunk_grid : ChunkGrid, optional
4663+ Custom chunk grid to use for the array. If provided, the ``chunks`` parameter is ignored.
4664+ Zarr format 3 only. Use this to create arrays with variable-sized chunks (e.g., RectilinearChunkGrid).
4665+ If not provided, a RegularChunkGrid is created from the ``chunks`` parameter.
46444666
46454667 Returns
46464668 -------
@@ -4721,6 +4743,17 @@ async def init_array(
47214743 )
47224744 sub_codecs = cast ("tuple[Codec, ...]" , (* array_array , array_bytes , * bytes_bytes ))
47234745 codecs_out : tuple [Codec , ...]
4746+
4747+ # Validate that RectilinearChunkGrid is not used with sharding
4748+ if shard_shape_parsed is not None and chunk_grid is not None :
4749+ from zarr .core .chunk_grids import RectilinearChunkGrid
4750+
4751+ if isinstance (chunk_grid , RectilinearChunkGrid ):
4752+ raise ValueError (
4753+ "Sharding is not supported with RectilinearChunkGrid (variable-sized chunks). "
4754+ "Use RegularChunkGrid (uniform chunks) with sharding, or use RectilinearChunkGrid without sharding."
4755+ )
4756+
47244757 if shard_shape_parsed is not None :
47254758 index_location = None
47264759 if isinstance (shards , dict ):
@@ -4731,9 +4764,11 @@ async def init_array(
47314764 chunk_shape = chunk_shape_parsed , codecs = sub_codecs , index_location = index_location
47324765 )
47334766 sharding_codec .validate (
4734- shape = chunk_shape_parsed ,
4767+ shape = chunk_shape_parsed , # Original code: inner chunk shape
47354768 dtype = zdtype ,
4736- chunk_grid = RegularChunkGrid (chunk_shape = shard_shape_parsed ),
4769+ chunk_grid = RegularChunkGrid (
4770+ chunk_shape = shard_shape_parsed
4771+ ), # Original code: shard shape
47374772 )
47384773 codecs_out = (sharding_codec ,)
47394774 chunks_out = shard_shape_parsed
@@ -4748,11 +4783,12 @@ async def init_array(
47484783 shape = shape_parsed ,
47494784 dtype = zdtype ,
47504785 fill_value = fill_value ,
4751- chunk_shape = chunks_out ,
4786+ chunk_shape = chunks_out if chunk_grid is None else None ,
47524787 chunk_key_encoding = chunk_key_encoding_parsed ,
47534788 codecs = codecs_out ,
47544789 dimension_names = dimension_names ,
47554790 attributes = attributes ,
4791+ chunk_grid = chunk_grid ,
47564792 )
47574793
47584794 arr = AsyncArray (metadata = meta , store_path = store_path , config = config )
@@ -4767,7 +4803,7 @@ async def create_array(
47674803 shape : ShapeLike | None = None ,
47684804 dtype : ZDTypeLike | None = None ,
47694805 data : np .ndarray [Any , np .dtype [Any ]] | None = None ,
4770- chunks : tuple [int , ...] | Literal ["auto" ] = "auto" ,
4806+ chunks : tuple [int , ...] | Sequence [ Sequence [ int ]] | ChunkGrid | Literal ["auto" ] = "auto" ,
47714807 shards : ShardsLike | None = None ,
47724808 filters : FiltersLike = "auto" ,
47734809 compressors : CompressorsLike = "auto" ,
@@ -4801,9 +4837,14 @@ async def create_array(
48014837 data : np.ndarray, optional
48024838 Array-like data to use for initializing the array. If this parameter is provided, the
48034839 ``shape`` and ``dtype`` parameters must be ``None``.
4804- chunks : tuple[int, ...] | Literal["auto"], default="auto"
4805- Chunk shape of the array.
4806- If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype.
4840+ chunks : tuple[int, ...] | Sequence[Sequence[int]] | ChunkGrid | Literal["auto"], default="auto"
4841+ Chunk shape of the array. Several formats are supported:
4842+
4843+ - tuple of ints: Creates a RegularChunkGrid with uniform chunks, e.g., ``(10, 10)``
4844+ - nested sequence: Creates a RectilinearChunkGrid with variable-sized chunks (Zarr format 3 only),
4845+ e.g., ``[[10, 20, 30], [5, 5]]`` creates variable chunks along each dimension
4846+ - ChunkGrid instance: Uses the provided chunk grid directly (Zarr format 3 only)
4847+ - "auto": Automatically determines chunk shape based on array shape and dtype
48074848 shards : tuple[int, ...], optional
48084849 Shard shape of the array. The default value of ``None`` results in no sharding at all.
48094850 filters : Iterable[Codec] | Literal["auto"], optional
@@ -4900,16 +4941,72 @@ async def create_array(
49004941 >>> fill_value=0)
49014942 <AsyncArray memory://140349042942400 shape=(100, 100) dtype=int32>
49024943 """
4944+ # Handle chunks as ChunkGrid or nested sequence - convert to chunk_grid for init_array
4945+ chunk_grid : ChunkGrid | None = None
4946+
4947+ if isinstance (chunks , ChunkGrid ):
4948+ chunk_grid = chunks
4949+ chunks = "auto" # Will be ignored since chunk_grid is set
4950+ elif chunks != "auto" and not isinstance (chunks , (tuple , int )):
4951+ # Check if it's a nested sequence for RectilinearChunkGrid
4952+ # We need to distinguish between flat sequences like [10, 10] and nested like [[10, 20], [5, 5]]
4953+ is_nested = False
4954+ try :
4955+ # Try to iterate and check if elements are sequences
4956+ if hasattr (chunks , "__iter__" ) and not isinstance (chunks , (str , bytes )): # type: ignore[unreachable]
4957+ first_elem = next (iter (chunks ), None )
4958+ if (
4959+ first_elem is not None
4960+ and hasattr (first_elem , "__iter__" )
4961+ and not isinstance (first_elem , (str , bytes , int ))
4962+ ):
4963+ is_nested = True
4964+ except (TypeError , StopIteration ):
4965+ pass
4966+
4967+ if is_nested :
4968+ # It's a nested sequence - create RectilinearChunkGrid
4969+ from zarr .core .chunk_grids import RectilinearChunkGrid
4970+
4971+ if zarr_format == 2 :
4972+ raise ValueError (
4973+ "Variable chunks (nested sequences) are only supported in Zarr format 3. "
4974+ "Use zarr_format=3 or provide a regular tuple for chunks."
4975+ )
4976+
4977+ try :
4978+ # Convert nested sequence to list of lists for RectilinearChunkGrid
4979+ chunk_shapes = [list (dim ) for dim in chunks ]
4980+ chunk_grid = RectilinearChunkGrid (chunk_shapes = chunk_shapes )
4981+ chunks = "auto" # Will be ignored since chunk_grid is set
4982+ except (TypeError , ValueError ) as e :
4983+ raise TypeError (
4984+ f"Invalid chunks argument: { chunks } . "
4985+ "Expected a tuple of integers, a nested sequence for variable chunks, "
4986+ f"a ChunkGrid instance, or 'auto'. Got error: { e } "
4987+ ) from e
4988+ # else: it's a flat sequence like [10, 10] or single int, let it pass through to existing code
4989+
49034990 data_parsed , shape_parsed , dtype_parsed = _parse_data_params (
49044991 data = data , shape = shape , dtype = dtype
49054992 )
49064993 if data_parsed is not None :
4994+ # from_array doesn't support ChunkGrid parameter, so error if chunk_grid was set
4995+ if chunk_grid is not None :
4996+ raise ValueError (
4997+ "Cannot use ChunkGrid or nested sequences for chunks when creating array from data. "
4998+ "Use a regular tuple for chunks instead."
4999+ )
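5000+ # chunk_grid is None here, so chunks is either "auto" or the non-nested chunk shape the caller passed; the cast below only adjusts the static type to the one handed to from_array (which also admits "keep")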
5001+ from typing import cast
5002+
5003+ chunks_narrowed = cast ("Literal['auto', 'keep'] | tuple[int, ...]" , chunks )
49075004 return await from_array (
49085005 store ,
49095006 data = data_parsed ,
49105007 write_data = write_data ,
49115008 name = name ,
4912- chunks = chunks ,
5009+ chunks = chunks_narrowed ,
49135010 shards = shards ,
49145011 filters = filters ,
49155012 compressors = compressors ,
@@ -4930,11 +5027,15 @@ async def create_array(
49305027 store_path = await make_store_path (
49315028 store , path = name , mode = mode , storage_options = storage_options
49325029 )
5030+ # chunks is "auto" here whenever chunk_grid is set (it was reset above); otherwise it is the non-nested chunk shape the caller passed, so the cast below only narrows the static type
5031+ from typing import cast
5032+
5033+ chunks_narrowed = cast ("tuple[int, ...] | Literal['auto']" , chunks )
49335034 return await init_array (
49345035 store_path = store_path ,
49355036 shape = shape_parsed ,
49365037 dtype = dtype_parsed ,
4937- chunks = chunks ,
5038+ chunks = chunks_narrowed ,
49385039 shards = shards ,
49395040 filters = filters ,
49405041 compressors = compressors ,
@@ -4947,6 +5048,7 @@ async def create_array(
49475048 dimension_names = dimension_names ,
49485049 overwrite = overwrite ,
49495050 config = config ,
5051+ chunk_grid = chunk_grid ,
49505052 )
49515053
49525054
0 commit comments