9898 ArrayV3MetadataDict ,
9999 T_ArrayMetadata ,
100100)
101- from zarr .core .metadata .dtype import DTypeWrapper
101+ from zarr .core .metadata .dtype import DTypeWrapper , VariableLengthString
102102from zarr .core .metadata .v2 import (
103103 _default_compressor ,
104104 _default_filters ,
@@ -549,7 +549,7 @@ async def _create(
549549 * ,
550550 # v2 and v3
551551 shape : ShapeLike ,
552- dtype : npt .DTypeLike ,
552+ dtype : npt .DTypeLike [ Any ] ,
553553 zarr_format : ZarrFormat = 3 ,
554554 fill_value : Any | None = None ,
555555 attributes : dict [str , JSON ] | None = None ,
@@ -578,18 +578,22 @@ async def _create(
578578 See :func:`AsyncArray.create` for more details.
579579 Deprecated in favor of :func:`zarr.api.asynchronous.create_array`.
580580 """
581+ # TODO: delete this and be more strict about where parsing occurs
582+ if not isinstance (dtype , DTypeWrapper ):
583+ dtype_parsed = get_data_type_from_numpy (np .dtype (dtype ))
584+ else :
585+ dtype_parsed = dtype
581586 store_path = await make_store_path (store )
582587
583- dtype_parsed = parse_dtype (dtype , zarr_format = zarr_format )
584588 shape = parse_shapelike (shape )
585589
586590 if chunks is not None and chunk_shape is not None :
587591 raise ValueError ("Only one of chunk_shape or chunks can be provided." )
588592
589593 if chunks :
590- _chunks = normalize_chunks (chunks , shape , dtype_parsed .itemsize )
594+ _chunks = normalize_chunks (chunks , shape , dtype_parsed .unwrap (). itemsize )
591595 else :
592- _chunks = normalize_chunks (chunk_shape , shape , dtype_parsed .itemsize )
596+ _chunks = normalize_chunks (chunk_shape , shape , dtype_parsed .unwrap (). itemsize )
593597 config_parsed = parse_array_config (config )
594598
595599 result : AsyncArray [ArrayV3Metadata ] | AsyncArray [ArrayV2Metadata ]
@@ -666,7 +670,7 @@ async def _create(
666670 @staticmethod
667671 def _create_metadata_v3 (
668672 shape : ShapeLike ,
669- dtype : np . dtype [ Any ],
673+ dtype : DTypeWrapper [ Any , Any ],
670674 chunk_shape : ChunkCoords ,
671675 fill_value : Any | None = None ,
672676 chunk_key_encoding : ChunkKeyEncodingLike | None = None ,
@@ -694,19 +698,16 @@ def _create_metadata_v3(
694698 stacklevel = 2 ,
695699 )
696700
697- # resolve the numpy dtype into zarr v3 datatype
698- zarr_data_type = get_data_type_from_numpy (dtype )
699-
700701 if fill_value is None :
701702 # v3 spec will not allow a null fill value
702- fill_value_parsed = zarr_data_type .default_value
703+ fill_value_parsed = dtype .default_value
703704 else :
704705 fill_value_parsed = fill_value
705706
706707 chunk_grid_parsed = RegularChunkGrid (chunk_shape = chunk_shape )
707708 return ArrayV3Metadata (
708709 shape = shape ,
709- data_type = zarr_data_type ,
710+ data_type = dtype ,
710711 chunk_grid = chunk_grid_parsed ,
711712 chunk_key_encoding = chunk_key_encoding_parsed ,
712713 fill_value = fill_value_parsed ,
@@ -769,7 +770,7 @@ async def _create_v3(
769770 @staticmethod
770771 def _create_metadata_v2 (
771772 shape : ChunkCoords ,
772- dtype : np . dtype [ Any ],
773+ dtype : DTypeWrapper [ Any , Any ],
773774 chunks : ChunkCoords ,
774775 order : MemoryOrder ,
775776 dimension_separator : Literal ["." , "/" ] | None = None ,
@@ -781,10 +782,8 @@ def _create_metadata_v2(
781782 if dimension_separator is None :
782783 dimension_separator = "."
783784
784- dtype = parse_dtype (dtype , zarr_format = 2 )
785-
786785 # inject VLenUTF8 for str dtype if not already present
787- if np . issubdtype (dtype , np . str_ ):
786+ if isinstance (dtype , VariableLengthString ):
788787 filters = filters or []
789788 from numcodecs .vlen import VLenUTF8
790789
@@ -793,7 +792,7 @@ def _create_metadata_v2(
793792
794793 return ArrayV2Metadata (
795794 shape = shape ,
796- dtype = np . dtype ( dtype ) ,
795+ dtype = dtype ,
797796 chunks = chunks ,
798797 order = order ,
799798 dimension_separator = dimension_separator ,
@@ -2046,7 +2045,7 @@ def dtype(self) -> np.dtype[Any]:
20462045 np.dtype
20472046 The NumPy data type.
20482047 """
2049- return self ._async_array .dtype
2048+ return self ._async_array .dtype . unwrap ()
20502049
20512050 @property
20522051 def attrs (self ) -> Attributes :
@@ -3919,7 +3918,7 @@ async def init_array(
39193918
39203919 from zarr .codecs .sharding import ShardingCodec , ShardingCodecIndexLocation
39213920
3922- dtype_parsed = parse_dtype (dtype , zarr_format = zarr_format )
3921+ dtype_wrapped = parse_dtype (dtype , zarr_format = zarr_format )
39233922 shape_parsed = parse_shapelike (shape )
39243923 chunk_key_encoding_parsed = _parse_chunk_key_encoding (
39253924 chunk_key_encoding , zarr_format = zarr_format
@@ -3934,7 +3933,10 @@ async def init_array(
39343933 await ensure_no_existing_node (store_path , zarr_format = zarr_format )
39353934
39363935 shard_shape_parsed , chunk_shape_parsed = _auto_partition (
3937- array_shape = shape_parsed , shard_shape = shards , chunk_shape = chunks , dtype = dtype_parsed
3936+ array_shape = shape_parsed ,
3937+ shard_shape = shards ,
3938+ chunk_shape = chunks ,
3939+ item_size = dtype_wrapped .unwrap ().itemsize ,
39383940 )
39393941 chunks_out : tuple [int , ...]
39403942 meta : ArrayV2Metadata | ArrayV3Metadata
@@ -3950,9 +3952,8 @@ async def init_array(
39503952 raise ValueError ("Zarr format 2 arrays do not support `serializer`." )
39513953
39523954 filters_parsed , compressor_parsed = _parse_chunk_encoding_v2 (
3953- compressor = compressors , filters = filters , dtype = np . dtype ( dtype )
3955+ compressor = compressors , filters = filters , dtype = dtype_wrapped
39543956 )
3955-
39563957 if dimension_names is not None :
39573958 raise ValueError ("Zarr format 2 arrays do not support dimension names." )
39583959 if order is None :
@@ -3962,7 +3963,7 @@ async def init_array(
39623963
39633964 meta = AsyncArray ._create_metadata_v2 (
39643965 shape = shape_parsed ,
3965- dtype = dtype_parsed ,
3966+ dtype = dtype_wrapped ,
39663967 chunks = chunk_shape_parsed ,
39673968 dimension_separator = chunk_key_encoding_parsed .separator ,
39683969 fill_value = fill_value ,
@@ -3976,7 +3977,7 @@ async def init_array(
39763977 compressors = compressors ,
39773978 filters = filters ,
39783979 serializer = serializer ,
3979- dtype = dtype_parsed ,
3980+ dtype = dtype_wrapped ,
39803981 )
39813982 sub_codecs = cast (tuple [Codec , ...], (* array_array , array_bytes , * bytes_bytes ))
39823983 codecs_out : tuple [Codec , ...]
@@ -3991,7 +3992,7 @@ async def init_array(
39913992 )
39923993 sharding_codec .validate (
39933994 shape = chunk_shape_parsed ,
3994- dtype = dtype_parsed ,
3995+ dtype = dtype_wrapped ,
39953996 chunk_grid = RegularChunkGrid (chunk_shape = shard_shape_parsed ),
39963997 )
39973998 codecs_out = (sharding_codec ,)
@@ -4002,7 +4003,7 @@ async def init_array(
40024003
40034004 meta = AsyncArray ._create_metadata_v3 (
40044005 shape = shape_parsed ,
4005- dtype = dtype_parsed ,
4006+ dtype = dtype_wrapped ,
40064007 fill_value = fill_value ,
40074008 chunk_shape = chunks_out ,
40084009 chunk_key_encoding = chunk_key_encoding_parsed ,
@@ -4210,12 +4211,11 @@ def _parse_chunk_key_encoding(
42104211
42114212
42124213def _get_default_chunk_encoding_v3 (
4213- np_dtype : np . dtype [ Any ],
4214+ dtype : DTypeWrapper [ Any , Any ],
42144215) -> tuple [tuple [ArrayArrayCodec , ...], ArrayBytesCodec , tuple [BytesBytesCodec , ...]]:
42154216 """
42164217 Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype.
42174218 """
4218- dtype = get_data_type_from_numpy (np_dtype )
42194219
42204220 default_filters = zarr_config .get ("array.v3_default_filters" ).get (dtype .kind )
42214221 default_serializer = zarr_config .get ("array.v3_default_serializer" ).get (dtype .kind )
@@ -4229,14 +4229,14 @@ def _get_default_chunk_encoding_v3(
42294229
42304230
42314231def _get_default_chunk_encoding_v2 (
4232- np_dtype : np . dtype [ Any ],
4232+ dtype : DTypeWrapper [ Any , Any ],
42334233) -> tuple [tuple [numcodecs .abc .Codec , ...] | None , numcodecs .abc .Codec | None ]:
42344234 """
42354235 Get the default chunk encoding for Zarr format 2 arrays, given a dtype
42364236 """
42374237
4238- compressor_dict = _default_compressor (np_dtype )
4239- filter_dicts = _default_filters (np_dtype )
4238+ compressor_dict = _default_compressor (dtype )
4239+ filter_dicts = _default_filters (dtype )
42404240
42414241 compressor = None
42424242 if compressor_dict is not None :
@@ -4253,13 +4253,12 @@ def _parse_chunk_encoding_v2(
42534253 * ,
42544254 compressor : CompressorsLike ,
42554255 filters : FiltersLike ,
4256- dtype : np . dtype [ Any ],
4256+ dtype : DTypeWrapper [ Any , Any ],
42574257) -> tuple [tuple [numcodecs .abc .Codec , ...] | None , numcodecs .abc .Codec | None ]:
42584258 """
42594259 Generate chunk encoding classes for Zarr format 2 arrays with optional defaults.
42604260 """
42614261 default_filters , default_compressor = _get_default_chunk_encoding_v2 (dtype )
4262-
42634262 _filters : tuple [numcodecs .abc .Codec , ...] | None
42644263 _compressor : numcodecs .abc .Codec | None
42654264
0 commit comments