1616if TYPE_CHECKING :
1717 from zarr .core .common import ZarrFormat
1818
19+ from collections .abc import Mapping
20+
1921import numpy as np
2022import numpy .typing as npt
2123
2224from zarr .core .common import JSON
2325from zarr .core .dtype .npy .string import (
24- _NUMPY_SUPPORTS_VLEN_STRING ,
2526 FixedLengthASCII ,
2627 FixedLengthUTF32 ,
2728 VariableLengthString ,
102103)
103104
# Inputs that can be coerced to a ZDType: a numpy dtype-like value, an existing
# ZDType instance, a string-keyed mapping of JSON values, or a plain string.
ZDTypeLike: TypeAlias = (
    npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str
)
106107
107108for dtype in ANY_DTYPE :
108109 # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType
@@ -114,42 +115,41 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType,
114115 """
115116 Get a data type wrapper (an instance of ``ZDType``) from a native data type, e.g. a numpy dtype.
116117 """
117- data_type_registry .lazy_load ()
118118 if not isinstance (dtype , np .dtype ):
119- # TODO: This check has a lot of assumptions in it! Chiefly, we assume that the
120- # numpy object dtype contains variable length strings, which is not in general true
121- # When / if zarr python supports ragged arrays, for example, this check will fail!
122- if dtype in (str , "str" , "|T16" , "O" , "|O" , np .dtypes .ObjectDType ()):
123- if _NUMPY_SUPPORTS_VLEN_STRING :
124- na_dtype = np .dtype ("T" )
125- else :
126- na_dtype = np .dtype ("O" )
127- elif isinstance (dtype , list ):
119+ na_dtype : np .dtype [np .generic ]
120+ if isinstance (dtype , list ):
128121 # this is a valid _VoidDTypeLike check
129122 na_dtype = np .dtype ([tuple (d ) for d in dtype ])
130123 else :
131124 na_dtype = np .dtype (dtype )
132125 else :
133126 na_dtype = dtype
134- return data_type_registry .match_dtype (na_dtype )
127+ return data_type_registry .match_dtype (dtype = na_dtype )
128+
129+
def get_data_type_from_json_v3(
    dtype_spec: JSON,
) -> ZDType[TBaseDType, TBaseScalar]:
    """
    Get a data type wrapper (an instance of ``ZDType``) from a Zarr V3 JSON data type
    specification.

    The lookup is delegated to ``data_type_registry.match_json_v3``; whatever that call
    returns or raises is passed through unchanged.
    """
    matched = data_type_registry.match_json_v3(dtype_spec)
    return matched
135134
136135
def get_data_type_from_json_v2(
    dtype_spec: JSON, *, object_codec_id: str | None = None
) -> ZDType[TBaseDType, TBaseScalar]:
    """
    Get a data type wrapper (an instance of ``ZDType``) from a Zarr V2 JSON data type
    specification.

    ``object_codec_id`` is keyword-only and is forwarded as-is to
    ``data_type_registry.match_json_v2``, which performs the actual lookup.
    """
    matched = data_type_registry.match_json_v2(
        dtype_spec,
        object_codec_id=object_codec_id,
    )
    return matched
141140
142141
def parse_data_type(
    dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, object_codec_id: str | None = None
) -> ZDType[TBaseDType, TBaseScalar]:
    """
    Interpret the input as a ZDType instance.

    Resolution order:

    1. A ``ZDType`` instance is returned unchanged.
    2. A ``Mapping`` combined with ``zarr_format == 3`` is treated as the JSON object
       form of a data type and resolved with ``get_data_type_from_json_v3``.
    3. Anything else is handed to ``get_data_type_from_native_dtype``.

    NOTE(review): ``object_codec_id`` is accepted but is not read anywhere in this
    function body -- confirm whether it is meant to be forwarded for Zarr V2 input.
    """
    # Already a ZDType: nothing to parse.
    if isinstance(dtype_spec, ZDType):
        return dtype_spec

    # A mapping under zarr_format 3 is the JSON object representation of the dtype.
    is_v3_json_object = zarr_format == 3 and isinstance(dtype_spec, Mapping)
    if is_v3_json_object:
        return get_data_type_from_json_v3(dtype_spec)  # type: ignore[arg-type]

    # Remaining inputs are expected to be either a numpy dtype string or a zarr v3 dtype
    # string; in both cases a numpy dtype can be built from the input and the ZDType is
    # inferred from that.
    return get_data_type_from_native_dtype(dtype_spec)  # type: ignore[arg-type]
0 commit comments