zarr-developers
diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst
Lines changed: 2 additions & 2 deletions
diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst
Lines changed: 5 additions & 5 deletions
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
Lines changed: 5 additions & 3 deletions
diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py
Lines changed: 10 additions & 0 deletions
diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py
Lines changed: 26 additions & 26 deletions
@@ -211,8 +211,8 @@ prints additional diagnostics, e.g.::
    Serializer         : BytesCodec(endian=<Endian.little: 'little'>)
    Compressors        : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
    No. bytes          : 400000000 (381.5M)
-   No. bytes stored   : 3558573
-   Storage ratio      : 112.4
+   No. bytes stored   : 9696520
+   Storage ratio      : 41.3
    Chunks Initialized : 100
 
 .. note::
 
@@ -128,20 +128,20 @@ Create a ``ZDType`` from a native data type:
 
   >>> from zarr.core.dtype import Int8
   >>> import numpy as np
-  >>> int8 = Int8.from_dtype(np.dtype('int8'))
+  >>> int8 = Int8.from_native_dtype(np.dtype('int8'))
 
 Convert back to native data type:
 
 .. code-block:: python
 
-  >>> native_dtype = int8.to_dtype()
+  >>> native_dtype = int8.to_native_dtype()
   >>> assert native_dtype == np.dtype('int8')
 
 Get the default scalar value for the data type:
 
 .. code-block:: python
 
-  >>> default_value = int8.default_value()
+  >>> default_value = int8.default_scalar()
   >>> assert default_value == np.int8(0)
 
 
@@ -160,13 +160,13 @@ Serialize a scalar value to JSON:
 
 .. code-block:: python
 
-  >>> json_value = int8.to_json_value(42, zarr_format=3)
+  >>> json_value = int8.to_json_scalar(42, zarr_format=3)
   >>> json_value
   42
 
 Deserialize a scalar value from JSON:
 
 .. code-block:: python
 
-  >>> scalar_value = int8.from_json_value(42, zarr_format=3)
+  >>> scalar_value = int8.from_json_scalar(42, zarr_format=3)
   >>> assert scalar_value == np.int8(42)
@@ -48,15 +48,15 @@ async def _decode_single(
         # segfaults and other bad things happening
         if chunk_spec.dtype.dtype_cls is not np.dtypes.ObjectDType:
             try:
-                chunk = chunk.view(chunk_spec.dtype.to_dtype())
+                chunk = chunk.view(chunk_spec.dtype.to_native_dtype())
             except TypeError:
                 # this will happen if the dtype of the chunk
                 # does not match the dtype of the array spec, e.g. if
                 # the dtype of the chunk_spec is a string dtype, but the chunk
                 # is an object array. In this case, we need to convert the object
                 # array to the correct dtype.
 
-                chunk = np.array(chunk).astype(chunk_spec.dtype.to_dtype())
+                chunk = np.array(chunk).astype(chunk_spec.dtype.to_native_dtype())
 
         elif chunk.dtype != object:
             # If we end up here, someone must have hacked around with the filters.
@@ -80,7 +80,7 @@ async def _encode_single(
         chunk = chunk_array.as_ndarray_like()
 
         # ensure contiguous and correct order
-        chunk = chunk.astype(chunk_spec.dtype.to_dtype(), order=chunk_spec.order, copy=False)
+        chunk = chunk.astype(chunk_spec.dtype.to_native_dtype(), order=chunk_spec.order, copy=False)
 
         # apply filters
         if self.filters:
 
@@ -79,7 +79,7 @@ async def _decode_single(
             "Endianness | None", self.endian.value if self.endian is not None else None
         )
         new_byte_order = endianness_to_numpy_str(endian_str)
-        dtype = chunk_spec.dtype.to_dtype().newbyteorder(new_byte_order)
+        dtype = chunk_spec.dtype.to_native_dtype().newbyteorder(new_byte_order)
 
         as_array_like = chunk_bytes.as_array_like()
         if isinstance(as_array_like, NDArrayLike):
 
@@ -452,7 +452,7 @@ async def _decode_single(
         # setup output array
         out = chunk_spec.prototype.nd_buffer.create(
             shape=shard_shape,
-            dtype=shard_spec.dtype.to_dtype(),
+            dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
             fill_value=0,
         )
@@ -499,7 +499,7 @@ async def _decode_partial_single(
         # setup output array
         out = shard_spec.prototype.nd_buffer.create(
             shape=indexer.shape,
-            dtype=shard_spec.dtype.to_dtype(),
+            dtype=shard_spec.dtype.to_native_dtype(),
             order=shard_spec.order,
             fill_value=0,
         )
 
@@ -60,7 +60,7 @@ async def _decode_single(
         decoded = _vlen_utf8_codec.decode(raw_bytes)
         assert decoded.dtype == np.object_
         decoded.shape = chunk_spec.shape
-        as_string_dtype = decoded.astype(chunk_spec.dtype.to_dtype(), copy=False)
+        as_string_dtype = decoded.astype(chunk_spec.dtype.to_native_dtype(), copy=False)
         return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype)
 
     async def _encode_single(
 
@@ -700,7 +700,7 @@ def _create_metadata_v3(
 
         if fill_value is None:
             # v3 spec will not allow a null fill value
-            fill_value_parsed = dtype.default_value()
+            fill_value_parsed = dtype.default_scalar()
         else:
             fill_value_parsed = fill_value
 
@@ -782,7 +782,7 @@ def _create_metadata_v2(
         if dimension_separator is None:
             dimension_separator = "."
         if fill_value is None:
-            fill_value = dtype.default_value()  # type: ignore[assignment]
+            fill_value = dtype.default_scalar()  # type: ignore[assignment]
         return ArrayV2Metadata(
             shape=shape,
             dtype=dtype,
@@ -1056,7 +1056,7 @@ def dtype(self) -> TBaseDType:
         np.dtype
             Data type of the array
         """
-        return self._zdtype.to_dtype()
+        return self._zdtype.to_native_dtype()
 
     @property
     def order(self) -> MemoryOrder:
 
@@ -62,7 +62,7 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any:
         # validated when decoding the metadata, but we support reading
         # Zarr V2 data and need to support the case where fill_value
         # is None.
-        return chunk_spec.dtype.default_value()
+        return chunk_spec.dtype.default_scalar()
     else:
         return fill_value
 
@@ -296,7 +296,9 @@ def _merge_chunk_array(
         is_complete_chunk: bool,
         drop_axes: tuple[int, ...],
     ) -> NDBuffer:
-        if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype.to_dtype()):
+        if chunk_selection == () or is_scalar(
+            value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype()
+        ):
             chunk_value = value
         else:
             chunk_value = value[out_selection]
@@ -317,7 +319,7 @@ def _merge_chunk_array(
         if existing_chunk_array is None:
             chunk_array = chunk_spec.prototype.nd_buffer.create(
                 shape=chunk_spec.shape,
-                dtype=chunk_spec.dtype.to_dtype(),
+                dtype=chunk_spec.dtype.to_native_dtype(),
                 order=chunk_spec.order,
                 fill_value=fill_value_or_default(chunk_spec),
             )
 
@@ -10,7 +10,9 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Generic,
     Literal,
+    TypedDict,
     TypeVar,
     cast,
     overload,
@@ -39,6 +41,14 @@
 AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"]
 DimensionNames = Iterable[str | None] | None
 
+TName = TypeVar("TName", bound=str)
+TConfig = TypeVar("TConfig", bound=Mapping[str, object])
+
+
+class NamedConfig(TypedDict, Generic[TName, TConfig]):
+    name: TName
+    configuration: TConfig
+
 
 def product(tup: ChunkCoords) -> int:
     return functools.reduce(operator.mul, tup, 1)
 
@@ -16,12 +16,13 @@
 if TYPE_CHECKING:
     from zarr.core.common import ZarrFormat
 
+from collections.abc import Mapping
+
 import numpy as np
 import numpy.typing as npt
 
 from zarr.core.common import JSON
 from zarr.core.dtype.npy.string import (
-    _NUMPY_SUPPORTS_VLEN_STRING,
     FixedLengthASCII,
     FixedLengthUTF32,
     VariableLengthString,
@@ -102,7 +103,7 @@
 )
 
 # This type models inputs that can be coerced to a ZDType
-ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] | str
+ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str
 
 for dtype in ANY_DTYPE:
     # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType
@@ -114,42 +115,41 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType,
     """
     Get a data type wrapper (an instance of ``ZDType``) from a native data type, e.g. a numpy dtype.
     """
-    data_type_registry.lazy_load()
     if not isinstance(dtype, np.dtype):
-        # TODO: This check has a lot of assumptions in it! Chiefly, we assume that the
-        # numpy object dtype contains variable length strings, which is not in general true
-        # When / if zarr python supports ragged arrays, for example, this check will fail!
-        if dtype in (str, "str", "|T16", "O", "|O", np.dtypes.ObjectDType()):
-            if _NUMPY_SUPPORTS_VLEN_STRING:
-                na_dtype = np.dtype("T")
-            else:
-                na_dtype = np.dtype("O")
-        elif isinstance(dtype, list):
+        na_dtype: np.dtype[np.generic]
+        if isinstance(dtype, list):
             # this is a valid _VoidDTypeLike check
             na_dtype = np.dtype([tuple(d) for d in dtype])
         else:
             na_dtype = np.dtype(dtype)
     else:
         na_dtype = dtype
-    return data_type_registry.match_dtype(na_dtype)
+    return data_type_registry.match_dtype(dtype=na_dtype)
+
+
+def get_data_type_from_json_v3(
+    dtype_spec: JSON,
+) -> ZDType[TBaseDType, TBaseScalar]:
+    return data_type_registry.match_json_v3(dtype_spec)
 
 
-def get_data_type_from_json(
-    dtype: JSON, zarr_format: ZarrFormat
+def get_data_type_from_json_v2(
+    dtype_spec: JSON, *, object_codec_id: str | None = None
 ) -> ZDType[TBaseDType, TBaseScalar]:
-    return data_type_registry.match_json(dtype, zarr_format=zarr_format)
+    return data_type_registry.match_json_v2(dtype_spec, object_codec_id=object_codec_id)
 
 
-def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[TBaseDType, TBaseScalar]:
+def parse_data_type(
+    dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, object_codec_id: str | None = None
+) -> ZDType[TBaseDType, TBaseScalar]:
     """
     Interpret the input as a ZDType instance.
     """
-    if isinstance(dtype, ZDType):
-        return dtype
-    elif isinstance(dtype, dict):
-        # This branch assumes that the data type has been specified in the JSON form
-        # but it's also possible for numpy data types to be specified as dictionaries, which will
-        # cause an error in the `get_data_type_from_json`, but that's ok for now
-        return get_data_type_from_json(dtype, zarr_format=zarr_format)  # type: ignore[arg-type]
-    else:
-        return get_data_type_from_native_dtype(dtype)
+    if isinstance(dtype_spec, ZDType):
+        return dtype_spec
+    # a Mapping with zarr_format 3 means that we have a JSON object representation of the dtype
+    if zarr_format == 3 and isinstance(dtype_spec, Mapping):
+        return get_data_type_from_json_v3(dtype_spec)  # type: ignore[arg-type]
+    # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case
+    # we can create a numpy dtype from it, and do the dtype inference from that
+    return get_data_type_from_native_dtype(dtype_spec)  # type: ignore[arg-type]
Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@ async def _decode_single(`
`79`	`79`	`"Endianness \| None", self.endian.value if self.endian is not None else None`
`80`	`80`	`)`
`81`	`81`	`new_byte_order = endianness_to_numpy_str(endian_str)`
`82`		`- dtype = chunk_spec.dtype.to_dtype().newbyteorder(new_byte_order)`
	`82`	`+ dtype = chunk_spec.dtype.to_native_dtype().newbyteorder(new_byte_order)`
`83`	`83`
`84`	`84`	`as_array_like = chunk_bytes.as_array_like()`
`85`	`85`	`if isinstance(as_array_like, NDArrayLike):`