add warnings when using non-spec features with v3

normanrz · normanrz · commit 48a492e8e238 · 2024-12-13T15:59:38.000+01:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -376,6 +376,7 @@ filterwarnings = [
     "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
     "ignore:Creating a zarr.buffer.gpu.*:UserWarning",
     "ignore:Duplicate name:UserWarning",  # from ZipFile
+    "ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning",
 ]
 markers = [
     "gpu: mark a test as requiring CuPy and GPU"
diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
@@ -195,6 +195,14 @@ async def consolidate_metadata(
             v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={}))
             members_metadata[k] = v
 
+    if any(m.zarr_format == 3 for m in members_metadata.values()):
+        warnings.warn(
+            "Consolidated metadata is currently not part in the Zarr version 3 specification and "
+            "may not be supported by other zarr implementations.",
+            category=UserWarning,
+            stacklevel=1,
+        )
+
     ConsolidatedMetadata._flat_to_nested(members_metadata)
 
     consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata)
@@ -203,6 +211,7 @@ async def consolidate_metadata(
         group,
         metadata=metadata,
     )
+
     await group._save_metadata()
     return group
 
diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py
@@ -2,6 +2,7 @@
 
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
+from warnings import warn
 
 import numpy as np
 from numcodecs.vlen import VLenBytes, VLenUTF8
@@ -25,6 +26,15 @@
 
 @dataclass(frozen=True)
 class VLenUTF8Codec(ArrayBytesCodec):
+    def __init__(self) -> None:
+        warn(
+            "The codec `vlen-utf8` is currently not part in the Zarr version 3 specification and "
+            "may not be supported by other zarr implementations.",
+            category=UserWarning,
+            stacklevel=2,
+        )
+        super().__init__()
+
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
@@ -71,6 +81,15 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -
 
 @dataclass(frozen=True)
 class VLenBytesCodec(ArrayBytesCodec):
+    def __init__(self) -> None:
+        warn(
+            "The codec `vlen-bytes` is currently not part in the Zarr version 3 specification and "
+            "may not be supported by other zarr implementations.",
+            category=UserWarning,
+            stacklevel=2,
+        )
+        super().__init__()
+
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
@@ -6,6 +6,7 @@
 from itertools import starmap
 from logging import getLogger
 from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
+from warnings import warn
 
 import numpy as np
 import numpy.typing as npt
@@ -580,6 +581,14 @@ async def _create_v3(
                 else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1])
             )
 
+        if dtype.kind in "UTS":
+            warn(
+                f"The dtype `{dtype}` is currently not part in the Zarr version 3 specification and "
+                "may not be supported by other zarr implementations.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+
         metadata = ArrayV3Metadata(
             shape=shape,
             data_type=dtype,
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
@@ -95,14 +95,14 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None:
 
     # we need to have special codecs if we are decoding vlen strings or bytestrings
     # TODO: use codec ID instead of class name
-    codec_id = abc.__class__.__name__
-    if dtype == DataType.string and not codec_id == "VLenUTF8Codec":
+    codec_class_name = abc.__class__.__name__
+    if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec":
         raise ValueError(
-            f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`."
+            f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`."
         )
-    if dtype == DataType.bytes and not codec_id == "VLenBytesCodec":
+    if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec":
         raise ValueError(
-            f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`."
+            f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`."
         )
 
 

Original file line number	Diff line number	Diff line change
`@@ -376,6 +376,7 @@ filterwarnings = [`
`376`	`376`	`"ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",`
`377`	`377`	`"ignore:Creating a zarr.buffer.gpu.*:UserWarning",`
`378`	`378`	`"ignore:Duplicate name:UserWarning", # from ZipFile`
	`379`	`+ "ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning",`
`379`	`380`	`]`
`380`	`381`	`markers = [`
`381`	`382`	`"gpu: mark a test as requiring CuPy and GPU"`