Skip to content

Commit 48a492e

Browse files
committed
add warnings when using non-spec features with v3
1 parent 01b73a7 commit 48a492e

File tree

5 files changed

+43
-5
lines changed

5 files changed

+43
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ filterwarnings = [
376376
"ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
377377
"ignore:Creating a zarr.buffer.gpu.*:UserWarning",
378378
"ignore:Duplicate name:UserWarning", # from ZipFile
379+
"ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning",
379380
]
380381
markers = [
381382
"gpu: mark a test as requiring CuPy and GPU"

src/zarr/api/asynchronous.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ async def consolidate_metadata(
195195
v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={}))
196196
members_metadata[k] = v
197197

198+
if any(m.zarr_format == 3 for m in members_metadata.values()):
199+
warnings.warn(
200+
"Consolidated metadata is currently not part in the Zarr version 3 specification and "
201+
"may not be supported by other zarr implementations.",
202+
category=UserWarning,
203+
stacklevel=1,
204+
)
205+
198206
ConsolidatedMetadata._flat_to_nested(members_metadata)
199207

200208
consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata)
@@ -203,6 +211,7 @@ async def consolidate_metadata(
203211
group,
204212
metadata=metadata,
205213
)
214+
206215
await group._save_metadata()
207216
return group
208217

src/zarr/codecs/vlen_utf8.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from dataclasses import dataclass
44
from typing import TYPE_CHECKING
5+
from warnings import warn
56

67
import numpy as np
78
from numcodecs.vlen import VLenBytes, VLenUTF8
@@ -25,6 +26,15 @@
2526

2627
@dataclass(frozen=True)
2728
class VLenUTF8Codec(ArrayBytesCodec):
29+
def __init__(self) -> None:
    """Create the codec and warn that `vlen-utf8` is outside the Zarr v3 spec.

    A ``UserWarning`` is emitted on every instantiation, before the parent
    initializer runs.
    """
    message = (
        "The codec `vlen-utf8` is currently not part in the Zarr version 3 specification and "
        "may not be supported by other zarr implementations."
    )
    # stacklevel=2 attributes the warning to the code that instantiates the
    # codec rather than to this method.
    warn(message, UserWarning, stacklevel=2)
    super().__init__()
37+
2838
@classmethod
2939
def from_dict(cls, data: dict[str, JSON]) -> Self:
3040
_, configuration_parsed = parse_named_configuration(
@@ -71,6 +81,15 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -
7181

7282
@dataclass(frozen=True)
7383
class VLenBytesCodec(ArrayBytesCodec):
84+
def __init__(self) -> None:
    """Create the codec and warn that `vlen-bytes` is outside the Zarr v3 spec.

    A ``UserWarning`` is emitted on every instantiation, before the parent
    initializer runs.
    """
    message = (
        "The codec `vlen-bytes` is currently not part in the Zarr version 3 specification and "
        "may not be supported by other zarr implementations."
    )
    # stacklevel=2 attributes the warning to the code that instantiates the
    # codec rather than to this method.
    warn(message, UserWarning, stacklevel=2)
    super().__init__()
92+
7493
@classmethod
7594
def from_dict(cls, data: dict[str, JSON]) -> Self:
7695
_, configuration_parsed = parse_named_configuration(

src/zarr/core/array.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from itertools import starmap
77
from logging import getLogger
88
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
9+
from warnings import warn
910

1011
import numpy as np
1112
import numpy.typing as npt
@@ -580,6 +581,14 @@ async def _create_v3(
580581
else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1])
581582
)
582583

584+
if dtype.kind in "UTS":
585+
warn(
586+
f"The dtype `{dtype}` is currently not part in the Zarr version 3 specification and "
587+
"may not be supported by other zarr implementations.",
588+
category=UserWarning,
589+
stacklevel=2,
590+
)
591+
583592
metadata = ArrayV3Metadata(
584593
shape=shape,
585594
data_type=dtype,

src/zarr/core/metadata/v3.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,14 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None:
9595

9696
# we need to have special codecs if we are decoding vlen strings or bytestrings
9797
# TODO: use codec ID instead of class name
98-
codec_id = abc.__class__.__name__
99-
if dtype == DataType.string and not codec_id == "VLenUTF8Codec":
98+
codec_class_name = abc.__class__.__name__
99+
if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec":
100100
raise ValueError(
101-
f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`."
101+
f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`."
102102
)
103-
if dtype == DataType.bytes and not codec_id == "VLenBytesCodec":
103+
if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec":
104104
raise ValueError(
105-
f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`."
105+
f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`."
106106
)
107107

108108

0 commit comments

Comments
 (0)