|
2 | 2 | import re |
3 | 3 | from collections.abc import Sequence |
4 | 4 | from dataclasses import dataclass |
5 | | -from typing import Any, ClassVar, Self, TypeGuard, cast |
| 5 | +from typing import Any, Self, TypeGuard, cast |
6 | 6 |
|
7 | 7 | import numpy as np |
8 | 8 |
|
9 | 9 | from zarr.core.common import JSON, ZarrFormat |
10 | 10 | from zarr.core.dtype.common import ( |
11 | 11 | DataTypeValidationError, |
12 | | - HasEndianness, |
13 | 12 | HasItemSize, |
14 | 13 | HasLength, |
15 | 14 | v3_unstable_dtype_warning, |
16 | 15 | ) |
17 | 16 | from zarr.core.dtype.npy.common import ( |
18 | | - EndiannessNumpy, |
19 | 17 | bytes_from_json, |
20 | 18 | bytes_to_json, |
21 | 19 | check_json_str, |
22 | | - endianness_from_numpy_str, |
23 | | - endianness_to_numpy_str, |
24 | 20 | ) |
25 | 21 | from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType |
26 | 22 |
|
27 | 23 |
|
@dataclass(frozen=True, kw_only=True)
class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize):
    """
    Zarr data type wrapping numpy's fixed-length bytes dtype (``np.dtypes.BytesDType``).

    In Zarr v2 metadata the dtype is the numpy dtype string (accepted by
    ``check_json`` as ``|S<n>``). In Zarr v3 metadata it is the dict
    ``{"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": <n>}}``.

    The ``length`` attribute read throughout this class is presumably declared
    by the ``HasLength`` mixin -- confirm against ``zarr.core.dtype.common``.
    """

    dtype_cls = np.dtypes.BytesDType
    _zarr_v3_name = "numpy.fixed_length_ascii"

    @classmethod
    def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self:
        """Create an instance whose ``length`` equals ``dtype.itemsize``."""
        return cls(length=dtype.itemsize)

    def to_dtype(self) -> np.dtypes.BytesDType[int]:
        """Return the numpy bytes dtype with ``self.length`` as its size."""
        return self.dtype_cls(self.length)

    @classmethod
    def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]:
        """
        Check that the input is a valid JSON representation of a numpy S dtype.

        For Zarr v2 the input must be a string of the form ``|S<digits>``.
        For Zarr v3 the input must be a dict with exactly the keys ``name`` and
        ``configuration``, where ``name`` equals ``_zarr_v3_name`` and
        ``configuration`` is a dict holding an integer ``length_bytes``.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            # match |S1, |S2, etc
            return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None
        elif zarr_format == 3:
            return (
                isinstance(data, dict)
                and set(data.keys()) == {"name", "configuration"}
                and data["name"] == cls._zarr_v3_name
                and isinstance(data["configuration"], dict)
                and "length_bytes" in data["configuration"]
                # Reject non-integer lengths here so they never reach the
                # constructor in _from_json_unsafe.
                and isinstance(data["configuration"]["length_bytes"], int)
            )
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def to_json(self, zarr_format: ZarrFormat) -> JSON:
        """
        Serialize this data type to its Zarr v2 or v3 JSON form.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            return self.to_dtype().str
        elif zarr_format == 3:
            return {
                "name": self._zarr_v3_name,
                "configuration": {"length_bytes": self.length},
            }
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    @classmethod
    def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self:
        """
        Build an instance from JSON without validating it first.

        Callers are expected to have run ``check_json`` on ``data`` beforehand.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            # v2 stores the numpy dtype string, so numpy can parse it directly
            return cls.from_dtype(np.dtype(data))  # type: ignore[arg-type]
        elif zarr_format == 3:
            return cls(length=data["configuration"]["length_bytes"])  # type: ignore[arg-type, index, call-overload]
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def default_value(self) -> np.bytes_:
        """Return the default scalar: an empty ``np.bytes_``."""
        return np.bytes_(b"")

    def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str:
        """Encode ``data`` with standard base64 and return it as an ASCII string."""
        return base64.standard_b64encode(data).decode("ascii")  # type: ignore[arg-type]

    def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_:
        """
        Decode a base64 string into a scalar of this data type.

        Raises
        ------
        TypeError
            If ``data`` does not pass ``check_json_str``.
        """
        if check_json_str(data):
            return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii")))
        raise TypeError(f"Invalid type: {data}. Expected a string.")  # pragma: no cover

    def check_value(self, data: object) -> bool:
        """Return True if ``data`` is a ``np.bytes_``, ``str``, ``bytes`` or ``int``."""
        # this is generous for backwards compatibility
        return isinstance(data, np.bytes_ | str | bytes | int)

    def _cast_value_unsafe(self, value: object) -> np.bytes_:
        """Convert ``value`` with the scalar type of ``to_dtype()``, without validation."""
        return self.to_dtype().type(value)

    @property
    def item_size(self) -> int:
        """Size of one element in bytes; equal to ``length`` for this data type."""
        return self.length
97 | | - |
98 | | - |
99 | 24 | @dataclass(frozen=True, kw_only=True) |
100 | 25 | class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): |
101 | 26 | # np.dtypes.VoidDType is specified in an odd way in numpy |
@@ -190,87 +115,6 @@ def item_size(self) -> int: |
190 | 115 | return self.length |
191 | 116 |
|
192 | 117 |
|
@dataclass(frozen=True, kw_only=True)
class FixedLengthUTF32(
    ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize
):
    """
    Zarr data type wrapping numpy's fixed-length unicode dtype (``np.dtypes.StrDType``).

    ``length`` counts code points; each one occupies ``code_point_bytes`` bytes,
    so the item size is ``length * code_point_bytes``.

    In Zarr v2 metadata the dtype is the numpy dtype string (accepted by
    ``check_json`` as ``<U<n>`` or ``>U<n>``). In Zarr v3 metadata it is the dict
    ``{"name": "numpy.fixed_length_utf32", "configuration": {"length_bytes": <n>}}``,
    where ``length_bytes`` is the size in bytes, not the number of code points.

    The ``length`` and ``endianness`` attributes are presumably declared by the
    ``HasLength`` and ``HasEndianness`` mixins -- confirm against
    ``zarr.core.dtype.common``.
    """

    dtype_cls = np.dtypes.StrDType
    _zarr_v3_name = "numpy.fixed_length_utf32"
    code_point_bytes: ClassVar[int] = 4  # utf32 is 4 bytes per code point

    @classmethod
    def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self:
        """Create an instance from a numpy dtype's item size and byte order."""
        byte_order = cast("EndiannessNumpy", dtype.byteorder)
        return cls(
            # itemsize is in bytes; convert to a count of code points
            length=dtype.itemsize // (cls.code_point_bytes),
            endianness=endianness_from_numpy_str(byte_order),
        )

    def to_dtype(self) -> np.dtypes.StrDType[int]:
        """Return the numpy unicode dtype with this length and byte order."""
        byte_order = endianness_to_numpy_str(self.endianness)
        return self.dtype_cls(self.length).newbyteorder(byte_order)

    @classmethod
    def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]:
        """
        Check that the input is a valid JSON representation of a numpy U dtype.

        For Zarr v2 the input must be a string of the form ``<U<digits>`` or
        ``>U<digits>``. For Zarr v3 the input must be a dict with exactly the
        keys ``name`` and ``configuration``, where ``name`` equals
        ``_zarr_v3_name`` and ``configuration`` is a dict whose only key is an
        integer ``length_bytes``.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            # match >U1, <U2, etc
            return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None
        elif zarr_format == 3:
            return (
                isinstance(data, dict)
                # exact key-set match already guarantees "configuration" is present
                and set(data.keys()) == {"name", "configuration"}
                and data["name"] == cls._zarr_v3_name
                and isinstance(data["configuration"], dict)
                and set(data["configuration"].keys()) == {"length_bytes"}
                and isinstance(data["configuration"]["length_bytes"], int)
            )
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def to_json(self, zarr_format: ZarrFormat) -> JSON:
        """
        Serialize this data type to its Zarr v2 or v3 JSON form.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            return self.to_dtype().str
        elif zarr_format == 3:
            return {
                "name": self._zarr_v3_name,
                # v3 stores the size in bytes, not the number of code points
                "configuration": {"length_bytes": self.length * self.code_point_bytes},
            }
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    @classmethod
    def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self:
        """
        Build an instance from JSON without validating it first.

        Callers are expected to have run ``check_json`` on ``data`` beforehand.

        Raises
        ------
        ValueError
            If ``zarr_format`` is neither 2 nor 3.
        """
        if zarr_format == 2:
            # v2 stores the numpy dtype string, so numpy can parse it directly
            return cls.from_dtype(np.dtype(data))  # type: ignore[arg-type]
        elif zarr_format == 3:
            # NOTE(review): endianness is not passed here, so the instance gets
            # whatever default the ``endianness`` field declares -- confirm.
            # Floor division silently truncates a length_bytes that is not a
            # multiple of code_point_bytes.
            return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes)  # type: ignore[arg-type, index, call-overload, operator]
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def default_value(self) -> np.str_:
        """Return the default scalar: an empty ``np.str_``."""
        return np.str_("")

    def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str:
        """Return ``str(data)`` as the JSON form of a scalar."""
        return str(data)

    def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_:
        """
        Convert a JSON string into a scalar of this data type.

        Raises
        ------
        TypeError
            If ``data`` does not pass ``check_json_str``.
        """
        if check_json_str(data):
            return self.to_dtype().type(data)
        raise TypeError(f"Invalid type: {data}. Expected a string.")  # pragma: no cover

    def check_value(self, data: object) -> bool:
        """Return True if ``data`` is a ``str``, ``np.str_``, ``bytes`` or ``int``."""
        # this is generous for backwards compatibility
        return isinstance(data, str | np.str_ | bytes | int)

    def _cast_value_unsafe(self, data: object) -> np.str_:
        """Convert ``data`` with the scalar type of ``to_dtype()``, without validation."""
        return self.to_dtype().type(data)

    @property
    def item_size(self) -> int:
        """Size of one element in bytes: ``length * code_point_bytes``."""
        return self.length * self.code_point_bytes
272 | | - |
273 | | - |
274 | 118 | @dataclass(frozen=True, kw_only=True) |
275 | 119 | class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): |
276 | 120 | dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] |
|
0 commit comments