|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -import base64 |
4 | 3 | import warnings |
5 | 4 | from collections.abc import Iterable, Sequence |
6 | 5 | from functools import cached_property |
@@ -52,7 +51,7 @@ class ArrayV2Metadata(Metadata): |
52 | 51 | shape: ChunkCoords |
53 | 52 | chunks: ChunkCoords |
54 | 53 | dtype: ZDType[TBaseDType, TBaseScalar] |
55 | | - fill_value: int | float | str | bytes | None = 0 |
| 54 | + fill_value: int | float | str | bytes | None = None |
56 | 55 | order: MemoryOrder = "C" |
57 | 56 | filters: tuple[numcodecs.abc.Codec, ...] | None = None |
58 | 57 | dimension_separator: Literal[".", "/"] = "." |
@@ -85,7 +84,11 @@ def __init__( |
85 | 84 | order_parsed = parse_indexing_order(order) |
86 | 85 | dimension_separator_parsed = parse_separator(dimension_separator) |
87 | 86 | filters_parsed = parse_filters(filters) |
88 | | - fill_value_parsed = parse_fill_value(fill_value, dtype=dtype.to_dtype()) |
| 87 | + fill_value_parsed: TBaseScalar | None |
| 88 | + if fill_value is not None: |
| 89 | + fill_value_parsed = dtype.cast_value(fill_value) |
| 90 | + else: |
| 91 | + fill_value_parsed = fill_value |
89 | 92 | attributes_parsed = parse_attributes(attributes) |
90 | 93 |
|
91 | 94 | object.__setattr__(self, "shape", shape_parsed) |
@@ -134,11 +137,10 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: |
134 | 137 | _ = parse_zarr_format(_data.pop("zarr_format")) |
135 | 138 | dtype = get_data_type_from_native_dtype(_data["dtype"]) |
136 | 139 | _data["dtype"] = dtype |
137 | | - if dtype.to_dtype().kind in "SV": |
138 | | - fill_value_encoded = _data.get("fill_value") |
139 | | - if fill_value_encoded is not None: |
140 | | - fill_value = base64.standard_b64decode(fill_value_encoded) |
141 | | - _data["fill_value"] = fill_value |
| 140 | + fill_value_encoded = _data.get("fill_value") |
| 141 | + if fill_value_encoded is not None: |
| 142 | + fill_value = dtype.from_json_value(fill_value_encoded, zarr_format=2) |
| 143 | + _data["fill_value"] = fill_value |
142 | 144 |
|
143 | 145 | # zarr v2 allowed arbitrary keys here. |
144 | 146 | # We don't want the ArrayV2Metadata constructor to fail just because someone put an |
@@ -281,76 +283,3 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: |
281 | 283 | ) |
282 | 284 | raise ValueError(msg) |
283 | 285 | return data |
284 | | - |
285 | | - |
286 | | -def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: |
287 | | - """Handle structured dtype/fill value pairs""" |
288 | | - try: |
289 | | - if isinstance(fill_value, list): |
290 | | - return np.array([tuple(fill_value)], dtype=dtype)[0] |
291 | | - elif isinstance(fill_value, tuple): |
292 | | - return np.array([fill_value], dtype=dtype)[0] |
293 | | - elif isinstance(fill_value, bytes): |
294 | | - return np.frombuffer(fill_value, dtype=dtype)[0] |
295 | | - elif isinstance(fill_value, str): |
296 | | - decoded = base64.standard_b64decode(fill_value) |
297 | | - return np.frombuffer(decoded, dtype=dtype)[0] |
298 | | - else: |
299 | | - return np.array(fill_value, dtype=dtype)[()] |
300 | | - except Exception as e: |
301 | | - raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e |
302 | | - |
303 | | - |
304 | | -def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: |
305 | | - """ |
306 | | - Parse a potential fill value into a value that is compatible with the provided dtype. |
307 | | -
|
308 | | - Parameters |
309 | | - ---------- |
310 | | - fill_value : Any |
311 | | - A potential fill value. |
312 | | - dtype : np.dtype[Any] |
313 | | - A numpy dtype. |
314 | | -
|
315 | | - Returns |
316 | | - ------- |
317 | | - An instance of `dtype`, or `None`, or any python object (in the case of an object dtype) |
318 | | - """ |
319 | | - |
320 | | - if fill_value is None or dtype.hasobject: |
321 | | - pass |
322 | | - elif dtype.fields is not None: |
323 | | - # the dtype is structured (has multiple fields), so the fill_value might be a |
324 | | - # compound value (e.g., a tuple or dict) that needs field-wise processing. |
325 | | - # We use parse_structured_fill_value to correctly convert each component. |
326 | | - fill_value = _parse_structured_fill_value(fill_value, dtype) |
327 | | - elif not isinstance(fill_value, np.void) and fill_value == 0: |
328 | | - # this should be compatible across numpy versions for any array type, including |
329 | | - # structured arrays |
330 | | - fill_value = np.zeros((), dtype=dtype)[()] |
331 | | - elif dtype.kind == "U": |
332 | | - # special case unicode because of encoding issues on Windows if passed through numpy |
333 | | - # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 |
334 | | - |
335 | | - if not isinstance(fill_value, str): |
336 | | - raise ValueError( |
337 | | - f"fill_value {fill_value!r} is not valid for dtype {dtype}; must be a unicode string" |
338 | | - ) |
339 | | - elif dtype.kind in "SV" and isinstance(fill_value, str): |
340 | | - fill_value = base64.standard_b64decode(fill_value) |
341 | | - elif dtype.kind == "c" and isinstance(fill_value, list) and len(fill_value) == 2: |
342 | | - complex_val = complex(float(fill_value[0]), float(fill_value[1])) |
343 | | - fill_value = np.array(complex_val, dtype=dtype)[()] |
344 | | - else: |
345 | | - try: |
346 | | - if isinstance(fill_value, bytes) and dtype.kind == "V": |
347 | | - # special case for numpy 1.14 compatibility |
348 | | - fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()] |
349 | | - else: |
350 | | - fill_value = np.array(fill_value, dtype=dtype)[()] |
351 | | - |
352 | | - except Exception as e: |
353 | | - msg = f"Fill_value {fill_value} is not valid for dtype {dtype}." |
354 | | - raise ValueError(msg) from e |
355 | | - |
356 | | - return fill_value |
0 commit comments