Skip to content

Commit 042fac1

Browse files
committed
get json de/serialization largely working, and start making tests pass
1 parent 9cd5c51 commit 042fac1

File tree

10 files changed: 291 additions (+291) and 354 deletions (-354).

src/zarr/api/asynchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -982,7 +982,7 @@ async def create(
     if zarr_format == 2:
         if chunks is None:
             chunks = shape
-        dtype = parse_dtype(dtype, zarr_format)
+        dtype = parse_dtype(dtype, zarr_format=zarr_format)
         if not filters:
             filters = _default_filters(dtype)
         if not compressor:

src/zarr/codecs/sharding.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,10 @@ def __init__(
         object.__setattr__(self, "index_location", index_location_parsed)
 
         # Use instance-local lru_cache to avoid memory leaks
-        object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec))
-        object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec))
-        object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard))
+        # TODO: fix these when we don't get hashability errors for certain numpy dtypes
+        # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec))
+        # object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec))
+        # object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard))
 
     # todo: typedict return type
     def __getstate__(self) -> dict[str, Any]:

src/zarr/core/array.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ async def _create(
         """
         store_path = await make_store_path(store)
 
-        dtype_parsed = parse_dtype(dtype, zarr_format)
+        dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
         shape = parse_shapelike(shape)
 
         if chunks is not None and chunk_shape is not None:
@@ -693,13 +693,23 @@ def _create_metadata_v3(
                 category=UserWarning,
                 stacklevel=2,
             )
+
+        # resolve the numpy dtype into zarr v3 datatype
+        zarr_data_type = get_data_type_from_numpy(dtype)
+
+        if fill_value is None:
+            # v3 spec will not allow a null fill value
+            fill_value_parsed = dtype.type(zarr_data_type.default)
+        else:
+            fill_value_parsed = fill_value
+
         chunk_grid_parsed = RegularChunkGrid(chunk_shape=chunk_shape)
         return ArrayV3Metadata(
             shape=shape,
-            data_type=dtype,
+            data_type=zarr_data_type,
             chunk_grid=chunk_grid_parsed,
             chunk_key_encoding=chunk_key_encoding_parsed,
-            fill_value=fill_value,
+            fill_value=fill_value_parsed,
             codecs=codecs,
             dimension_names=tuple(dimension_names) if dimension_names else None,
             attributes=attributes or {},
@@ -3909,7 +3919,7 @@ async def init_array(
 
     from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
 
-    dtype_parsed = parse_dtype(dtype)
+    dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
     shape_parsed = parse_shapelike(shape)
     chunk_key_encoding_parsed = _parse_chunk_key_encoding(
         chunk_key_encoding, zarr_format=zarr_format

src/zarr/core/common.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
 import numpy as np
 
 from zarr.core.config import config as zarr_config
+from zarr.core.strings import _VLEN_STRING_DTYPE
 
 if TYPE_CHECKING:
     from collections.abc import Awaitable, Callable, Iterator
@@ -166,7 +167,13 @@ def parse_bool(data: Any) -> bool:
     raise ValueError(f"Expected bool, got {data} instead.")
 
 
-def parse_dtype(dtype: Any) -> np.dtype[Any]:
+def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]:
+    if dtype is str or dtype == "str":
+        if zarr_format == 2:
+            # special case as object
+            return np.dtype("object")
+        else:
+            return _VLEN_STRING_DTYPE
     return np.dtype(dtype)
 
 

src/zarr/core/config.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,17 @@ def enable_gpu(self) -> ConfigSet:
                     "bytes": [{"id": "vlen-bytes"}],
                     "raw": None,
                 },
-                "v3_default_filters": {"numeric": [], "string": [], "bytes": []},
+                "v3_default_filters": {"boolean": [], "numeric": [], "string": [], "bytes": []},
                 "v3_default_serializer": {
+                    "boolean": {"name": "bytes", "configuration": {"endian": "little"}},
                     "numeric": {"name": "bytes", "configuration": {"endian": "little"}},
                     "string": {"name": "vlen-utf8"},
                     "bytes": {"name": "vlen-bytes"},
                 },
                 "v3_default_compressors": {
+                    "boolean": [
+                        {"name": "zstd", "configuration": {"level": 0, "checksum": False}},
+                    ],
                     "numeric": [
                         {"name": "zstd", "configuration": {"level": 0, "checksum": False}},
                     ],

0 commit comments

Comments
 (0)