Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/zarr/core/buffer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool:
# every single time we have to write data?
_data, other = np.broadcast_arrays(self._data, other)
return np.array_equal(
self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "USTO" else False
self._data,
other,
equal_nan=equal_nan if self._data.dtype.kind not in "USTOV" else False,
)

def fill(self, value: Any) -> None:
Expand Down
1 change: 1 addition & 0 deletions src/zarr/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def reset(self) -> None:
"numeric": None,
"string": [{"id": "vlen-utf8"}],
"bytes": [{"id": "vlen-bytes"}],
"raw": None,
},
"v3_default_filters": {"numeric": [], "string": [], "bytes": []},
"v3_default_serializer": {
Expand Down
13 changes: 11 additions & 2 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,12 @@ def to_dict(self) -> dict[str, JSON]:
zarray_dict["fill_value"] = fill_value

_ = zarray_dict.pop("dtype")
zarray_dict["dtype"] = self.dtype.str
dtype_json: JSON
if self.dtype.kind == "V":
dtype_json = tuple(self.dtype.descr)
else:
dtype_json = self.dtype.str
zarray_dict["dtype"] = dtype_json

return zarray_dict

Expand All @@ -220,6 +225,8 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:


def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
if isinstance(data, list): # this is a valid _VoidDTypeLike check
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any iterable?

Copy link
Contributor Author

@ilan-gold ilan-gold Jan 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to handle the [(field_name, field_dtype, field_shape), ...] case on https://numpy.org/doc/2.1/reference/arrays.dtypes.html#specifying-and-constructing-data-types but at the same time to obey
Screenshot 2025-01-14 at 19 22 14

This might require more stringent checking or tests... Not sure. The reason this tuple conversion happens is that data types read back from disk arrive as lists of lists, not lists of tuples. So maybe we should check that `data` is a list and that `data[0]` is also a list, and throw an error if it isn't? I'm not sure what else could be in the lists, though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the dtype constructor would raise an exception (or our own comprehension would fail) if the JSON on disk had been edited, so I'm not too worried.

data = [tuple(d) for d in data]
return np.dtype(data)


Expand Down Expand Up @@ -376,8 +383,10 @@ def _default_filters(
dtype_key = "numeric"
elif dtype.kind in "U":
dtype_key = "string"
elif dtype.kind in "OSV":
elif dtype.kind in "OS":
dtype_key = "bytes"
elif dtype.kind == "V":
dtype_key = "raw"
else:
raise ValueError(f"Unsupported dtype kind {dtype.kind}")

Expand Down
1 change: 1 addition & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def test_config_defaults_set() -> None:
"numeric": None,
"string": [{"id": "vlen-utf8"}],
"bytes": [{"id": "vlen-bytes"}],
"raw": None,
},
"v3_default_filters": {"numeric": [], "string": [], "bytes": []},
"v3_default_serializer": {
Expand Down
24 changes: 23 additions & 1 deletion tests/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ async def test_v2_encode_decode(dtype):
"compressor": None,
"dtype": f"{dtype}0",
"fill_value": "WA==",
"filters": [{"id": "vlen-bytes"}],
"filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None,
"order": "C",
"shape": [3],
"zarr_format": 2,
Expand Down Expand Up @@ -263,3 +263,25 @@ def test_default_filters_and_compressor(dtype_expected: Any) -> None:
assert arr.metadata.compressor.codec_id == expected_compressor
if expected_filter is not None:
assert arr.metadata.filters[0].codec_id == expected_filter


@pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"])
def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None:
    """Write a structured (record) array to a v2 store and read it back.

    When a fill value is supplied, the freshly created array must read as
    that fill value everywhere before any data is written. After writing,
    re-opening the array from the same store must return the original records.
    """
    record_dtype = np.dtype([("foo", "S3"), ("bar", "i4"), ("baz", "f8")])
    records = np.array(
        [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)],
        dtype=record_dtype,
    )
    store_location = tmp_path / "data.zarr"

    created = zarr.create(
        shape=(3,),
        store=store_location,
        chunks=(2,),
        fill_value=fill_value,
        zarr_format=2,
        dtype=records.dtype,
    )

    if fill_value is not None:
        # An untouched array should be entirely fill value.
        all_fill = np.array([fill_value] * records.shape[0], dtype=records.dtype)
        assert (all_fill == created[:]).all()

    created[...] = records

    # Re-open from the store so the dtype is parsed back from the on-disk metadata.
    reopened = zarr.open_array(store=store_location)
    assert (records == reopened[:]).all()
Loading