zarr-developers · martindurant · Jan 21, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py
@@ -470,7 +470,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool:
         # every single time we have to write data?
         _data, other = np.broadcast_arrays(self._data, other)
         return np.array_equal(
-            self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "USTO" else False
+            self._data,
+            other,
+            equal_nan=equal_nan if self._data.dtype.kind not in "USTOV" else False,
         )
 
     def fill(self, value: Any) -> None:

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
@@ -75,6 +75,7 @@ def reset(self) -> None:
                     "numeric": None,
                     "string": [{"id": "vlen-utf8"}],
                     "bytes": [{"id": "vlen-bytes"}],
+                    "raw": None,
                 },
                 "v3_default_filters": {"numeric": [], "string": [], "bytes": []},
                 "v3_default_serializer": {

diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
@@ -193,7 +193,12 @@ def to_dict(self) -> dict[str, JSON]:
             zarray_dict["fill_value"] = fill_value
 
         _ = zarray_dict.pop("dtype")
-        zarray_dict["dtype"] = self.dtype.str
+        dtype_json: JSON
+        if self.dtype.kind == "V":
+            dtype_json = tuple(self.dtype.descr)
+        else:
+            dtype_json = self.dtype.str
+        zarray_dict["dtype"] = dtype_json
 
         return zarray_dict
 
@@ -220,6 +225,8 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
 
 
 def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
+    if isinstance(data, list):  # list form of a structured dtype (a valid _VoidDTypeLike)
+        data = [tuple(d) for d in data]  # NOTE(review): nested field lists are not converted
     return np.dtype(data)
 
 
@@ -376,8 +383,10 @@ def _default_filters(
         dtype_key = "numeric"
     elif dtype.kind in "U":
         dtype_key = "string"
-    elif dtype.kind in "OSV":
+    elif dtype.kind in "OS":
         dtype_key = "bytes"
+    elif dtype.kind == "V":
+        dtype_key = "raw"
     else:
         raise ValueError(f"Unsupported dtype kind {dtype.kind}")
 

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -61,6 +61,7 @@ def test_config_defaults_set() -> None:
                     "numeric": None,
                     "string": [{"id": "vlen-utf8"}],
                     "bytes": [{"id": "vlen-bytes"}],
+                    "raw": None,
                 },
                 "v3_default_filters": {"numeric": [], "string": [], "bytes": []},
                 "v3_default_serializer": {

diff --git a/tests/test_v2.py b/tests/test_v2.py
@@ -109,7 +109,7 @@ async def test_v2_encode_decode(dtype):
             "compressor": None,
             "dtype": f"{dtype}0",
             "fill_value": "WA==",
-            "filters": [{"id": "vlen-bytes"}],
+            "filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None,
             "order": "C",
             "shape": [3],
             "zarr_format": 2,
@@ -263,3 +263,25 @@ def test_default_filters_and_compressor(dtype_expected: Any) -> None:
         assert arr.metadata.compressor.codec_id == expected_compressor
         if expected_filter is not None:
             assert arr.metadata.filters[0].codec_id == expected_filter
+
+
+@pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"])
+def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None:
+    a = np.array(  # reference records: written to a zarr v2 array, then read back
+        [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)],
+        dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")],  # bytes, int and float fields
+    )
+    array_path = tmp_path / "data.zarr"
+    za = zarr.create(
+        shape=(3,),
+        store=array_path,
+        chunks=(2,),  # 3 elements in chunks of 2, so the array spans two chunks
+        fill_value=fill_value,
+        zarr_format=2,
+        dtype=a.dtype,
+    )
+    if fill_value is not None:  # nothing written yet: every element should read as the fill
+        assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all()
+    za[...] = a  # overwrite the whole array with the reference records
+    za = zarr.open_array(store=array_path)  # reopen from the store path, not the old handle
+    assert (a == za[:]).all()