Merged
2 changes: 2 additions & 0 deletions docs/releases.md
@@ -8,6 +8,8 @@

### Bug fixes

+- Fix handling of big-endian data in Icechunk by making sure that non-default zarr serializers are included in the zarr array metadata [#766](https://github.com/zarr-developers/VirtualiZarr/issues/766). By [Max Jones](https://github.com/maxrjones)
+
### Documentation

### Internal changes
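For context, not part of the diff itself: a minimal sketch of the behavior this changelog entry describes, assuming zarr-python 3.x. A big-endian array carries a non-default `bytes` serializer (`BytesCodec(endian="big")`) in its zarr v3 codec pipeline; if a writer drops that serializer from the array metadata, readers decode the chunks as little-endian.

```python
# Sketch only (zarr-python 3.x assumed; not part of this PR): the non-default
# serializer must survive into the stored array metadata for ">i4" data.
import zarr
from zarr.codecs import BytesCodec
from zarr.storage import MemoryStore

arr = zarr.create_array(
    store=MemoryStore(),
    shape=(3, 2),
    chunks=(3, 2),
    dtype=">i4",
    serializer=BytesCodec(endian="big"),  # dropping this flips the byte order on read
)
print(arr.metadata.codecs)  # pipeline includes BytesCodec(endian='big')
```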
1 change: 0 additions & 1 deletion virtualizarr/manifests/store.py
@@ -184,7 +184,6 @@ async def get(
            return None
        offset = manifest._offsets[chunk_indexes]
        length = manifest._lengths[chunk_indexes]
-
        # Get the configured object store instance that matches the path
        store, path_after_prefix = self._registry.resolve(path)
        if not store:
59 changes: 59 additions & 0 deletions virtualizarr/tests/test_writers/test_icechunk.py
@@ -5,12 +5,15 @@

import numpy as np
import numpy.testing as npt
+import obstore
import pytest
import xarray as xr
import xarray.testing as xrt
import zarr
+from zarr.codecs import BytesCodec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.metadata import ArrayV3Metadata
+from zarr.dtype import parse_data_type

from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.tests.utils import PYTEST_TMP_DIRECTORY_URL_PREFIX
@@ -57,6 +60,48 @@ def icechunk_filestore(icechunk_repo: "Repository") -> "IcechunkStore":
    return session.store


+@pytest.fixture()
+def big_endian_synthetic_vds(tmpdir: Path):
+    filepath = f"{tmpdir}/data_chunk"
+    store = obstore.store.LocalStore()
+    arr = np.array([1, 2, 3, 4, 5, 6], dtype=">i4").reshape(3, 2)
+    shape = arr.shape
+    dtype = arr.dtype
+    buf = arr.tobytes()
+    obstore.put(
+        store,
+        filepath,
+        buf,
+    )
+    manifest = ChunkManifest(
+        {"0.0": {"path": filepath, "offset": 0, "length": len(buf)}}
+    )
+    zdtype = parse_data_type(dtype, zarr_format=3)
+    metadata = ArrayV3Metadata(
+        shape=shape,
+        data_type=zdtype,
+        chunk_grid={
+            "name": "regular",
+            "configuration": {"chunk_shape": shape},
+        },
+        chunk_key_encoding={"name": "default"},
+        fill_value=zdtype.default_scalar(),
+        codecs=[BytesCodec(endian="big")],
+        attributes={},
+        dimension_names=("y", "x"),
+        storage_transformers=None,
+    )
+    ma = ManifestArray(
+        chunkmanifest=manifest,
+        metadata=metadata,
+    )
+    foo = xr.Variable(data=ma, dims=["y", "x"], encoding={"scale_factor": 2})
+    vds = xr.Dataset(
+        {"foo": foo},
+    )
+    return vds, arr
+
+
@pytest.mark.parametrize("kwarg", [("group", {}), ("append_dim", {})])
def test_invalid_kwarg_type(
    icechunk_filestore: "IcechunkStore",
@@ -287,6 +332,20 @@ def test_set_grid_virtual_refs(
    )


+def test_write_big_endian_value(icechunk_repo: "Repository", big_endian_synthetic_vds):
+    vds, arr = big_endian_synthetic_vds
+    vds = vds.drop_encoding()
+    # Commit the first virtual dataset
+    writable_session = icechunk_repo.writable_session("main")
+    vds.vz.to_icechunk(writable_session.store)
+    writable_session.commit("test commit")
+    read_session = icechunk_repo.readonly_session(branch="main")
+    with (
+        xr.open_zarr(read_session.store, consolidated=False, zarr_format=3) as ds,
+    ):
+        np.testing.assert_equal(ds["foo"].data, arr)
+
+
def test_write_loadable_variable(
    icechunk_filestore: "IcechunkStore",
    simple_netcdf4: Path,
3 changes: 2 additions & 1 deletion virtualizarr/writers/icechunk.py
@@ -339,14 +339,15 @@ def write_virtual_variable_to_icechunk(
    else:
        append_axis = None
    # TODO: Should codecs be an argument to zarr's AsyncrGroup.create_array?
-    filters, _, compressors = extract_codecs(metadata.codecs)
+    filters, serializer, compressors = extract_codecs(metadata.codecs)
    arr = group.require_array(
        name=name,
        shape=metadata.shape,
        chunks=metadata.chunks,
        dtype=metadata.data_type.to_native_dtype(),
        filters=filters,
        compressors=compressors,
+        serializer=serializer,
        dimension_names=var.dims,
        fill_value=metadata.fill_value,
    )
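For reviewers, a minimal sketch of what this hunk changes, under the assumption that `extract_codecs` (imported in this module from `virtualizarr.codecs`) splits a zarr v3 codec pipeline into a `(filters, serializer, compressors)` tuple:

```python
# Sketch only, not part of this PR. Assumes extract_codecs returns
# (filters, serializer, compressors) for a zarr v3 codec pipeline.
from zarr.codecs import BytesCodec

from virtualizarr.codecs import extract_codecs  # assumed import location

filters, serializer, compressors = extract_codecs([BytesCodec(endian="big")])

# Before this change the serializer was discarded ("_"), so require_array
# created the array with zarr's default little-endian BytesCodec and
# big-endian chunk bytes were misinterpreted on read. Forwarding it via
# serializer=serializer preserves the declared byte order in the metadata.
print(serializer)  # BytesCodec(endian=<Endian.big: 'big'>)
```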