Merged
2 changes: 2 additions & 0 deletions docs/releases.md
@@ -8,6 +8,8 @@

### Bug fixes

+- Fix handling of big-endian data in Icechunk by making sure that non-default zarr serializers are included in the zarr array metadata [#766](https://github.com/zarr-developers/VirtualiZarr/issues/766). By [Max Jones](https://github.com/maxrjones)
+
### Documentation

### Internal changes
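For context, not part of the diff itself: a minimal sketch of the behavior this changelog entry describes, assuming zarr-python 3.x. A big-endian array carries a non-default `bytes` serializer (`BytesCodec(endian="big")`) in its zarr v3 codec pipeline; if a writer drops that serializer from the array metadata, readers decode the chunks as little-endian.

```python
# Sketch only (zarr-python 3.x assumed; not part of this PR): the non-default
# serializer must survive into the stored array metadata for ">i4" data.
import zarr
from zarr.codecs import BytesCodec
from zarr.storage import MemoryStore

arr = zarr.create_array(
    store=MemoryStore(),
    shape=(3, 2),
    chunks=(3, 2),
    dtype=">i4",
    serializer=BytesCodec(endian="big"),  # dropping this flips the byte order on read
)
print(arr.metadata.codecs)  # pipeline includes BytesCodec(endian='big')
```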
1 change: 0 additions & 1 deletion virtualizarr/manifests/store.py
@@ -184,7 +184,6 @@ async def get(
            return None
        offset = manifest._offsets[chunk_indexes]
        length = manifest._lengths[chunk_indexes]
-
        # Get the configured object store instance that matches the path
        store, path_after_prefix = self._registry.resolve(path)
        if not store:
59 changes: 59 additions & 0 deletions virtualizarr/tests/test_writers/test_icechunk.py
@@ -5,12 +5,15 @@

import numpy as np
import numpy.testing as npt
+import obstore
import pytest
import xarray as xr
import xarray.testing as xrt
import zarr
+from zarr.codecs import BytesCodec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.metadata import ArrayV3Metadata
+from zarr.dtype import parse_data_type

from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.tests.utils import PYTEST_TMP_DIRECTORY_URL_PREFIX
@@ -57,6 +60,48 @@ def icechunk_filestore(icechunk_repo: "Repository") -> "IcechunkStore":
    return session.store


+@pytest.fixture()
+def big_endian_synthetic_vds(tmpdir: Path):
+    filepath = f"{tmpdir}/data_chunk"
+    store = obstore.store.LocalStore()
+    arr = np.array([1, 2, 3, 4, 5, 6], dtype=">i4").reshape(3, 2)
+    shape = arr.shape
+    dtype = arr.dtype
+    buf = arr.tobytes()
+    obstore.put(
+        store,
+        filepath,
+        buf,
+    )
+    manifest = ChunkManifest(
+        {"0.0": {"path": filepath, "offset": 0, "length": len(buf)}}
+    )
+    zdtype = parse_data_type(dtype, zarr_format=3)
+    metadata = ArrayV3Metadata(
+        shape=shape,
+        data_type=zdtype,
+        chunk_grid={
+            "name": "regular",
+            "configuration": {"chunk_shape": shape},
+        },
+        chunk_key_encoding={"name": "default"},
+        fill_value=zdtype.default_scalar(),
+        codecs=[BytesCodec(endian="big")],
+        attributes={},
+        dimension_names=("y", "x"),
+        storage_transformers=None,
+    )
+    ma = ManifestArray(
+        chunkmanifest=manifest,
+        metadata=metadata,
+    )
+    foo = xr.Variable(data=ma, dims=["y", "x"], encoding={"scale_factor": 2})
+    vds = xr.Dataset(
+        {"foo": foo},
+    )
+    return vds, arr
+
+
@pytest.mark.parametrize("kwarg", [("group", {}), ("append_dim", {})])
def test_invalid_kwarg_type(
    icechunk_filestore: "IcechunkStore",
@@ -287,6 +332,20 @@ def test_set_grid_virtual_refs(
    )


+def test_write_big_endian_value(icechunk_repo: "Repository", big_endian_synthetic_vds):
+    vds, arr = big_endian_synthetic_vds
+    vds = vds.drop_encoding()
+    # Commit the first virtual dataset
+    writable_session = icechunk_repo.writable_session("main")
+    vds.vz.to_icechunk(writable_session.store)
+    writable_session.commit("test commit")
+    read_session = icechunk_repo.readonly_session(branch="main")
+    with (
+        xr.open_zarr(read_session.store, consolidated=False, zarr_format=3) as ds,
+    ):
+        np.testing.assert_equal(ds["foo"].data, arr)
+
+
def test_write_loadable_variable(
    icechunk_filestore: "IcechunkStore",
    simple_netcdf4: Path,
3 changes: 2 additions & 1 deletion virtualizarr/writers/icechunk.py
@@ -339,14 +339,15 @@ def write_virtual_variable_to_icechunk(
    else:
        append_axis = None
    # TODO: Should codecs be an argument to zarr's AsyncrGroup.create_array?
-    filters, _, compressors = extract_codecs(metadata.codecs)
+    filters, serializer, compressors = extract_codecs(metadata.codecs)
    arr = group.require_array(
        name=name,
        shape=metadata.shape,
        chunks=metadata.chunks,
        dtype=metadata.data_type.to_native_dtype(),
        filters=filters,
        compressors=compressors,
+        serializer=serializer,
        dimension_names=var.dims,
        fill_value=metadata.fill_value,
    )
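For reviewers, a minimal sketch of what this hunk changes, under the assumption that `extract_codecs` (imported in this module from `virtualizarr.codecs`) splits a zarr v3 codec pipeline into a `(filters, serializer, compressors)` tuple:

```python
# Sketch only, not part of this PR. Assumes extract_codecs returns
# (filters, serializer, compressors) for a zarr v3 codec pipeline.
from zarr.codecs import BytesCodec

from virtualizarr.codecs import extract_codecs  # assumed import location

filters, serializer, compressors = extract_codecs([BytesCodec(endian="big")])

# Before this change the serializer was discarded ("_"), so require_array
# created the array with zarr's default little-endian BytesCodec and
# big-endian chunk bytes were misinterpreted on read. Forwarding it via
# serializer=serializer preserves the declared byte order in the metadata.
print(serializer)  # BytesCodec(endian=<Endian.big: 'big'>)
```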