Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions virtualizarr/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import zarr
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
from zarr.abc.codec import Codec as ZarrCodec
from zarr.codecs import BytesCodec
from zarr.core.codec_pipeline import BatchedCodecPipeline
from zarr.core.metadata.v3 import ArrayV3Metadata

Expand Down Expand Up @@ -64,7 +65,6 @@ def convert_to_codec_pipeline(
-------
BatchedCodecPipeline
"""
from zarr.core.array import _get_default_chunk_encoding_v3
from zarr.registry import get_codec_class

zarr_codecs: tuple[ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec, ...] = ()
Expand All @@ -78,7 +78,10 @@ def convert_to_codec_pipeline(
arrayarray_codecs, arraybytes_codec, bytesbytes_codecs = extract_codecs(zarr_codecs)

if arraybytes_codec is None:
arraybytes_codec = _get_default_chunk_encoding_v3(dtype)[1]
if dtype.byteorder == ">":
arraybytes_codec = BytesCodec(endian="big")
else:
arraybytes_codec = BytesCodec()

codec_pipeline = BatchedCodecPipeline(
array_array_codecs=arrayarray_codecs,
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/manifests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def create_v3_array_metadata(
"""
return ArrayV3Metadata(
shape=shape,
data_type=data_type,
data_type=data_type.name if hasattr(data_type, "name") else data_type,
chunk_grid={
"name": "regular",
"configuration": {"chunk_shape": chunk_shape},
Expand Down
2 changes: 2 additions & 0 deletions virtualizarr/tests/test_parsers/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,4 +457,6 @@ def big_endian_dtype_hdf5_file(tmpdir):
filepath = f"{tmpdir}/big_endian.nc"
f = h5py.File(filepath, "w")
f.create_dataset("data", shape=(10,), dtype=">f4")
dset = f["data"]
dset[...] = 10
return filepath
19 changes: 12 additions & 7 deletions virtualizarr/tests/test_parsers/test_hdf/test_hdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import h5py # type: ignore
import numpy as np
import pytest
import xarray as xr

from virtualizarr import open_virtual_dataset
from virtualizarr.parsers import HDFParser
Expand Down Expand Up @@ -221,19 +222,23 @@ def test_coord_names(
) as vds:
assert set(vds.coords) == {"lat", "lon"}

@pytest.mark.xfail(reason="Requires Zarr v3 big endian dtype support")
def test_big_endian(
self,
big_endian_dtype_hdf5_file,
):
store = obstore_local(file_url=big_endian_dtype_hdf5_file)
parser = HDFParser()
with open_virtual_dataset(
file_url=big_endian_dtype_hdf5_file,
object_store=store,
parser=parser,
) as vds:
print(vds)
with (
parser(
file_url=big_endian_dtype_hdf5_file, object_store=store
) as manifest_store,
xr.open_dataset(big_endian_dtype_hdf5_file) as expected,
):
observed = xr.open_dataset(
manifest_store, engine="zarr", consolidated=False, zarr_format=3
)
assert isinstance(observed, xr.Dataset)
xr.testing.assert_identical(observed.load(), expected.load())


@requires_hdf5plugin
Expand Down
36 changes: 11 additions & 25 deletions virtualizarr/tests/test_parsers/test_netcdf3.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,31 @@
import pytest
import xarray as xr
import xarray.testing as xrt

from virtualizarr import open_virtual_dataset
from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.parsers import NetCDF3Parser
from virtualizarr.tests import requires_network, requires_scipy
from virtualizarr.tests import requires_kerchunk, requires_network, requires_scipy
from virtualizarr.tests.utils import obstore_http, obstore_local


@requires_scipy
@pytest.mark.xfail(
reason="Big endian not yet supported by zarr-python 3.0"
) # https://github.com/zarr-developers/zarr-python/issues/2324
def test_read_netcdf3(netcdf3_file, array_v3_metadata):
filepath = str(netcdf3_file)
store = obstore_local(file_url=filepath)
parser = NetCDF3Parser()
with open_virtual_dataset(
file_url=filepath,
parser=parser,
object_store=store,
) as vds:
assert isinstance(vds, xr.Dataset)
assert list(vds.variables.keys()) == ["foo"]
assert isinstance(vds["foo"].data, ManifestArray)

expected_manifest = ChunkManifest(
entries={"0": {"path": filepath, "offset": 80, "length": 12}}
with (
parser(file_url=filepath, object_store=store) as manifest_store,
xr.open_dataset(filepath) as expected,
):
observed = xr.open_dataset(
manifest_store, engine="zarr", consolidated=False, zarr_format=3
)
metadata = array_v3_metadata(shape=(3,), chunks=(3,))
expected_ma = ManifestArray(chunkmanifest=expected_manifest, metadata=metadata)
expected_vds = xr.Dataset({"foo": xr.Variable(data=expected_ma, dims=["x"])})

xrt.assert_identical(vds, expected_vds)
assert isinstance(observed, xr.Dataset)
assert list(observed.variables.keys()) == ["foo"]
xrt.assert_identical(observed.load(), expected.load())


@requires_kerchunk
@requires_network
@pytest.mark.xfail(
reason="Big endian not yet supported by zarr-python 3.0"
) # https://github.com/zarr-developers/zarr-python/issues/2324
def test_read_http_netcdf3(array_v3_metadata):
file_url = "https://github.com/pydata/xarray-data/raw/master/air_temperature.nc"
store = obstore_http(file_url=file_url)
Expand Down