diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py
index cfa8941cf..716a4929b 100644
--- a/virtualizarr/manifests/store.py
+++ b/virtualizarr/manifests/store.py
@@ -16,7 +16,6 @@
 from zarr.core.common import BytesLike
 
 from virtualizarr.manifests.group import ManifestGroup
-from virtualizarr.vendor.zarr.core.metadata import dict_to_buffer
 
 if TYPE_CHECKING:
     from obstore.store import (
@@ -56,36 +55,6 @@ def get_store_prefix(url: str) -> str:
     return "" if scheme in {"", "file"} else f"{scheme}://{netloc}"
 
 
-def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer:
-    """
-    Generate the expected Zarr V3 metadata from a virtual dataset.
-
-    Group metadata is returned for all Datasets and Array metadata
-    is returned for all DataArrays.
-
-    Combines the ManifestArray metadata with the attrs from the DataArray
-    and adds `dimension_names` for all arrays if not already provided.
-
-    Parameters
-    ----------
-    manifest_group : ManifestGroup
-    key : str
-
-    Returns
-    -------
-    Buffer
-    """
-    # If requesting the root metadata, return the standard group metadata with additional dataset specific attributes
-
-    if key == "zarr.json":
-        metadata = manifest_group.metadata.to_dict()
-        return dict_to_buffer(metadata, prototype=default_buffer_prototype())
-    else:
-        var, _ = key.split("/")
-        metadata = manifest_group.arrays[var].metadata.to_dict()
-        return dict_to_buffer(metadata, prototype=default_buffer_prototype())
-
-
 def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]:
     """
     Splits `key` provided to a zarr store into the variable indicated
@@ -243,8 +212,19 @@ async def get(
         byte_range: ByteRequest | None = None,
     ) -> Buffer | None:
         # docstring inherited
-        if key.endswith("zarr.json"):
-            return get_zarr_metadata(self._group, key)
+
+        if key == "zarr.json":
+            # Return group metadata
+            return self._group.metadata.to_buffer_dict(
+                prototype=default_buffer_prototype()
+            )["zarr.json"]
+        elif key.endswith("zarr.json"):
+            # Return array metadata
+            # TODO: Handle nested groups
+            var, _ = key.split("/")
+            return self._group.arrays[var].metadata.to_buffer_dict(
+                prototype=default_buffer_prototype()
+            )["zarr.json"]
         var = key.split("/")[0]
         marr = self._group.arrays[var]
         manifest = marr.manifest
@@ -258,22 +238,26 @@ async def get(
             return None
         offset = manifest._offsets[chunk_indexes]
         length = manifest._lengths[chunk_indexes]
 
+        # Get the configured object store instance that matches the path
         store = self._store_registry.get_store(path)
         if not store:
             raise ValueError(
                 f"Could not find a store to use for {path} in the store registry"
             )
 
+        # Truncate path to match Obstore expectations
         key = urlparse(path).path
         if hasattr(store, "prefix") and store.prefix:
             # strip the prefix from key
             key = key.removeprefix(str(store.prefix))
 
+        # Transform the input byte range to account for the chunk location in the file
         chunk_end_exclusive = offset + length
         byte_range = _transform_byte_range(
             byte_range, chunk_start=offset, chunk_end_exclusive=chunk_end_exclusive
         )
 
+        # Actually get the bytes
         try:
             bytes = await store.get_range_async(
diff --git a/virtualizarr/tests/test_parsers/test_fits.py b/virtualizarr/tests/test_parsers/test_fits.py
index 3b4d86b1b..f0eadbfcb 100644
--- a/virtualizarr/tests/test_parsers/test_fits.py
+++ b/virtualizarr/tests/test_parsers/test_fits.py
@@ -11,9 +11,6 @@
 
 @requires_kerchunk
 @requires_network
-@pytest.mark.xfail(
-    reason="Big endian not yet supported by zarr-python 3.0"
-)  # https://github.com/zarr-developers/zarr-python/issues/2324
 def test_open_hubble_data():
     # data from https://registry.opendata.aws/hst/
     file_url = "s3://stpubdata/hst/public/f05i/f05i0201m/f05i0201m_a1f.fits"
@@ -28,4 +25,4 @@ def test_open_hubble_data():
         assert list(vds.variables) == ["PRIMARY"]
         var = vds["PRIMARY"].variable
         assert var.sizes == {"y": 17, "x": 589}
-        assert var.dtype == ">i4"
+        assert var.dtype == "int32"
diff --git a/virtualizarr/vendor/zarr/core/metadata.py b/virtualizarr/vendor/zarr/core/metadata.py
deleted file mode 100644
index bc53de847..000000000
--- a/virtualizarr/vendor/zarr/core/metadata.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import json
-from typing import Any
-
-import numpy as np
-from zarr.core.buffer import Buffer, BufferPrototype
-from zarr.core.metadata.v3 import V3JsonEncoder
-
-
-def _replace_special_floats(obj: object) -> Any:
-    """Helper function to replace NaN/Inf/-Inf values with special strings
-
-    Note: this cannot be done in the V3JsonEncoder because Python's `json.dumps` optimistically
-    converts NaN/Inf values to special types outside of the encoding step.
-    """
-    if isinstance(obj, float):
-        if np.isnan(obj):
-            return "NaN"
-        elif np.isinf(obj):
-            return "Infinity" if obj > 0 else "-Infinity"
-    elif isinstance(obj, dict):
-        # Recursively replace in dictionaries
-        return {k: _replace_special_floats(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        # Recursively replace in lists
-        return [_replace_special_floats(item) for item in obj]
-    return obj
-
-
-def dict_to_buffer(input: dict, prototype: BufferPrototype) -> Buffer:
-    # modified from ArrayV3Metadata.to_buffer_dict
-    d = _replace_special_floats(input)
-    return prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode())
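Note on the replacement pattern (not part of the diff above): zarr-python's metadata
classes serialize themselves to a {"zarr.json": Buffer} mapping via to_buffer_dict,
which appears to make the vendored dict_to_buffer helper (and its NaN/Infinity
special-casing) redundant. A minimal sketch of that API, using a hypothetical group
attribute purely for illustration:

    from zarr.core.buffer import default_buffer_prototype
    from zarr.core.group import GroupMetadata

    # Hypothetical attributes; any JSON-serializable dict works here.
    metadata = GroupMetadata(attributes={"title": "virtual dataset"})

    # to_buffer_dict returns {"zarr.json": Buffer}; this is the same call the new
    # ManifestStore.get path makes for group and array metadata.
    buffer = metadata.to_buffer_dict(prototype=default_buffer_prototype())["zarr.json"]
    print(buffer.to_bytes().decode())  # the Zarr V3 group metadata JSON document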