Skip to content

Commit 8128451

Browse files
TomNicholas, pre-commit-ci[bot], maxrjones
authored
Zarr data types refactor compatibility (#618)
* un-xfail some big endian tests
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* update minimum require zarr version to the as-yet-unreleased zarr 3.1.0
* Revert "update minimum require zarr version to the as-yet-unreleased zarr 3.1.0"
  This reverts commit 7a2e4f0.
* remove need for vendored zarr metadata code
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Handle metadata get requests
* Change expected dtype from <i4 to int32
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Max Jones <[email protected]>
1 parent b6787ee commit 8128451

File tree

3 files changed

+18
-69
lines changed

3 files changed

+18
-69
lines changed

virtualizarr/manifests/store.py

Lines changed: 17 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
from zarr.core.common import BytesLike
1717

1818
from virtualizarr.manifests.group import ManifestGroup
19-
from virtualizarr.vendor.zarr.core.metadata import dict_to_buffer
2019

2120
if TYPE_CHECKING:
2221
from obstore.store import (
@@ -56,36 +55,6 @@ def get_store_prefix(url: str) -> str:
5655
return "" if scheme in {"", "file"} else f"{scheme}://{netloc}"
5756

5857

59-
def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer:
60-
"""
61-
Generate the expected Zarr V3 metadata from a virtual dataset.
62-
63-
Group metadata is returned for all Datasets and Array metadata
64-
is returned for all DataArrays.
65-
66-
Combines the ManifestArray metadata with the attrs from the DataArray
67-
and adds `dimension_names` for all arrays if not already provided.
68-
69-
Parameters
70-
----------
71-
manifest_group : ManifestGroup
72-
key : str
73-
74-
Returns
75-
-------
76-
Buffer
77-
"""
78-
# If requesting the root metadata, return the standard group metadata with additional dataset specific attributes
79-
80-
if key == "zarr.json":
81-
metadata = manifest_group.metadata.to_dict()
82-
return dict_to_buffer(metadata, prototype=default_buffer_prototype())
83-
else:
84-
var, _ = key.split("/")
85-
metadata = manifest_group.arrays[var].metadata.to_dict()
86-
return dict_to_buffer(metadata, prototype=default_buffer_prototype())
87-
88-
8958
def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]:
9059
"""
9160
Splits `key` provided to a zarr store into the variable indicated
@@ -243,8 +212,19 @@ async def get(
243212
byte_range: ByteRequest | None = None,
244213
) -> Buffer | None:
245214
# docstring inherited
246-
if key.endswith("zarr.json"):
247-
return get_zarr_metadata(self._group, key)
215+
216+
if key == "zarr.json":
217+
# Return group metadata
218+
return self._group.metadata.to_buffer_dict(
219+
prototype=default_buffer_prototype()
220+
)["zarr.json"]
221+
elif key.endswith("zarr.json"):
222+
# Return array metadata
223+
# TODO: Handle nested groups
224+
var, _ = key.split("/")
225+
return self._group.arrays[var].metadata.to_buffer_dict(
226+
prototype=default_buffer_prototype()
227+
)["zarr.json"]
248228
var = key.split("/")[0]
249229
marr = self._group.arrays[var]
250230
manifest = marr.manifest
@@ -258,22 +238,26 @@ async def get(
258238
return None
259239
offset = manifest._offsets[chunk_indexes]
260240
length = manifest._lengths[chunk_indexes]
241+
261242
# Get the configured object store instance that matches the path
262243
store = self._store_registry.get_store(path)
263244
if not store:
264245
raise ValueError(
265246
f"Could not find a store to use for {path} in the store registry"
266247
)
248+
267249
# Truncate path to match Obstore expectations
268250
key = urlparse(path).path
269251
if hasattr(store, "prefix") and store.prefix:
270252
# strip the prefix from key
271253
key = key.removeprefix(str(store.prefix))
254+
272255
# Transform the input byte range to account for the chunk location in the file
273256
chunk_end_exclusive = offset + length
274257
byte_range = _transform_byte_range(
275258
byte_range, chunk_start=offset, chunk_end_exclusive=chunk_end_exclusive
276259
)
260+
277261
# Actually get the bytes
278262
try:
279263
bytes = await store.get_range_async(

virtualizarr/tests/test_parsers/test_fits.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111

1212
@requires_kerchunk
1313
@requires_network
14-
@pytest.mark.xfail(
15-
reason="Big endian not yet supported by zarr-python 3.0"
16-
) # https://github.com/zarr-developers/zarr-python/issues/2324
1714
def test_open_hubble_data():
1815
# data from https://registry.opendata.aws/hst/
1916
file_url = "s3://stpubdata/hst/public/f05i/f05i0201m/f05i0201m_a1f.fits"
@@ -28,4 +25,4 @@ def test_open_hubble_data():
2825
assert list(vds.variables) == ["PRIMARY"]
2926
var = vds["PRIMARY"].variable
3027
assert var.sizes == {"y": 17, "x": 589}
31-
assert var.dtype == ">i4"
28+
assert var.dtype == "int32"

virtualizarr/vendor/zarr/core/metadata.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

0 commit comments

Comments
 (0)