diff --git a/docs/conf.py b/docs/conf.py
index 3cb86826..632bede1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -40,6 +40,7 @@
     "numpy": ("https://numpy.org/doc/stable/", None),
     "zarr": ("https://zarr.readthedocs.io/en/stable/", None),
     "xarray": ("https://docs.xarray.dev/en/stable/", None),
+    "obstore": ("https://developmentseed.org/obstore/latest/", None),
 }
 
 # Add any paths that contain templates here, relative to this directory.
diff --git a/docs/releases.rst b/docs/releases.rst
index 62ee0025..03c6c624 100644
--- a/docs/releases.rst
+++ b/docs/releases.rst
@@ -9,6 +9,8 @@ v1.3.3 (unreleased)
 New Features
 ~~~~~~~~~~~~
 
+- Added experimental ManifestStore (:pull:`490`).
+
 Breaking changes
 ~~~~~~~~~~~~~~~~
diff --git a/pyproject.toml b/pyproject.toml
index 2d508a27..ef9df5fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,9 @@ remote = [
     "aiohttp",
     "s3fs",
 ]
-
+obstore = [
+    "obstore>=0.5.1",
+]
 # non-kerchunk-based readers
 hdf = [
     "virtualizarr[remote]",
@@ -172,11 +174,11 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
 
 [tool.pixi.environments]
 min-deps = ["dev", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
 # Inherit from min-deps to get all the test commands, along with optional dependencies
-test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib"]
-test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py311"] # test against python 3.11
-test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312"] # test against python 3.12
+test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
+test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
+test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
 upstream = ["dev", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
-all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "all_readers", "all_writers"]
+all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
 docs = ["docs"]
diff --git a/virtualizarr/manifests/__init__.py b/virtualizarr/manifests/__init__.py
index c317ed6a..1aca444a 100644
--- a/virtualizarr/manifests/__init__.py
+++ b/virtualizarr/manifests/__init__.py
@@ -2,4 +2,6 @@ # This is just to avoid conflicting with some type of file called manifest that .gitignore recommends ignoring.
 from .array import ManifestArray  # type: ignore # noqa
+from .group import ManifestGroup  # type: ignore # noqa
 from .manifest import ChunkEntry, ChunkManifest  # type: ignore # noqa
+from .store import ManifestStore  # type: ignore # noqa
diff --git a/virtualizarr/manifests/group.py b/virtualizarr/manifests/group.py
new file mode 100644
index 00000000..70b0879b
--- /dev/null
+++ b/virtualizarr/manifests/group.py
@@ -0,0 +1,37 @@
+from typing import Mapping, TypeAlias
+
+from zarr.core.group import GroupMetadata
+
+from virtualizarr.manifests import ManifestArray
+
+ManifestArrayVariableMapping: TypeAlias = dict[str, ManifestArray]
+
+
+class ManifestGroup:
+    """
+    Virtualized representation of multiple ManifestArrays as a Zarr Group.
+    """
+
+    # TODO: Consider refactoring according to https://github.com/zarr-developers/VirtualiZarr/pull/490#discussion_r2007805272
+    _manifest_arrays: Mapping[str, ManifestArray]
+    _metadata: GroupMetadata
+
+    def __init__(
+        self,
+        manifest_arrays: ManifestArrayVariableMapping,
+        attributes: dict,
+    ) -> None:
+        """
+        Create a ManifestGroup from a mapping of ManifestArrays and the group / dataset level metadata.
+
+        Parameters
+        ----------
+        manifest_arrays : ManifestArrayVariableMapping
+            Mapping of variable names to ManifestArrays
+        attributes : dict
+            Attributes to include in the Group metadata
+        """
+
+        self._metadata = GroupMetadata(attributes=attributes)
+        self._manifest_arrays = manifest_arrays
+
+    def __str__(self) -> str:
+        return f"ManifestGroup(manifest_arrays={self._manifest_arrays}, metadata={self._metadata})"
diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py
new file mode 100644
index 00000000..22c6295d
--- /dev/null
+++ b/virtualizarr/manifests/store.py
@@ -0,0 +1,377 @@
+from __future__ import annotations
+
+import pickle
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+from zarr.abc.store import (
+    ByteRequest,
+    OffsetByteRequest,
+    RangeByteRequest,
+    Store,
+    SuffixByteRequest,
+)
+from zarr.core.buffer import Buffer, default_buffer_prototype
+from zarr.core.buffer.core import BufferPrototype
+
+from virtualizarr.manifests.group import ManifestArrayVariableMapping, ManifestGroup
+from virtualizarr.vendor.zarr.metadata import dict_to_buffer
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator, Iterable
+    from typing import TypeAlias
+
+    from obstore.store import ObjectStore
+    from zarr.core.common import BytesLike
+
+    StoreDict: TypeAlias = dict[str, ObjectStore]
+
+
+__all__ = ["ManifestStore"]
+
+_ALLOWED_EXCEPTIONS: tuple[type[Exception], ...] = (
+    FileNotFoundError,
+    IsADirectoryError,
+    NotADirectoryError,
+)
+
+
+@dataclass
+class StoreRequest:
+    """Dataclass matching a key to the store instance."""
+
+    store: ObjectStore
+    """The ObjectStore instance to use for making the request."""
+
+    key: str
+    """The key within the store to request."""
+
+
+async def list_dir_from_manifest_arrays(
+    manifest_arrays: ManifestArrayVariableMapping, prefix: str
+) -> AsyncGenerator[str]:
+    """Create the expected results for Zarr's `store.list_dir()` from a mapping of variable names to ManifestArrays.
+
+    Parameters
+    ----------
+    manifest_arrays : ManifestArrayVariableMapping
+    prefix : str
+
+    Returns
+    -------
+    AsyncGenerator[str]
+    """
+    # Start with expected group level metadata
+    raise NotImplementedError
+
+
+def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer:
+    """
+    Generate the expected Zarr V3 metadata from a ManifestGroup.
+
+    Group metadata is returned for the root ``zarr.json`` key and array
+    metadata is returned for ``<variable>/zarr.json`` keys.
+
+    Parameters
+    ----------
+    manifest_group : ManifestGroup
+    key : str
+
+    Returns
+    -------
+    Buffer
+    """
+    # If requesting the root metadata, return the standard group metadata with additional dataset specific attributes
+    if key == "zarr.json":
+        metadata = manifest_group._metadata.to_dict()
+        return dict_to_buffer(metadata, prototype=default_buffer_prototype())
+    else:
+        var, _ = key.split("/")
+        metadata = manifest_group._manifest_arrays[var].metadata.to_dict()
+        return dict_to_buffer(metadata, prototype=default_buffer_prototype())
+
+
+def parse_manifest_index(
+    key: str, chunk_key_encoding: str = "."
+) -> tuple[str, tuple[int, ...]]:
+    """
+    Split a key provided to a Zarr store into the variable name (the first part)
+    and the chunk index (the third through last parts), which can be used to
+    index into the ndarrays containing paths, offsets, and lengths in
+    ManifestArrays.
+
+    Currently only works for 1d+ arrays with a tree depth of one from the
+    root Zarr group.
+
+    Parameters
+    ----------
+    key : str
+    chunk_key_encoding : str
+
+    Returns
+    -------
+    tuple[str, tuple[int, ...]]
+        The variable name and the chunk index.
+    """
+    parts = key.split("/")
+    var = parts[0]
+    # Assume "c" is the second part
+    # TODO: Handle the scalar array case, where "c" holds the data
+    if chunk_key_encoding == "/":
+        indexes = tuple(int(ind) for ind in parts[2:])
+    else:
+        indexes = tuple(int(ind) for ind in parts[2].split(chunk_key_encoding))
+    return var, indexes
+
+
+def find_matching_store(stores: StoreDict, request_key: str) -> StoreRequest:
+    """
+    Find the store whose key is a prefix of the request URI, so that data is
+    fetched from the appropriately configured ObjectStore instance.
+
+    Parameters
+    ----------
+    stores : StoreDict
+        A dictionary with URI prefixes for different stores as keys
+    request_key : str
+        A string to match against the dictionary keys
+
+    Returns
+    -------
+    StoreRequest
+    """
+    # Sort keys by length in descending order so longer, more specific matches take precedence
+    sorted_keys = sorted(stores.keys(), key=len, reverse=True)
+
+    # Check each key to see if it's a prefix of the request key
+    for key in sorted_keys:
+        if request_key.startswith(key):
+            return StoreRequest(store=stores[key], key=request_key[len(key) :])
+    # If no match is found, raise an error
+    raise ValueError(
+        f"Expected one of stores.keys() to match the data prefix; got {stores.keys()} and {request_key}"
+    )
+
+
+class ManifestStore(Store):
+    """A read-only Zarr store that uses obstore to access data on AWS, GCP, Azure, and local filesystems.
+    Requests from the Zarr API are redirected using the :class:`virtualizarr.manifests.ManifestGroup`
+    containing multiple :class:`virtualizarr.manifests.ManifestArray`, allowing a virtual Zarr
+    interface to underlying data in other file formats.
+
+    Parameters
+    ----------
+    manifest_group : ManifestGroup
+        Manifest Group containing Group metadata and mapping variable names to ManifestArrays
+    stores : dict[prefix, :class:`obstore.store.ObjectStore`]
+        A mapping of url prefixes to obstore Store instances set up with the proper credentials.
+
+        The prefixes are matched to the URIs in the ManifestArrays to determine which store to
+        use for making requests.
+
+    Warnings
+    --------
+    ManifestStore is experimental and subject to API changes without notice. Please
+    raise an issue with any comments/concerns about the store.
+
+    Notes
+    -----
+    Modified from https://github.com/zarr-developers/zarr-python/pull/1661
+    """
+
+    _manifest_group: ManifestGroup
+    _stores: StoreDict
+
+    def __eq__(self, value: object):
+        raise NotImplementedError
+
+    def __init__(
+        self,
+        manifest_group: ManifestGroup,
+        *,
+        stores: StoreDict,  # TODO: Consider using a sequence of tuples rather than a dict (see https://github.com/zarr-developers/VirtualiZarr/pull/490#discussion_r2010717898).
+    ) -> None:
+        """Instantiate a new ManifestStore
+
+        Parameters
+        ----------
+        manifest_group : ManifestGroup
+            Manifest Group containing Group metadata and mapping variable names to ManifestArrays
+        stores : dict[prefix, :class:`obstore.store.ObjectStore`]
+            A mapping of url prefixes to obstore Store instances set up with the proper credentials.
+
+            The prefixes are matched to the URIs in the ManifestArrays to determine which store to
+            use for making requests.
+ """ + for store in stores.values(): + if not store.__class__.__module__.startswith("obstore"): + raise TypeError(f"expected ObjectStore class, got {store!r}") + # TODO: Don't allow stores with prefix + # TODO: Type check the manifest arrays + super().__init__(read_only=True) + self._stores = stores + self._manifest_group = manifest_group + + def __str__(self) -> str: + return f"ManifestStore({self._manifest_group}, {self._stores})" + + def __getstate__(self) -> dict[Any, Any]: + state = self.__dict__.copy() + stores = state["_stores"].copy() + for k, v in stores.items(): + stores[k] = pickle.dumps(v) + state["_stores"] = stores + return state + + def __setstate__(self, state: dict[Any, Any]) -> None: + stores = state["_stores"].copy() + for k, v in stores.items(): + stores[k] = pickle.loads(v) + state["_stores"] = stores + self.__dict__.update(state) + + async def get( + self, + key: str, + prototype: BufferPrototype, + byte_range: ByteRequest | None = None, + ) -> Buffer | None: + # docstring inherited + import obstore as obs + + if key.endswith("zarr.json"): + return get_zarr_metadata(self._manifest_group, key) + var, chunk_key = parse_manifest_index(key) + marr = self._manifest_group._manifest_arrays[var] + manifest = marr._manifest + + path = manifest._paths[*chunk_key] + offset = manifest._offsets[*chunk_key] + length = manifest._lengths[*chunk_key] + # Get the configured object store instance that matches the path + store_request = find_matching_store(stores=self._stores, request_key=path) + # Transform the input byte range to account for the chunk location in the file + chunk_end_exclusive = offset + length + byte_range = _transform_byte_range( + byte_range, chunk_start=offset, chunk_end_exclusive=chunk_end_exclusive + ) + # Actually get the bytes + try: + bytes = await obs.get_range_async( + store_request.store, + store_request.key, + start=byte_range.start, + end=byte_range.end, + ) + return prototype.buffer.from_bytes(bytes) # type: ignore[arg-type] + except _ALLOWED_EXCEPTIONS: + return None + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRequest | None]], + ) -> list[Buffer | None]: + # docstring inherited + # TODO: Implement using private functions from the upstream Zarr obstore integration + raise NotImplementedError + + async def exists(self, key: str) -> bool: + # docstring inherited + raise NotImplementedError + + @property + def supports_writes(self) -> bool: + # docstring inherited + return False + + async def set(self, key: str, value: Buffer) -> None: + # docstring inherited + raise NotImplementedError + + async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited + raise NotImplementedError + + @property + def supports_deletes(self) -> bool: + # docstring inherited + return False + + async def delete(self, key: str) -> None: + raise NotImplementedError + + @property + def supports_partial_writes(self) -> bool: + # docstring inherited + return False + + async def set_partial_values( + self, key_start_values: Iterable[tuple[str, int, BytesLike]] + ) -> None: + # docstring inherited + raise NotImplementedError + + @property + def supports_listing(self) -> bool: + # docstring inherited + return True + + def list(self) -> AsyncGenerator[str, None]: + # docstring inherited + raise NotImplementedError + + def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited + raise NotImplementedError + + async def list_dir(self, prefix: str) -> 
+        # docstring inherited
+        yield "zarr.json"
+        for k in self._manifest_group._manifest_arrays.keys():
+            yield k
+
+
+def _transform_byte_range(
+    byte_range: ByteRequest | None, *, chunk_start: int, chunk_end_exclusive: int
+) -> RangeByteRequest:
+    """
+    Convert an incoming byte_range, which assumes one chunk per file, to a
+    virtual byte range that accounts for the location of a chunk within a file.
+    """
+    if byte_range is None:
+        byte_range = RangeByteRequest(chunk_start, chunk_end_exclusive)
+    elif isinstance(byte_range, RangeByteRequest):
+        if chunk_start + byte_range.end > chunk_end_exclusive:
+            raise ValueError(
+                f"Chunk ends at byte {chunk_end_exclusive} but the requested range ends at byte {chunk_start + byte_range.end}"
+            )
+        byte_range = RangeByteRequest(
+            chunk_start + byte_range.start, chunk_start + byte_range.end
+        )
+    elif isinstance(byte_range, OffsetByteRequest):
+        byte_range = RangeByteRequest(
+            chunk_start + byte_range.offset, chunk_end_exclusive
+        )  # type: ignore[arg-type]
+    elif isinstance(byte_range, SuffixByteRequest):
+        byte_range = RangeByteRequest(
+            chunk_end_exclusive - byte_range.suffix, chunk_end_exclusive
+        )  # type: ignore[arg-type]
+    else:
+        raise ValueError(f"Unexpected byte_range, got {byte_range}")
+    return byte_range
diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py
index 09d36d3d..6617ac79 100644
--- a/virtualizarr/tests/__init__.py
+++ b/virtualizarr/tests/__init__.py
@@ -39,6 +39,7 @@ def _importorskip(
 has_imagecodecs, requires_imagecodecs = _importorskip("imagecodecs")
 has_hdf5plugin, requires_hdf5plugin = _importorskip("hdf5plugin")
 has_zarr_python, requires_zarr_python = _importorskip("zarr")
+has_obstore, requires_obstore = _importorskip("obstore")
 
 parametrize_over_hdf_backends = pytest.mark.parametrize(
     "hdf_backend",
diff --git a/virtualizarr/tests/test_manifests/test_group.py b/virtualizarr/tests/test_manifests/test_group.py
new file mode 100644
index 00000000..1e1abd10
--- /dev/null
+++ b/virtualizarr/tests/test_manifests/test_group.py
@@ -0,0 +1,36 @@
+import pytest
+from zarr.core.group import GroupMetadata
+
+from virtualizarr.manifests import ChunkManifest, ManifestArray, ManifestGroup
+
+
+@pytest.fixture
+def manifest_array(array_v3_metadata):
+    chunk_dict = {
+        "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
+        "0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
+        "0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
+        "0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
+    }
+    manifest = ChunkManifest(entries=chunk_dict)
+    chunks = (5, 1, 10)
+    shape = (5, 2, 20)
+    array_metadata = array_v3_metadata(shape=shape, chunks=chunks)
+    return ManifestArray(metadata=array_metadata, chunkmanifest=manifest)
+
+
+class TestManifestGroup:
+    def test_manifest_array(self, manifest_array):
+        var = "foo"
+        manifest_group = ManifestGroup(
+            manifest_arrays={var: manifest_array}, attributes={}
+        )
+        assert isinstance(manifest_group._manifest_arrays, dict)
+        assert isinstance(manifest_group._manifest_arrays[var], ManifestArray)
+        assert isinstance(manifest_group._metadata, GroupMetadata)
+
+    def test_manifest_repr(self, manifest_array):
+        manifest_group = ManifestGroup(
+            manifest_arrays={"foo": manifest_array}, attributes={}
+        )
+        assert str(manifest_group)
diff --git a/virtualizarr/tests/test_manifests/test_store.py b/virtualizarr/tests/test_manifests/test_store.py
new file mode 100644
index 00000000..7ff27d30
--- /dev/null
+++ b/virtualizarr/tests/test_manifests/test_store.py
@@ -0,0 +1,142 @@
+import json
+import pickle
+
+import pytest
+from zarr.abc.store import (
+    OffsetByteRequest,
+    RangeByteRequest,
+    SuffixByteRequest,
+)
+from zarr.core.buffer import default_buffer_prototype
+from zarr.core.sync import _collect_aiterator
+
+from virtualizarr.manifests import (
+    ChunkManifest,
+    ManifestArray,
+    ManifestGroup,
+    ManifestStore,
+)
+from virtualizarr.tests import (
+    requires_obstore,
+)
+
+
+@pytest.fixture()
+@requires_obstore
+def filepath(tmpdir):
+    import obstore as obs
+
+    store = obs.store.LocalStore(prefix=tmpdir)
+    filepath = "data.tmp"
+    obs.put(
+        store,
+        filepath,
+        b"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15\x16",
+    )
+    return f"{tmpdir}/{filepath}"
+
+
+@requires_obstore
+@pytest.fixture()
+def manifest_store(filepath, array_v3_metadata):
+    import obstore as obs
+
+    chunk_dict = {
+        "0.0": {"path": f"file://{filepath}", "offset": 0, "length": 4},
+        "0.1": {"path": f"file://{filepath}", "offset": 4, "length": 4},
+        "1.0": {"path": f"file://{filepath}", "offset": 8, "length": 4},
+        "1.1": {"path": f"file://{filepath}", "offset": 12, "length": 4},
+    }
+    manifest = ChunkManifest(entries=chunk_dict)
+    chunks = (1, 4)
+    shape = (2, 8)
+    array_metadata = array_v3_metadata(shape=shape, chunks=chunks)
+    manifest_array = ManifestArray(metadata=array_metadata, chunkmanifest=manifest)
+    manifest_group = ManifestGroup(
+        {"foo": manifest_array, "bar": manifest_array}, attributes={"Zarr": "Hooray!"}
+    )
+    return ManifestStore(
+        stores={"file://": obs.store.LocalStore()}, manifest_group=manifest_group
+    )
+
+
+@pytest.mark.asyncio
+@requires_obstore
+class TestManifestStore:
+    def test_manifest_store_properties(self, manifest_store):
+        assert manifest_store.read_only
+        assert manifest_store.supports_listing
+        assert not manifest_store.supports_deletes
+        assert not manifest_store.supports_writes
+        assert not manifest_store.supports_partial_writes
+
+    async def test_get_data(self, manifest_store):
+        observed = await manifest_store.get(
+            "foo/c/0.0", prototype=default_buffer_prototype()
+        )
+        assert observed.to_bytes() == b"\x01\x02\x03\x04"
+        observed = await manifest_store.get(
+            "foo/c/1.0", prototype=default_buffer_prototype()
+        )
+        assert observed.to_bytes() == b"\x09\x10\x11\x12"
+        observed = await manifest_store.get(
+            "foo/c/0.0",
+            prototype=default_buffer_prototype(),
+            byte_range=RangeByteRequest(start=1, end=2),
+        )
+        assert observed.to_bytes() == b"\x02"
+        observed = await manifest_store.get(
+            "foo/c/0.0",
+            prototype=default_buffer_prototype(),
+            byte_range=OffsetByteRequest(offset=1),
+        )
+        assert observed.to_bytes() == b"\x02\x03\x04"
+        observed = await manifest_store.get(
+            "foo/c/0.0",
+            prototype=default_buffer_prototype(),
+            byte_range=SuffixByteRequest(suffix=2),
+        )
+        assert observed.to_bytes() == b"\x03\x04"
+
+    async def test_get_metadata(self, manifest_store):
+        observed = await manifest_store.get(
+            "foo/zarr.json", prototype=default_buffer_prototype()
+        )
+        metadata = json.loads(observed.to_bytes())
+        assert metadata["chunk_grid"]["configuration"]["chunk_shape"] == [1, 4]
+        assert metadata["node_type"] == "array"
+        assert metadata["zarr_format"] == 3
+
+        observed = await manifest_store.get(
+            "zarr.json", prototype=default_buffer_prototype()
+        )
+        metadata = json.loads(observed.to_bytes())
+        assert metadata["node_type"] == "group"
+        assert metadata["zarr_format"] == 3
+        assert metadata["attributes"]["Zarr"] == "Hooray!"
+
+    async def test_pickling(self, manifest_store):
+        new_store = pickle.loads(pickle.dumps(manifest_store))
+        assert isinstance(new_store, ManifestStore)
+        # Check the original store still works
+        observed = await manifest_store.get(
+            "foo/c/0.0", prototype=default_buffer_prototype()
+        )
+        assert observed.to_bytes() == b"\x01\x02\x03\x04"
+        # Check the unpickled store works
+        observed = await new_store.get(
+            "foo/c/0.0", prototype=default_buffer_prototype()
+        )
+        assert observed.to_bytes() == b"\x01\x02\x03\x04"
+
+    async def test_list_dir(self, manifest_store) -> None:
+        observed = await _collect_aiterator(manifest_store.list_dir(""))
+        assert observed == ("zarr.json", "foo", "bar")
+
+    async def test_store_raises(self, manifest_store) -> None:
+        with pytest.raises(NotImplementedError):
+            await manifest_store.set("foo/zarr.json", 1)
+        with pytest.raises(NotImplementedError):
+            await manifest_store.set_if_not_exists("foo/zarr.json", 1)
+        with pytest.raises(NotImplementedError):
+            await manifest_store.delete("foo")
diff --git a/virtualizarr/vendor/zarr/metadata.py b/virtualizarr/vendor/zarr/metadata.py
new file mode 100644
index 00000000..bc53de84
--- /dev/null
+++ b/virtualizarr/vendor/zarr/metadata.py
@@ -0,0 +1,32 @@
+import json
+from typing import Any
+
+import numpy as np
+from zarr.core.buffer import Buffer, BufferPrototype
+from zarr.core.metadata.v3 import V3JsonEncoder
+
+
+def _replace_special_floats(obj: object) -> Any:
+    """Helper function to replace NaN/Inf/-Inf values with special strings.
+
+    Note: this cannot be done in the V3JsonEncoder because Python's `json.dumps` optimistically
+    converts NaN/Inf values to special types outside of the encoding step.
+    """
+    if isinstance(obj, float):
+        if np.isnan(obj):
+            return "NaN"
+        elif np.isinf(obj):
+            return "Infinity" if obj > 0 else "-Infinity"
+    elif isinstance(obj, dict):
+        # Recursively replace in dictionaries
+        return {k: _replace_special_floats(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        # Recursively replace in lists
+        return [_replace_special_floats(item) for item in obj]
+    return obj
+
+
+def dict_to_buffer(input: dict, prototype: BufferPrototype) -> Buffer:
+    # Modified from ArrayV3Metadata.to_buffer_dict
+    d = _replace_special_floats(input)
+    return prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode())
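
Reviewer note: below is a minimal usage sketch of the new ManifestStore, mirroring the test fixtures above. It writes a small local file, builds a one-variable ManifestGroup pointing into it, and reads a chunk plus the synthesized root metadata back through the Zarr store API. It is illustrative only: the hand-built `ArrayV3Metadata` keyword arguments are an assumption about zarr-python's metadata constructor (the test suite gets this object from its `array_v3_metadata` fixture instead), and the `/tmp` paths assume a Unix-like filesystem.

```python
import asyncio

import obstore as obs
from zarr.core.buffer import default_buffer_prototype
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.manifests import (
    ChunkManifest,
    ManifestArray,
    ManifestGroup,
    ManifestStore,
)

# Write 8 bytes to a local file for the manifest to point into.
obs.put(
    obs.store.LocalStore(prefix="/tmp"),
    "data.tmp",
    b"\x01\x02\x03\x04\x05\x06\x07\x08",
)

# Two 4-byte chunks at different offsets within the same file.
manifest = ChunkManifest(
    entries={
        "0": {"path": "file:///tmp/data.tmp", "offset": 0, "length": 4},
        "1": {"path": "file:///tmp/data.tmp", "offset": 4, "length": 4},
    }
)

# Hand-built array metadata (assumed constructor arguments; the tests use the
# `array_v3_metadata` fixture instead).
metadata = ArrayV3Metadata(
    shape=(8,),
    data_type="uint8",
    chunk_grid={"name": "regular", "configuration": {"chunk_shape": (4,)}},
    chunk_key_encoding={"name": "default"},
    fill_value=0,
    codecs=[{"name": "bytes"}],
    attributes={},
    dimension_names=None,
    storage_transformers=None,
)

marr = ManifestArray(metadata=metadata, chunkmanifest=manifest)
group = ManifestGroup({"foo": marr}, attributes={"title": "example"})
store = ManifestStore(group, stores={"file://": obs.store.LocalStore()})


async def main() -> None:
    # Chunk keys are routed through the manifest to byte ranges in data.tmp...
    buf = await store.get("foo/c/0", prototype=default_buffer_prototype())
    print(buf.to_bytes())  # b"\x01\x02\x03\x04"
    # ...while zarr.json keys are synthesized from the in-memory metadata.
    meta = await store.get("zarr.json", prototype=default_buffer_prototype())
    print(meta.to_bytes())


asyncio.run(main())
```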
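
And a short sketch of the byte-range arithmetic in `_transform_byte_range`, which maps chunk-relative requests onto absolute offsets within the archival file. `_transform_byte_range` is private, so importing it here is for illustration only; the expected values follow directly from the function above.

```python
from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest

from virtualizarr.manifests.store import _transform_byte_range

# A chunk stored at bytes [100, 150) of some archival file.
kwargs = {"chunk_start": 100, "chunk_end_exclusive": 150}

# No request -> the whole chunk.
assert _transform_byte_range(None, **kwargs) == RangeByteRequest(100, 150)

# Chunk-relative [5, 20) -> file-absolute [105, 120).
assert _transform_byte_range(RangeByteRequest(5, 20), **kwargs) == RangeByteRequest(105, 120)

# "Everything from offset 30" -> [130, 150).
assert _transform_byte_range(OffsetByteRequest(30), **kwargs) == RangeByteRequest(130, 150)

# "Last 8 bytes" -> [142, 150).
assert _transform_byte_range(SuffixByteRequest(8), **kwargs) == RangeByteRequest(142, 150)
```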