From 20f23d0d1f5c2095bc500606f5cf28b1372bde5c Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:27:19 -0400 Subject: [PATCH 01/15] Add deps --- pyproject.toml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b6f8a58a2..52e30c2f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,11 +66,16 @@ fits = [ "kerchunk>=0.2.8", "astropy", ] +tif = [ + "obstore>=0.5.1", + "async-tiff @ git+https://github.com/developmentseed/async-tiff#subdirectory=python", +] all_readers = [ "virtualizarr[hdf]", "virtualizarr[hdf5]", "virtualizarr[netcdf3]", "virtualizarr[fits]", + "virtualizarr[tif]", ] # writers @@ -168,6 +173,9 @@ h5netcdf = ">=1.5.0,<2" [tool.pixi.feature.icechunk-dev.dependencies] rust = "*" +[tool.pixi.feature.rio.dependencies] +rioxarray = "*" + # Define commands to run within the test environments [tool.pixi.feature.test.tasks] run-mypy = { cmd = "mypy virtualizarr" } @@ -181,12 +189,12 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov [tool.pixi.environments] min-deps = ["dev", "test", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs # Inherit from min-deps to get all the test commands, along with optional dependencies -test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"] -test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11 -test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12 +test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio"] +test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py311"] # test against python 3.11 +test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py312"] # test against python 3.12 minio = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312", "minio"] upstream = ["dev", "test", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"] -all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"] +all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "all_readers", "all_writers"] docs = ["docs"] # Define commands to run within the docs environment @@ -222,7 +230,9 @@ module = [ "minio", "numcodecs.*", "ujson", - "zarr", + "zarr.*", + "async_tiff.*", + "obstore.*", ] ignore_missing_imports = true From d3d2a1de91bcf0aa421868af80c4499a969c3ee3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:27:35 -0400 Subject: [PATCH 02/15] Add option for dimension names --- virtualizarr/manifests/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virtualizarr/manifests/utils.py b/virtualizarr/manifests/utils.py index 6b7faa646..fefe75b5d 100644 --- a/virtualizarr/manifests/utils.py +++ b/virtualizarr/manifests/utils.py @@ 
-17,6 +17,7 @@ def create_v3_array_metadata( fill_value: Any = None, codecs: Optional[list[Dict[str, Any]]] = None, attributes: Optional[Dict[str, Any]] = None, + dimension_names: Optional[tuple[str, ...]] = None, ) -> ArrayV3Metadata: """ Create an ArrayV3Metadata instance with standard configuration. @@ -36,6 +37,8 @@ def create_v3_array_metadata( List of codec configurations attributes : Dict[str, Any], optional Additional attributes for the array + dimension_names : tuple[str], optional + Names of the dimensions Returns ------- @@ -56,7 +59,7 @@ def create_v3_array_metadata( dtype=data_type, ), attributes=attributes or {}, - dimension_names=None, + dimension_names=dimension_names, storage_transformers=None, ) From ed073006b2f4df04c3cd6ff7e7aea6a8e0209ab7 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:28:00 -0400 Subject: [PATCH 03/15] Move filter dataclasses to common --- virtualizarr/readers/common.py | 30 +++++++++++++++++++++++------ virtualizarr/readers/hdf/filters.py | 28 +++++++-------------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/virtualizarr/readers/common.py b/virtualizarr/readers/common.py index f21f8544b..4f1c50178 100644 --- a/virtualizarr/readers/common.py +++ b/virtualizarr/readers/common.py @@ -1,17 +1,35 @@ +import dataclasses from collections.abc import Iterable, Mapping -from typing import ( - Any, - Hashable, - MutableMapping, - Optional, -) +from typing import Any, Hashable, MutableMapping, Optional, TypedDict +import numpy as np import xarray as xr import xarray.indexes +from numcodecs.abc import Codec from virtualizarr.utils import _FsspecFSFromFilepath +@dataclasses.dataclass +class ZstdProperties: + level: int + + +@dataclasses.dataclass +class ShuffleProperties: + elementsize: int + + +@dataclasses.dataclass +class ZlibProperties: + level: int + + +class CFCodec(TypedDict): + target_dtype: np.dtype + codec: Codec + + def construct_fully_virtual_dataset( virtual_vars: Mapping[str, xr.Variable], coord_names: Iterable[str] | None = None, diff --git a/virtualizarr/readers/hdf/filters.py b/virtualizarr/readers/hdf/filters.py index f0d1e8eba..ddfb86eaa 100644 --- a/virtualizarr/readers/hdf/filters.py +++ b/virtualizarr/readers/hdf/filters.py @@ -1,5 +1,5 @@ import dataclasses -from typing import TYPE_CHECKING, List, Tuple, TypedDict, Union +from typing import TYPE_CHECKING, List, Tuple, Union import numcodecs.registry as registry import numpy as np @@ -12,6 +12,12 @@ if TYPE_CHECKING: import h5py # type: ignore from h5py import Dataset, Group # type: ignore +from virtualizarr.readers.common import ( + CFCodec, + ShuffleProperties, + ZlibProperties, + ZstdProperties, +) h5py = soft_import("h5py", "For reading hdf files", strict=False) if h5py: @@ -52,26 +58,6 @@ def __post_init__(self): self.cname = blosc_compressor_codes[self.cname] -@dataclasses.dataclass -class ZstdProperties: - level: int - - -@dataclasses.dataclass -class ShuffleProperties: - elementsize: int - - -@dataclasses.dataclass -class ZlibProperties: - level: int - - -class CFCodec(TypedDict): - target_dtype: np.dtype - codec: Codec - - def _filter_to_codec( filter_id: str, filter_properties: Union[int, None, Tuple] = None ) -> Codec: From 18af38f30ae2544b6eed8ac798d5116956a50943 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:28:29 -0400 Subject: [PATCH 04/15] Add test --- .../tests/test_readers/test_tiff/conftest.py | 13 ++++++++++ 
.../tests/test_readers/test_tiff/test_tiff.py | 24 +++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 virtualizarr/tests/test_readers/test_tiff/conftest.py
 create mode 100644 virtualizarr/tests/test_readers/test_tiff/test_tiff.py

diff --git a/virtualizarr/tests/test_readers/test_tiff/conftest.py b/virtualizarr/tests/test_readers/test_tiff/conftest.py
new file mode 100644
index 000000000..12e17ba17
--- /dev/null
+++ b/virtualizarr/tests/test_readers/test_tiff/conftest.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+import pytest
+import xarray as xr
+
+
+@pytest.fixture
+def geotiff_file(tmp_path: Path) -> str:
+    """Create a GeoTIFF file with air temperature data."""
+    filepath = tmp_path / "air.tif"
+    with xr.tutorial.open_dataset("air_temperature") as ds:
+        ds.isel(time=0).rio.to_raster(filepath)
+    return str(filepath)
diff --git a/virtualizarr/tests/test_readers/test_tiff/test_tiff.py b/virtualizarr/tests/test_readers/test_tiff/test_tiff.py
new file mode 100644
index 000000000..63774f581
--- /dev/null
+++ b/virtualizarr/tests/test_readers/test_tiff/test_tiff.py
@@ -0,0 +1,24 @@
+import numpy as np
+import xarray as xr
+
+from virtualizarr.readers import TIFFVirtualBackend
+from virtualizarr.tests import requires_asynctiff, requires_rioxarray
+
+
+@requires_asynctiff
+@requires_rioxarray
+def test_read_geotiff(geotiff_file):
+    import rioxarray
+    from obstore.store import LocalStore
+
+    store = LocalStore()
+    kwargs = {
+        "file_id": "file://",
+        "store": store,
+    }
+    ds = TIFFVirtualBackend.open_virtual_dataset(
+        filepath=geotiff_file, group="0", virtual_backend_kwargs=kwargs
+    )
+    assert isinstance(ds, xr.Dataset)
+    da_expected = rioxarray.open_rasterio(geotiff_file)
+    np.testing.assert_allclose(ds["0"].data, da_expected.data.squeeze())

From 35aae28f832bb7b4cbf8beece64ec76257ea0b35 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:28:56 -0400
Subject: [PATCH 05/15] Add importorskip for tests

---
 virtualizarr/tests/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py
index 58682e32f..a89f310e1 100644
--- a/virtualizarr/tests/__init__.py
+++ b/virtualizarr/tests/__init__.py
@@ -37,12 +37,13 @@ def _importorskip(
 has_s3fs, requires_s3fs = _importorskip("s3fs")
 has_lithops, requires_lithops = _importorskip("lithops")
 has_scipy, requires_scipy = _importorskip("scipy")
-has_tifffile, requires_tifffile = _importorskip("tifffile")
+has_asynctiff, requires_asynctiff = _importorskip("async_tiff")
 has_imagecodecs, requires_imagecodecs = _importorskip("imagecodecs")
 has_hdf5plugin, requires_hdf5plugin = _importorskip("hdf5plugin")
 has_zarr_python, requires_zarr_python = _importorskip("zarr")
 has_dask, requires_dask = _importorskip("dask")
 has_obstore, requires_obstore = _importorskip("obstore")
+has_rioxarray, requires_rioxarray = _importorskip("rioxarray")
 
 parametrize_over_hdf_backends = pytest.mark.parametrize(
     "hdf_backend",

From ffd17f7a17ed73944315ef7f176263565a32692f Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:29:11 -0400
Subject: [PATCH 06/15] Start on reader refactor

---
 virtualizarr/readers/tiff.py | 214 +++++++++++++++++++++++++----------
 1 file changed, 156 insertions(+), 58 deletions(-)

diff --git a/virtualizarr/readers/tiff.py b/virtualizarr/readers/tiff.py
index 9509564b8..97d2fc5c1 100644
--- a/virtualizarr/readers/tiff.py
+++ 
b/virtualizarr/readers/tiff.py @@ -1,79 +1,177 @@ -import warnings -from pathlib import Path -from typing import Hashable, Iterable, Mapping, Optional +from __future__ import annotations -from xarray import Dataset, Index +import dataclasses +import math +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + Mapping, + Optional, +) -from virtualizarr.readers.api import VirtualBackend -from virtualizarr.readers.common import ( - construct_fully_virtual_dataset, - replace_virtual_with_loadable_vars, +import numcodecs.registry as registry +from zarr.core.sync import sync + +from virtualizarr.codecs import numcodec_config_to_configurable +from virtualizarr.manifests import ( + ChunkManifest, + ManifestArray, + ManifestGroup, + ManifestStore, ) -from virtualizarr.translators.kerchunk import ( - extract_group, - virtual_vars_and_metadata_from_kerchunk_refs, +from virtualizarr.manifests.utils import create_v3_array_metadata +from virtualizarr.readers.api import ( + VirtualBackend, +) + +if TYPE_CHECKING: + from async_tiff import TIFF, ImageFileDirectory + from obstore.store import ObjectStore + from zarr.core.abc.store import Store + + +import numpy as np +import xarray as xr + +from virtualizarr.readers.common import ( + ZlibProperties, ) -from virtualizarr.types.kerchunk import KerchunkStoreRefs + + +def _get_dtype(sample_format, bits_per_sample): + if sample_format[0] == 1 and bits_per_sample[0] == 16: + return np.dtype(np.uint16) + else: + raise NotImplementedError + + +def _get_codecs(compression): + if compression == 8: # Adobe DEFLATE + zlib_props = ZlibProperties(level=6) # type: ignore + conf = dataclasses.asdict(zlib_props) + conf["id"] = "zlib" + else: + raise NotImplementedError + codec = registry.get_codec(conf) + return codec class TIFFVirtualBackend(VirtualBackend): @staticmethod - def open_virtual_dataset( - filepath: str, - group: str | None = None, - drop_variables: Iterable[str] | None = None, - loadable_variables: Iterable[str] | None = None, - decode_times: bool | None = None, - indexes: Mapping[str, Index] | None = None, - virtual_backend_kwargs: Optional[dict] = None, - reader_options: Optional[dict] = None, - ) -> Dataset: - if virtual_backend_kwargs: - raise NotImplementedError( - "TIFF reader does not understand any virtual_backend_kwargs" - ) + def _construct_chunk_manifest( + ifd: ImageFileDirectory, + *, + path: str, + shape: tuple[int, ...], + chunks: tuple[int, ...], + ) -> ChunkManifest: + tile_shape = tuple(math.ceil(a / b) for a, b in zip(shape, chunks)) + # See https://web.archive.org/web/20240329145228/https://www.awaresystems.be/imaging/tiff/tifftags/tileoffsets.html for ordering of offsets. 
+        tile_offsets = np.array(ifd.tile_offsets, dtype=np.uint64).reshape(tile_shape)
+        tile_counts = np.array(ifd.tile_byte_counts, dtype=np.uint64).reshape(
+            tile_shape
+        )
+        paths = np.full_like(tile_offsets, path, dtype=np.dtypes.StringDType)
+        return ChunkManifest.from_arrays(
+            paths=paths,
+            offsets=tile_offsets,
+            lengths=tile_counts,
+        )
 
-        from kerchunk.tiff import tiff_to_zarr
+    @staticmethod
+    async def _open_tiff(*, path: str, store: ObjectStore) -> TIFF:
+        from async_tiff import TIFF
 
-        if reader_options is None:
-            reader_options = {}
+        return await TIFF.open(path, store=store)
 
-        reader_options.pop("storage_options", {})
-        warnings.warn(
-            "storage_options have been dropped from reader_options as they are not supported by kerchunk.tiff.tiff_to_zarr",
-            UserWarning,
+    @staticmethod
+    def _construct_manifest_array(
+        *, ifd: ImageFileDirectory, path: str
+    ) -> ManifestArray:
+        if not ifd.tile_height or not ifd.tile_width:
+            raise NotImplementedError(
+                f"TIFF reader currently only supports tiled TIFFs, but {path} has no internal tiling."
+            )
+        chunks = (ifd.tile_height, ifd.tile_width)
+        shape = (ifd.image_height, ifd.image_width)
+        chunk_manifest = TIFFVirtualBackend._construct_chunk_manifest(
+            ifd, path=path, shape=shape, chunks=chunks
         )
+        codecs = [_get_codecs(ifd.compression)]
+        codec_configs = [
+            numcodec_config_to_configurable(codec.get_config()) for codec in codecs
+        ]
+        dimension_names = ("y", "x")  # Following rioxarray's behavior
 
-        _drop_vars: list[Hashable] = (
-            [] if drop_variables is None else list(drop_variables)
+        metadata = create_v3_array_metadata(
+            shape=shape,
+            data_type=_get_dtype(
+                sample_format=ifd.sample_format, bits_per_sample=ifd.bits_per_sample
+            ),
+            chunk_shape=chunks,
+            fill_value=None,  # TODO: Fix fill value
+            codecs=codec_configs,
+            dimension_names=dimension_names,
         )
+        return ManifestArray(metadata=metadata, chunkmanifest=chunk_manifest)
 
-        # handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160
-        refs = KerchunkStoreRefs({"refs": tiff_to_zarr(filepath, **reader_options)})
-
-        # both group=None and group='' mean to read root group
+    @staticmethod
+    def _construct_manifest_group(
+        store: ObjectStore,
+        path: str,
+        *,
+        group: str | None = None,
+    ) -> ManifestGroup:
+        """
+        Construct a virtual Group from a tiff file.
+ """ + # TODO: Make an async approach + tiff = sync(TIFFVirtualBackend._open_tiff(store=store, path=path)) + attrs: dict[str, Any] = {} + manifest_dict = {} if group: - refs = extract_group(refs, group) + manifest_dict[group] = TIFFVirtualBackend._construct_manifest_array( + ifd=tiff.ifds[int(group)], path=path + ) + else: + for ind, ifd in enumerate(tiff.ifds): + manifest_dict[str(ind)] = TIFFVirtualBackend._construct_manifest_array( + ifd=ifd, path=path + ) + return ManifestGroup(manifest_dict=manifest_dict, attributes=attrs) - virtual_vars, attrs, coord_names = virtual_vars_and_metadata_from_kerchunk_refs( - refs, - fs_root=Path.cwd().as_uri(), - ) + @staticmethod + def _create_manifest_store( + filepath: str, + group: str, + file_id: str, + object_store: ObjectStore, + ) -> Store: + # TODO: Make this less sketchy, but it's better to use an AsyncTIFF store rather than an obstore store + from async_tiff.store import LocalStore as ATStore - fully_virtual_dataset = construct_fully_virtual_dataset( - virtual_vars=virtual_vars, - coord_names=coord_names, - attrs=attrs, - ) + newargs = object_store.__getnewargs_ex__() + at_store = ATStore(*newargs[0], **newargs[1]) - vds = replace_virtual_with_loadable_vars( - fully_virtual_dataset, - filepath, - group=group, - loadable_variables=loadable_variables, - reader_options=reader_options, - indexes=indexes, - decode_times=decode_times, + # Create a group containing dataset level metadata and all the manifest arrays + manifest_group = TIFFVirtualBackend._construct_manifest_group( + store=at_store, path=filepath, group=group + ) + # Convert to a manifest store + return ManifestStore( + stores={file_id: object_store}, manifest_group=manifest_group ) - return vds.drop_vars(_drop_vars) + @staticmethod + def open_virtual_dataset( + filepath: str, + group: str | None = None, + drop_variables: Iterable[str] | None = None, + loadable_variables: Iterable[str] | None = None, + decode_times: bool | None = None, + indexes: Mapping[str, xr.Index] | None = None, + virtual_backend_kwargs: Optional[dict] = None, + reader_options: Optional[dict] = None, + ) -> xr.Dataset: + raise NotImplementedError From d65ea920e5f3d83fa1ffbb125bd4a4097a0a1144 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:47:10 -0400 Subject: [PATCH 07/15] Update typing --- pyproject.toml | 1 - virtualizarr/readers/tiff.py | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 52e30c2f2..419ff077e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,6 @@ upstream = [ # optional dependencies 'astropy @ git+https://github.com/astropy/astropy', 'fsspec @ git+https://github.com/fsspec/filesystem_spec', - 's3fs @ git+https://github.com/fsspec/s3fs', 'kerchunk @ git+https://github.com/fsspec/kerchunk', 'icechunk @ git+https://github.com/earth-mover/icechunk#subdirectory=icechunk-python', ] diff --git a/virtualizarr/readers/tiff.py b/virtualizarr/readers/tiff.py index 97d2fc5c1..e047bf81d 100644 --- a/virtualizarr/readers/tiff.py +++ b/virtualizarr/readers/tiff.py @@ -27,9 +27,11 @@ if TYPE_CHECKING: from async_tiff import TIFF, ImageFileDirectory - from obstore.store import ObjectStore + from obstore.store import AzureStore, GCSStore, HTTPStore, LocalStore, S3Store from zarr.core.abc.store import Store + SupportedStore = AzureStore | GCSStore | HTTPStore | S3Store | LocalStore + import numpy as np import xarray as xr @@ -80,7 +82,7 @@ def 
_construct_chunk_manifest(
         )
 
     @staticmethod
-    async def _open_tiff(*, path: str, store: ObjectStore) -> TIFF:
+    async def _open_tiff(*, path: str, store: SupportedStore) -> TIFF:
         from async_tiff import TIFF
 
         return await TIFF.open(path, store=store)
@@ -118,7 +120,7 @@ def _construct_manifest_array(
 
     @staticmethod
     def _construct_manifest_group(
-        store: ObjectStore,
+        store: SupportedStore,
         path: str,
         *,
         group: str | None = None,
@@ -129,24 +131,24 @@ def _construct_manifest_group(
         # TODO: Make an async approach
         tiff = sync(TIFFVirtualBackend._open_tiff(store=store, path=path))
         attrs: dict[str, Any] = {}
-        manifest_dict = {}
+        manifest_arrays = {}
         if group:
-            manifest_dict[group] = TIFFVirtualBackend._construct_manifest_array(
+            manifest_arrays[group] = TIFFVirtualBackend._construct_manifest_array(
                 ifd=tiff.ifds[int(group)], path=path
             )
         else:
             for ind, ifd in enumerate(tiff.ifds):
-                manifest_dict[str(ind)] = TIFFVirtualBackend._construct_manifest_array(
-                    ifd=ifd, path=path
+                manifest_arrays[str(ind)] = (
+                    TIFFVirtualBackend._construct_manifest_array(ifd=ifd, path=path)
                 )
-        return ManifestGroup(manifest_dict=manifest_dict, attributes=attrs)
+        return ManifestGroup(arrays=manifest_arrays, attributes=attrs)
 
     @staticmethod
     def _create_manifest_store(
         filepath: str,
         group: str,
         file_id: str,
-        object_store: ObjectStore,
+        object_store: SupportedStore,
     ) -> Store:
         # TODO: Make this less sketchy, but it's better to use an AsyncTIFF store rather than an obstore store
         from async_tiff.store import LocalStore as ATStore
@@ -159,9 +161,7 @@ def _create_manifest_store(
             store=at_store, path=filepath, group=group
         )
         # Convert to a manifest store
-        return ManifestStore(
-            stores={file_id: object_store}, manifest_group=manifest_group
-        )
+        return ManifestStore(stores={file_id: object_store}, group=manifest_group)
 
     @staticmethod
     def open_virtual_dataset(

From 7410544eaff8d3ed3019c4d9564c09fa15f93e44 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:52:28 -0400
Subject: [PATCH 08/15] Consolidate test fixtures

---
 virtualizarr/tests/test_readers/conftest.py  | 9 +++++++++
 .../tests/test_readers/test_tiff/conftest.py | 13 -------------
 2 files changed, 9 insertions(+), 13 deletions(-)
 delete mode 100644 virtualizarr/tests/test_readers/test_tiff/conftest.py

diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py
index e49de78c1..ea7c1f313 100644
--- a/virtualizarr/tests/test_readers/conftest.py
+++ b/virtualizarr/tests/test_readers/conftest.py
@@ -16,6 +16,15 @@
     warnings.warn("hdf5plugin is required for HDF reader")
 
 
+@pytest.fixture
+def geotiff_file(tmp_path: Path) -> str:
+    """Create a GeoTIFF file with air temperature data."""
+    filepath = tmp_path / "air.tif"
+    with xr.tutorial.open_dataset("air_temperature") as ds:
+        ds.isel(time=0).rio.to_raster(filepath)
+    return str(filepath)
+
+
 @pytest.fixture
 def empty_chunks_hdf5_file(tmpdir):
     ds = xr.Dataset({"data": []})
diff --git a/virtualizarr/tests/test_readers/test_tiff/conftest.py b/virtualizarr/tests/test_readers/test_tiff/conftest.py
deleted file mode 100644
index 12e17ba17..000000000
--- a/virtualizarr/tests/test_readers/test_tiff/conftest.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from pathlib import Path
-
-import pytest
-import xarray as xr
-
-
-@pytest.fixture
-def geotiff_file(tmp_path: Path) -> str:
-    """Create a GeoTIFF file with air temperature data."""
-    filepath = tmp_path / "air.tif"
-    with xr.tutorial.open_dataset("air_temperature") as ds:
-        
ds.isel(time=0).rio.to_raster(filepath) - return str(filepath) From c3dbf27a5d3e493da93cc02bc7f7393490bb23d4 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 20:35:23 -0400 Subject: [PATCH 09/15] Properly extract chunk keys for arrays with a single chunk --- virtualizarr/manifests/store.py | 36 +++++++++++++++------------------ virtualizarr/manifests/utils.py | 11 ++++++++-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index e05e2e5ae..ed8179f96 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -108,18 +108,13 @@ def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer: return dict_to_buffer(metadata, prototype=default_buffer_prototype()) -def parse_manifest_index( - key: str, chunk_key_encoding: str = "." -) -> tuple[str, tuple[int, ...]]: +def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]: """ Splits `key` provided to a zarr store into the variable indicated by the first part and the chunk index from the 3rd through last parts, which can be used to index into the ndarrays containing paths, offsets, and lengths in ManifestArrays. - Currently only works for 1d+ arrays with a tree depth of one from the - root Zarr group. - Parameters ---------- key : str @@ -127,17 +122,15 @@ def parse_manifest_index( Returns ------- - ManifestIndex + tuple containing chunk indexes """ - parts = key.split("/") - var = parts[0] - # Assume "c" is the second part - # TODO: Handle scalar array case with "c" holds the data - if chunk_key_encoding == "/": - indexes = tuple(int(ind) for ind in parts[2:]) - else: - indexes = tuple(int(ind) for ind in parts[2].split(chunk_key_encoding)) - return var, indexes + if key.endswith("c"): + # Scalar arrays hold the data in the "c" key + return (0,) + parts = key.split( + "c/" + ) # TODO: Open an issue upstream about the Zarr spec indicating this should be f"c{chunk_key_encoding}" rather than always "c/" + return tuple(int(ind) for ind in parts[1].split(chunk_key_encoding)) def find_matching_store(stores: StoreDict, request_key: str) -> StoreRequest: @@ -263,13 +256,16 @@ async def get( if key.endswith("zarr.json"): return get_zarr_metadata(self._group, key) - var, chunk_key = parse_manifest_index(key) + var = key.split("/")[0] marr = self._group.arrays[var] manifest = marr.manifest - path = manifest._paths[*chunk_key] - offset = manifest._offsets[*chunk_key] - length = manifest._lengths[*chunk_key] + chunk_indexes = parse_manifest_index( + key, marr.metadata.chunk_key_encoding.separator + ) + path = manifest._paths[*chunk_indexes] + offset = manifest._offsets[*chunk_indexes] + length = manifest._lengths[*chunk_indexes] # Get the configured object store instance that matches the path store_request = find_matching_store(stores=self._stores, request_key=path) # Transform the input byte range to account for the chunk location in the file diff --git a/virtualizarr/manifests/utils.py b/virtualizarr/manifests/utils.py index 6b7faa646..f62b0591d 100644 --- a/virtualizarr/manifests/utils.py +++ b/virtualizarr/manifests/utils.py @@ -2,6 +2,7 @@ import numpy as np from zarr import Array +from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike from zarr.core.metadata.v3 import ArrayV3Metadata from virtualizarr.codecs import convert_to_codec_pipeline, get_codecs @@ -14,9 +15,11 @@ def create_v3_array_metadata( shape: tuple[int, ...], data_type: 
np.dtype, chunk_shape: tuple[int, ...], + chunk_key_encoding: ChunkKeyEncodingLike = {"name": "default"}, fill_value: Any = None, codecs: Optional[list[Dict[str, Any]]] = None, attributes: Optional[Dict[str, Any]] = None, + dimension_names: Optional[tuple[str, ...]] = None, ) -> ArrayV3Metadata: """ Create an ArrayV3Metadata instance with standard configuration. @@ -30,12 +33,16 @@ def create_v3_array_metadata( The numpy dtype of the array chunk_shape : tuple[int, ...] The shape of each chunk + chunk_key_encoding : ChunkKeyEncodingLike + The mapping from chunk grid cell coordinates to keys. fill_value : Any, optional The fill value for the array codecs : list[Dict[str, Any]], optional List of codec configurations attributes : Dict[str, Any], optional Additional attributes for the array + dimension_names : tuple[str], optional + Names of the dimensions Returns ------- @@ -49,14 +56,14 @@ def create_v3_array_metadata( "name": "regular", "configuration": {"chunk_shape": chunk_shape}, }, - chunk_key_encoding={"name": "default"}, + chunk_key_encoding=chunk_key_encoding, fill_value=fill_value, codecs=convert_to_codec_pipeline( codecs=codecs or [], dtype=data_type, ), attributes=attributes or {}, - dimension_names=None, + dimension_names=dimension_names, storage_transformers=None, ) From 68e7560c525e0848c062cbb6699299ef49f4845f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 31 Mar 2025 20:43:15 -0400 Subject: [PATCH 10/15] Update test to specify chunk_key_encoding --- .../tests/test_manifests/test_store.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/virtualizarr/tests/test_manifests/test_store.py b/virtualizarr/tests/test_manifests/test_store.py index 631e300ea..d77e98224 100644 --- a/virtualizarr/tests/test_manifests/test_store.py +++ b/virtualizarr/tests/test_manifests/test_store.py @@ -2,8 +2,9 @@ import json import pickle -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING +import numpy as np import pytest from zarr.abc.store import ( OffsetByteRequest, @@ -19,6 +20,7 @@ ManifestGroup, ManifestStore, ) +from virtualizarr.manifests.utils import create_v3_array_metadata from virtualizarr.tests import requires_minio, requires_obstore if TYPE_CHECKING: @@ -26,7 +28,7 @@ def _generate_manifest_store( - store: ObjectStore, *, prefix: str, filepath: str, array_v3_metadata: Callable + store: ObjectStore, *, prefix: str, filepath: str ) -> ManifestStore: """ Generate a ManifestStore for testing. 
@@ -66,9 +68,15 @@ def _generate_manifest_store(
         "1.1": {"path": f"{prefix}/{filepath}", "offset": 12, "length": 4},
     }
     manifest = ChunkManifest(entries=chunk_dict)
-    chunks = (1, 4)
-    shape = (2, 8)
-    array_metadata = array_v3_metadata(shape=shape, chunks=chunks)
+    codecs = [{"configuration": {"endian": "little"}, "name": "bytes"}]
+    array_metadata = create_v3_array_metadata(
+        shape=(4, 4),
+        chunk_shape=(2, 2),
+        data_type=np.dtype("int32"),
+        codecs=codecs,
+        chunk_key_encoding={"name": "default", "separator": "."},
+        fill_value=0,
+    )
     manifest_array = ManifestArray(metadata=array_metadata, chunkmanifest=manifest)
     manifest_group = ManifestGroup(
         arrays={"foo": manifest_array, "bar": manifest_array},
@@ -78,7 +86,7 @@
 
 
 @pytest.fixture()
-def local_store(tmpdir, array_v3_metadata):
+def local_store(tmpdir):
     import obstore as obs
 
     store = obs.store.LocalStore()
@@ -88,12 +96,11 @@ def local_store(tmpdir, array_v3_metadata):
         store=store,
         prefix=prefix,
         filepath=filepath,
-        array_v3_metadata=array_v3_metadata,
     )
 
 
 @pytest.fixture()
-def s3_store(minio_bucket, array_v3_metadata):
+def s3_store(minio_bucket):
     import obstore as obs
 
     store = obs.store.S3Store(
@@ -110,7 +117,6 @@
         store=store,
         prefix=prefix,
         filepath=filepath,
-        array_v3_metadata=array_v3_metadata,
     )
 
 
@@ -164,7 +170,7 @@ async def test_get_metadata(self, manifest_store, request):
             "foo/zarr.json", prototype=default_buffer_prototype()
         )
         metadata = json.loads(observed.to_bytes())
-        assert metadata["chunk_grid"]["configuration"]["chunk_shape"] == [1, 4]
+        assert metadata["chunk_grid"]["configuration"]["chunk_shape"] == [2, 2]
         assert metadata["node_type"] == "array"
         assert metadata["zarr_format"] == 3
 

From 7ef00875925413bd5492804a864f2ca0d7240b61 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 Mar 2025 20:53:32 -0400
Subject: [PATCH 11/15] Specify compression in test

---
 virtualizarr/tests/test_readers/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virtualizarr/tests/test_readers/conftest.py b/virtualizarr/tests/test_readers/conftest.py
index ea7c1f313..552f8ec5d 100644
--- a/virtualizarr/tests/test_readers/conftest.py
+++ b/virtualizarr/tests/test_readers/conftest.py
@@ -21,7 +21,7 @@ def geotiff_file(tmp_path: Path) -> str:
     """Create a GeoTIFF file with air temperature data."""
     filepath = tmp_path / "air.tif"
     with xr.tutorial.open_dataset("air_temperature") as ds:
-        ds.isel(time=0).rio.to_raster(filepath)
+        ds.isel(time=0).rio.to_raster(filepath, driver="COG", COMPRESS="DEFLATE")
     return str(filepath)
 
 

From 5d557795537276ea0b8d6e78a311553ec5591af1 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 Mar 2025 20:53:59 -0400
Subject: [PATCH 12/15] Add float64 support

---
 virtualizarr/readers/tiff.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virtualizarr/readers/tiff.py b/virtualizarr/readers/tiff.py
index e047bf81d..acb183884 100644
--- a/virtualizarr/readers/tiff.py
+++ b/virtualizarr/readers/tiff.py
@@ -44,6 +44,8 @@
 def _get_dtype(sample_format, bits_per_sample):
     if sample_format[0] == 1 and bits_per_sample[0] == 16:
         return np.dtype(np.uint16)
+    elif bits_per_sample[0] == 64:  # TODO: Check if sample_format matters here
+        return np.dtype(np.float64)
     else:
         raise NotImplementedError
 

From 203cec9752490a7bd04d9c55d23960bb69c48d28 Mon Sep 17 00:00:00 2001
From: Max Jones <14077947+maxrjones@users.noreply.github.com>
Date: Mon, 31 
Mar 2025 21:01:53 -0400 Subject: [PATCH 13/15] Update test --- virtualizarr/tests/test_readers/test_tiff.py | 21 ++++++++++++++++ .../tests/test_readers/test_tiff/test_tiff.py | 24 ------------------- 2 files changed, 21 insertions(+), 24 deletions(-) create mode 100644 virtualizarr/tests/test_readers/test_tiff.py delete mode 100644 virtualizarr/tests/test_readers/test_tiff/test_tiff.py diff --git a/virtualizarr/tests/test_readers/test_tiff.py b/virtualizarr/tests/test_readers/test_tiff.py new file mode 100644 index 000000000..5b718ccbe --- /dev/null +++ b/virtualizarr/tests/test_readers/test_tiff.py @@ -0,0 +1,21 @@ +import numpy as np +import xarray as xr + +from virtualizarr.readers import TIFFVirtualBackend +from virtualizarr.tests import requires_asynctiff, requires_rioxarray + + +@requires_asynctiff +@requires_rioxarray +def test_read_geotiff(geotiff_file): + import rioxarray + from obstore.store import LocalStore + + ms = TIFFVirtualBackend._create_manifest_store( + filepath=geotiff_file, group="0", file_id="file://", object_store=LocalStore() + ) + ds = xr.open_dataset(ms, engine="zarr", consolidated=False, zarr_format=3).load() + assert isinstance(ds, xr.Dataset) + expected = rioxarray.open_rasterio(geotiff_file).data.squeeze() + observed = ds["0"].data.squeeze() + np.testing.assert_allclose(observed, expected) diff --git a/virtualizarr/tests/test_readers/test_tiff/test_tiff.py b/virtualizarr/tests/test_readers/test_tiff/test_tiff.py deleted file mode 100644 index 63774f581..000000000 --- a/virtualizarr/tests/test_readers/test_tiff/test_tiff.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np -import xarray as xr - -from virtualizarr.readers import TIFFVirtualBackend -from virtualizarr.tests import requires_asynctiff, requires_rioxarray - - -@requires_asynctiff -@requires_rioxarray -def test_read_geotiff(geotiff_file): - import rioxarray - from obstore.store import LocalStore - - store = LocalStore() - kwargs = { - "file_id": "file://", - "store": store, - } - ds = TIFFVirtualBackend.open_virtual_dataset( - filepath=geotiff_file, group="0", virtual_backend_kwargs=kwargs - ) - assert isinstance(ds, xr.Dataset) - da_expected = rioxarray.open_rasterio(geotiff_file) - np.testing.assert_allclose(ds["0"].data, da_expected.data.squeeze()) From 6069ac78a91ed51619e105c348e58fe9a814ff12 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Tue, 1 Apr 2025 13:06:21 -0400 Subject: [PATCH 14/15] Update pyproject.toml Co-authored-by: Tom Nicholas --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 419ff077e..d90068909 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ fits = [ "kerchunk>=0.2.8", "astropy", ] -tif = [ +tiff = [ "obstore>=0.5.1", "async-tiff @ git+https://github.com/developmentseed/async-tiff#subdirectory=python", ] From b110e243e2656ac6808e6989a2e5010194277bc3 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Sun, 13 Jul 2025 17:46:05 -0400 Subject: [PATCH 15/15] Use external virtual_tiff --- pyproject.toml | 15 ++-- virtualizarr/manifests/utils.py | 2 +- virtualizarr/parsers/hdf/filters.py | 28 ++++++-- virtualizarr/parsers/tiff.py | 75 -------------------- virtualizarr/tests/__init__.py | 1 + virtualizarr/tests/test_parsers/test_tiff.py | 7 +- 6 files changed, 36 insertions(+), 92 deletions(-) delete mode 100644 virtualizarr/parsers/tiff.py diff --git a/pyproject.toml b/pyproject.toml index 
757d51d2c..9934250db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,9 @@ hdf = [ "imagecodecs", "imagecodecs-numcodecs==2024.6.1", ] - +tiff = [ + "virtual_tiff @ git+https://github.com/virtual-zarr/virtual-tiff", +] # kerchunk-based parsers netcdf3 = [ "virtualizarr[remote]", @@ -165,6 +167,9 @@ h5netcdf = ">=1.5.0,<2" [tool.pixi.feature.icechunk-dev.dependencies] rust = "*" +[tool.pixi.feature.rio.dependencies] +rioxarray = "*" + [tool.pixi.feature.minimum-versions.dependencies] xarray = "==2025.3.0" numpy = "==2.0.0" @@ -187,10 +192,10 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov [tool.pixi.environments] min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs # Inherit from min-deps to get all the test commands, along with optional dependencies -test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib"] -test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311"] # test against python 3.11 -test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312"] # test against python 3.12 -minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio"] +test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "rio"] +test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311", "tiff", "rio"] # test against python 3.11 +test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312", "tiff", "rio"] # test against python 3.12 +minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio", "tiff", "rio"] minimum-versions = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "minimum-versions"] upstream = ["dev", "test", "hdf", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"] all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk","kerchunk_parquet", "hdf5-lib", "all_parsers", "all_writers"] diff --git a/virtualizarr/manifests/utils.py b/virtualizarr/manifests/utils.py index a6caa3c9b..95f4533a7 100644 --- a/virtualizarr/manifests/utils.py +++ b/virtualizarr/manifests/utils.py @@ -23,7 +23,7 @@ def create_v3_array_metadata( fill_value: Any = None, codecs: Optional[list[Dict[str, Any]]] = None, attributes: Optional[Dict[str, Any]] = None, - dimension_names: Optional[Iterable[str]] = None, + dimension_names: Iterable[str] | None = None, ) -> ArrayV3Metadata: """ Create an ArrayV3Metadata instance with standard configuration. 
diff --git a/virtualizarr/parsers/hdf/filters.py b/virtualizarr/parsers/hdf/filters.py index 490632e8b..beabbf5ae 100644 --- a/virtualizarr/parsers/hdf/filters.py +++ b/virtualizarr/parsers/hdf/filters.py @@ -1,7 +1,7 @@ from __future__ import annotations import dataclasses -from typing import TYPE_CHECKING, List, Tuple, Union +from typing import TYPE_CHECKING, List, Tuple, TypedDict, Union import numcodecs.registry as registry import numpy as np @@ -9,12 +9,6 @@ from numcodecs.fixedscaleoffset import FixedScaleOffset from xarray.coding.variables import _choose_float_dtype -from virtualizarr.readers.common import ( - CFCodec, - ShuffleProperties, - ZlibProperties, - ZstdProperties, -) from virtualizarr.utils import soft_import h5py = soft_import("h5py", "For reading hdf files", strict=False) @@ -54,6 +48,26 @@ def __post_init__(self): self.cname = blosc_compressor_codes[self.cname] +@dataclasses.dataclass +class ZstdProperties: + level: int + + +@dataclasses.dataclass +class ShuffleProperties: + elementsize: int + + +@dataclasses.dataclass +class ZlibProperties: + level: int + + +class CFCodec(TypedDict): + target_dtype: np.dtype + codec: Codec + + def _filter_to_codec( filter_id: str, filter_properties: Union[int, None, Tuple] = None ) -> Codec: diff --git a/virtualizarr/parsers/tiff.py b/virtualizarr/parsers/tiff.py deleted file mode 100644 index df55c7a92..000000000 --- a/virtualizarr/parsers/tiff.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections.abc import Iterable -from pathlib import Path - -from obstore.store import ObjectStore - -from virtualizarr.manifests import ManifestStore -from virtualizarr.manifests.store import ObjectStoreRegistry, get_store_prefix -from virtualizarr.parsers.kerchunk.translator import manifestgroup_from_kerchunk_refs -from virtualizarr.types.kerchunk import KerchunkStoreRefs - - -class Parser: - def __init__( - self, - group: str | None = None, - skip_variables: Iterable[str] | None = None, - remote_options: dict | None = None, - ): - """ - Instantiate a parser with parser-specific parameters that can be used in the - `__call__` method. - - Parameters - ---------- - group - The group within the file to be used as the Zarr root group for the ManifestStore. - skip_variables - Variables in the file that will be ignored when creating the ManifestStore. - remote_options - Configuration options used internally for kerchunk's fsspec backend - """ - - self.group = group - self.skip_variables = skip_variables - self.remote_options = remote_options or {} - - def __call__( - self, - file_url: str, - object_store: ObjectStore, - ) -> ManifestStore: - """ - Parse the metadata and byte offsets from a given file to product a VirtualiZarr ManifestStore. - - Parameters - ---------- - file_url - The URI or path to the input file (e.g., "s3://bucket/file.tiff"). - object_store - An obstore ObjectStore instance for accessing the file specified in the - `file_url` parameter. - - Returns - ------- - ManifestStore - A ManifestStore which provides a Zarr representation of the parsed file. 
- """ - - from kerchunk.tiff import tiff_to_zarr - - # handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160 - refs = KerchunkStoreRefs( - {"refs": tiff_to_zarr(file_url, **self.remote_options)} - ) - - manifestgroup = manifestgroup_from_kerchunk_refs( - refs, - group=self.group, - skip_variables=self.skip_variables, - fs_root=Path.cwd().as_uri(), - ) - - registry = ObjectStoreRegistry({get_store_prefix(file_url): object_store}) - - return ManifestStore(group=manifestgroup, store_registry=registry) diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py index f6b36e874..ec85c9ca8 100644 --- a/virtualizarr/tests/__init__.py +++ b/virtualizarr/tests/__init__.py @@ -40,3 +40,4 @@ def _importorskip( has_zarr_python, requires_zarr_python = _importorskip("zarr") has_dask, requires_dask = _importorskip("dask") has_obstore, requires_obstore = _importorskip("obstore") +has_rioxarray, requires_rioxarray = _importorskip("rioxarray") diff --git a/virtualizarr/tests/test_parsers/test_tiff.py b/virtualizarr/tests/test_parsers/test_tiff.py index 5b718ccbe..1660f8ea8 100644 --- a/virtualizarr/tests/test_parsers/test_tiff.py +++ b/virtualizarr/tests/test_parsers/test_tiff.py @@ -1,7 +1,7 @@ import numpy as np import xarray as xr +from virtual_tiff import TIFFParser -from virtualizarr.readers import TIFFVirtualBackend from virtualizarr.tests import requires_asynctiff, requires_rioxarray @@ -11,9 +11,8 @@ def test_read_geotiff(geotiff_file): import rioxarray from obstore.store import LocalStore - ms = TIFFVirtualBackend._create_manifest_store( - filepath=geotiff_file, group="0", file_id="file://", object_store=LocalStore() - ) + parser = TIFFParser(ifd=0) + ms = parser(file_url=f"file://{geotiff_file}", object_store=LocalStore()) ds = xr.open_dataset(ms, engine="zarr", consolidated=False, zarr_format=3).load() assert isinstance(ds, xr.Dataset) expected = rioxarray.open_rasterio(geotiff_file).data.squeeze()
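
With the final patch applied, TIFF support lives in the external virtual_tiff
package rather than in the in-tree TIFFVirtualBackend. The sketch below is
adapted from the final test above and shows the intended end-to-end usage; the
file path is a placeholder standing in for any local GeoTIFF.

    import xarray as xr
    from obstore.store import LocalStore
    from virtual_tiff import TIFFParser

    # Parse the first image file directory (IFD) of a local GeoTIFF into a
    # ManifestStore, a Zarr-compatible view over the file's tile byte ranges.
    parser = TIFFParser(ifd=0)
    ms = parser(file_url="file:///path/to/air.tif", object_store=LocalStore())

    # xarray's zarr engine reads directly from the ManifestStore; chunk data
    # is fetched lazily from the original TIFF tiles via obstore.
    ds = xr.open_dataset(ms, engine="zarr", consolidated=False, zarr_format=3)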