diff --git a/pyproject.toml b/pyproject.toml index 757d51d2..9934250d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,9 @@ hdf = [ "imagecodecs", "imagecodecs-numcodecs==2024.6.1", ] - +tiff = [ + "virtual_tiff @ git+https://github.com/virtual-zarr/virtual-tiff", +] # kerchunk-based parsers netcdf3 = [ "virtualizarr[remote]", @@ -165,6 +167,9 @@ h5netcdf = ">=1.5.0,<2" [tool.pixi.feature.icechunk-dev.dependencies] rust = "*" +[tool.pixi.feature.rio.dependencies] +rioxarray = "*" + [tool.pixi.feature.minimum-versions.dependencies] xarray = "==2025.3.0" numpy = "==2.0.0" @@ -187,10 +192,10 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov [tool.pixi.environments] min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs # Inherit from min-deps to get all the test commands, along with optional dependencies -test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib"] -test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311"] # test against python 3.11 -test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312"] # test against python 3.12 -minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio"] +test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "rio"] +test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311", "tiff", "rio"] # test against python 3.11 +test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312", "tiff", "rio"] # test against python 3.12 +minio = ["dev", 
"remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio", "tiff", "rio"] minimum-versions = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "minimum-versions"] upstream = ["dev", "test", "hdf", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"] all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk","kerchunk_parquet", "hdf5-lib", "all_parsers", "all_writers"] diff --git a/virtualizarr/parsers/tiff.py b/virtualizarr/parsers/tiff.py deleted file mode 100644 index df55c7a9..00000000 --- a/virtualizarr/parsers/tiff.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections.abc import Iterable -from pathlib import Path - -from obstore.store import ObjectStore - -from virtualizarr.manifests import ManifestStore -from virtualizarr.manifests.store import ObjectStoreRegistry, get_store_prefix -from virtualizarr.parsers.kerchunk.translator import manifestgroup_from_kerchunk_refs -from virtualizarr.types.kerchunk import KerchunkStoreRefs - - -class Parser: - def __init__( - self, - group: str | None = None, - skip_variables: Iterable[str] | None = None, - remote_options: dict | None = None, - ): - """ - Instantiate a parser with parser-specific parameters that can be used in the - `__call__` method. - - Parameters - ---------- - group - The group within the file to be used as the Zarr root group for the ManifestStore. - skip_variables - Variables in the file that will be ignored when creating the ManifestStore. - remote_options - Configuration options used internally for kerchunk's fsspec backend - """ - - self.group = group - self.skip_variables = skip_variables - self.remote_options = remote_options or {} - - def __call__( - self, - file_url: str, - object_store: ObjectStore, - ) -> ManifestStore: - """ - Parse the metadata and byte offsets from a given file to product a VirtualiZarr ManifestStore. 
- - Parameters - ---------- - file_url - The URI or path to the input file (e.g., "s3://bucket/file.tiff"). - object_store - An obstore ObjectStore instance for accessing the file specified in the - `file_url` parameter. - - Returns - ------- - ManifestStore - A ManifestStore which provides a Zarr representation of the parsed file. - """ - - from kerchunk.tiff import tiff_to_zarr - - # handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160 - refs = KerchunkStoreRefs( - {"refs": tiff_to_zarr(file_url, **self.remote_options)} - ) - - manifestgroup = manifestgroup_from_kerchunk_refs( - refs, - group=self.group, - skip_variables=self.skip_variables, - fs_root=Path.cwd().as_uri(), - ) - - registry = ObjectStoreRegistry({get_store_prefix(file_url): object_store}) - - return ManifestStore(group=manifestgroup, store_registry=registry) diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py index f809214f..ec85c9ca 100644 --- a/virtualizarr/tests/__init__.py +++ b/virtualizarr/tests/__init__.py @@ -34,9 +34,10 @@ def _importorskip( has_s3fs, requires_s3fs = _importorskip("s3fs") has_lithops, requires_lithops = _importorskip("lithops") has_scipy, requires_scipy = _importorskip("scipy") -has_tifffile, requires_tifffile = _importorskip("tifffile") +has_asynctiff, requires_asynctiff = _importorskip("async_tiff") has_imagecodecs, requires_imagecodecs = _importorskip("imagecodecs") has_hdf5plugin, requires_hdf5plugin = _importorskip("hdf5plugin") has_zarr_python, requires_zarr_python = _importorskip("zarr") has_dask, requires_dask = _importorskip("dask") has_obstore, requires_obstore = _importorskip("obstore") +has_rioxarray, requires_rioxarray = _importorskip("rioxarray") diff --git a/virtualizarr/tests/test_parsers/conftest.py b/virtualizarr/tests/test_parsers/conftest.py index a761f9cd..435d4829 100644 --- a/virtualizarr/tests/test_parsers/conftest.py +++ b/virtualizarr/tests/test_parsers/conftest.py @@ 
-16,6 +16,15 @@ warnings.warn("hdf5plugin is required for HDF reader") +@pytest.fixture +def geotiff_file(tmp_path: Path) -> str: + """Create a Cloud-Optimized GeoTIFF (COG) file with air temperature data.""" + filepath = tmp_path / "air.tif" + with xr.tutorial.open_dataset("air_temperature") as ds: + ds.isel(time=0).rio.to_raster(filepath, driver="COG", COMPRESS="DEFLATE") + return str(filepath) + + @pytest.fixture def empty_chunks_hdf5_file(tmpdir): ds = xr.Dataset({"data": []}) diff --git a/virtualizarr/tests/test_parsers/test_tiff.py b/virtualizarr/tests/test_parsers/test_tiff.py new file mode 100644 index 00000000..1660f8ea --- /dev/null +++ b/virtualizarr/tests/test_parsers/test_tiff.py @@ -0,0 +1,20 @@ +import numpy as np +import xarray as xr +from virtual_tiff import TIFFParser + +from virtualizarr.tests import requires_asynctiff, requires_rioxarray + + +@requires_asynctiff +@requires_rioxarray +def test_read_geotiff(geotiff_file): + import rioxarray + from obstore.store import LocalStore + + parser = TIFFParser(ifd=0) + ms = parser(file_url=f"file://{geotiff_file}", object_store=LocalStore()) + ds = xr.open_dataset(ms, engine="zarr", consolidated=False, zarr_format=3).load() + assert isinstance(ds, xr.Dataset) + expected = rioxarray.open_rasterio(geotiff_file).data.squeeze() + observed = ds["0"].data.squeeze() + np.testing.assert_allclose(observed, expected)