diff --git a/CHANGELOG.md b/CHANGELOG.md index 992dd7ca..e44e7290 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [v0.5.0] + +### Features +- Add support for OME-NGFF v0.5 +- Move to zarr-python v3 + +### API Changes + +- The `compressor` argument has been renamed to `compressors` in all relevant functions and methods to reflect the support for multiple compressors in zarr v3. +- The `version` argument has been renamed to `ngff_version` in all relevant functions and methods to specify the OME-NGFF version. + ## [v0.4.4] ### Bug Fixes diff --git a/pyproject.toml b/pyproject.toml index 086c8f7a..273c794a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,8 @@ classifiers = [ dependencies = [ "numpy", "filelock", - "zarr<3", - "anndata>=0.8.0,<0.11.4", # To be removed when we transition to zarr v3 + "zarr>3", + "anndata", "pydantic", "pandas>=1.2.0", "requests", @@ -155,7 +155,7 @@ minversion = "7.0" testpaths = ["tests"] filterwarnings = [ "error", - "ignore::FutureWarning", # TODO remove after zarr-python v3 + "ignore::zarr.errors.ZarrUserWarning", # required for anndata ] addopts = [ "-vv", diff --git a/src/ngio/common/_pyramid.py b/src/ngio/common/_pyramid.py index 6011674a..886b60c8 100644 --- a/src/ngio/common/_pyramid.py +++ b/src/ngio/common/_pyramid.py @@ -5,8 +5,9 @@ import dask.array as da import numpy as np import zarr -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike +# from zarr.types import DIMENSION_SEPARATOR from ngio.common._zoom import ( InterpolationOrder, _zoom_inputs_check, @@ -26,7 +27,10 @@ def _on_disk_numpy_zoom( target: zarr.Array, order: InterpolationOrder, ) -> None: - target[...] = numpy_zoom(source[...], target_shape=target.shape, order=order) + source_array = source[...] + if not isinstance(source_array, np.ndarray): + raise NgioValueError("source zarr array could not be read as a numpy array") + target[...] = numpy_zoom(source_array, target_shape=target.shape, order=order) def _on_disk_dask_zoom( @@ -37,7 +41,7 @@ def _on_disk_dask_zoom( source_array = da.from_zarr(source) target_array = dask_zoom(source_array, target_shape=target.shape, order=order) - target_array = target_array.rechunk(target.chunks) + target_array = target_array.rechunk(target.chunks) # type: ignore target_array.compute_chunk_sizes() target_array.to_zarr(target) @@ -199,20 +203,24 @@ def init_empty_pyramid( paths: list[str], ref_shape: Sequence[int], scaling_factors: Sequence[float], - chunks: Sequence[int] | None = None, + axes: Sequence[str], + chunks: Sequence[int] | Literal["auto"] = "auto", dtype: str = "uint16", mode: AccessModeLiteral = "a", - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor="default", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", + zarr_format: Literal[2, 3] = 2, ) -> None: # Return the an Image object - if chunks is not None and len(chunks) != len(ref_shape): + if chunks != "auto" and len(chunks) != len(ref_shape): raise NgioValueError( "The shape and chunks must have the same number of dimensions." 
) - if chunks is not None: - chunks = [min(c, s) for c, s in zip(chunks, ref_shape, strict=True)] + if chunks != "auto": + chunks = tuple(min(c, s) for c, s in zip(chunks, ref_shape, strict=True)) + else: + chunks = "auto" if len(ref_shape) != len(scaling_factors): raise NgioValueError( @@ -223,7 +231,25 @@ def init_empty_pyramid( # To reduce the risk of floating point issues scaling_factors = [_maybe_int(s) for s in scaling_factors] - root_group = open_group_wrapper(store, mode=mode) + root_group = open_group_wrapper(store, mode=mode, zarr_format=zarr_format) + + array_static_kwargs = { + "dtype": dtype, + "overwrite": True, + "compressors": compressors, + } + + if zarr_format == 2: + array_static_kwargs["chunk_key_encoding"] = { + "name": "v2", + "separator": dimension_separator, + } + else: + array_static_kwargs["chunk_key_encoding"] = { + "name": "default", + "separator": dimension_separator, + } + array_static_kwargs["dimension_names"] = axes for path in paths: if any(s < 1 for s in ref_shape): @@ -231,23 +257,17 @@ def init_empty_pyramid( "Level shape must be at least 1 on all dimensions. " f"Calculated shape: {ref_shape} at level {path}." ) - new_arr = root_group.zeros( + new_arr = root_group.create_array( name=path, - shape=ref_shape, - dtype=dtype, + shape=tuple(ref_shape), chunks=chunks, - dimension_separator=dimension_separator, - overwrite=True, - compressor=compressor, + **array_static_kwargs, ) - _shape = [ + ref_shape = [ math.floor(s / sc) for s, sc in zip(ref_shape, scaling_factors, strict=True) ] - ref_shape = _shape - - if chunks is None: - chunks = new_arr.chunks - assert chunks is not None - chunks = [min(c, s) for c, s in zip(chunks, ref_shape, strict=True)] + chunks = tuple( + min(c, s) for c, s in zip(new_arr.chunks, ref_shape, strict=True) + ) return None diff --git a/src/ngio/images/_create.py b/src/ngio/images/_create.py index 0adabc51..c9ed7d14 100644 --- a/src/ngio/images/_create.py +++ b/src/ngio/images/_create.py @@ -1,9 +1,9 @@ """Utility functions for working with OME-Zarr images.""" from collections.abc import Sequence -from typing import TypeVar +from typing import Literal, TypeVar -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike from ngio.common._pyramid import init_empty_pyramid from ngio.ome_zarr_meta import ( @@ -40,7 +40,7 @@ def _init_generic_meta( space_unit: SpaceUnits | str | None = DefaultSpaceUnit, time_unit: TimeUnits | str | None = DefaultTimeUnit, name: str | None = None, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> tuple[_image_or_label_meta, list[float]]: """Initialize the metadata for an image or label.""" scaling_factors = [] @@ -75,7 +75,7 @@ def _init_generic_meta( axes_names=axes_names, pixel_size=pixel_sizes, scaling_factors=scaling_factors, - version=version, + version=ngff_version, ) return meta, scaling_factors @@ -93,12 +93,12 @@ def create_empty_label_container( time_unit: TimeUnits | str | None = DefaultTimeUnit, axes_names: Sequence[str] | None = None, name: str | None = None, - chunks: Sequence[int] | None = None, + chunks: Sequence[int] | Literal["auto"] = "auto", dtype: str = "uint32", - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor="default", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", overwrite: bool = False, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> ZarrGroupHandler: """Create an empty label with the given 
shape and metadata. @@ -122,15 +122,15 @@ def create_empty_label_container( axes_names (Sequence[str] | None, optional): The names of the axes. If None the canonical names are used. Defaults to None. name (str | None, optional): The name of the image. Defaults to None. - chunks (Sequence[int] | None, optional): The chunk shape. If None the shape + chunks (Sequence[int] | Literal["auto"]): The chunk shape. If None the shape is used. Defaults to None. dimension_separator (DIMENSION_SEPARATOR): The separator to use for dimensions. Defaults to "/". - compressor: The compressor to use. Defaults to "default". + compressors (CompressorLike): The compressors to use. Defaults to "auto". dtype (str, optional): The data type of the image. Defaults to "uint16". overwrite (bool, optional): Whether to overwrite an existing image. Defaults to True. - version (str, optional): The version of the OME-Zarr specification. + ngff_version (str, optional): The version of the OME-Zarr specification. Defaults to DefaultVersion. """ @@ -155,12 +155,16 @@ def create_empty_label_container( time_unit=time_unit, axes_names=axes_names, name=name, - version=version, + ngff_version=ngff_version, ) mode = "w" if overwrite else "w-" - group_handler = ZarrGroupHandler(store=store, mode=mode, cache=False) - image_handler = get_label_meta_handler(version=version, group_handler=group_handler) + group_handler = ZarrGroupHandler( + store=store, mode=mode, cache=False, zarr_format=meta.zarr_format + ) + image_handler = get_label_meta_handler( + version=ngff_version, group_handler=group_handler + ) image_handler.write_meta(meta) init_empty_pyramid( @@ -169,10 +173,11 @@ def create_empty_label_container( scaling_factors=scaling_factors, ref_shape=shape, chunks=chunks, + axes=axes_names, dtype=dtype, mode="a", dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, ) group_handler._mode = "r+" return group_handler @@ -191,12 +196,12 @@ def create_empty_image_container( time_unit: TimeUnits | str | None = DefaultTimeUnit, axes_names: Sequence[str] | None = None, name: str | None = None, - chunks: Sequence[int] | None = None, + chunks: Sequence[int] | Literal["auto"] = "auto", dtype: str = "uint16", - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor="default", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", overwrite: bool = False, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> ZarrGroupHandler: """Create an empty OME-Zarr image with the given shape and metadata. @@ -220,15 +225,15 @@ def create_empty_image_container( axes_names (Sequence[str] | None, optional): The names of the axes. If None the canonical names are used. Defaults to None. name (str | None, optional): The name of the image. Defaults to None. - chunks (Sequence[int] | None, optional): The chunk shape. If None the shape + chunks (Sequence[int] | Literal["auto"]): The chunk shape. If None the shape is used. Defaults to None. dtype (str, optional): The data type of the image. Defaults to "uint16". dimension_separator (DIMENSION_SEPARATOR): The separator to use for dimensions. Defaults to "/". - compressor: The compressor to use. Defaults to "default". + compressors (CompressorLike): The compressors to use. Defaults to "auto". overwrite (bool, optional): Whether to overwrite an existing image. Defaults to True. - version (str, optional): The version of the OME-Zarr specification. 
+ ngff_version (str, optional): The version of the OME-Zarr specification. Defaults to DefaultVersion. """ @@ -253,11 +258,15 @@ def create_empty_image_container( time_unit=time_unit, axes_names=axes_names, name=name, - version=version, + ngff_version=ngff_version, ) mode = "w" if overwrite else "w-" - group_handler = ZarrGroupHandler(store=store, mode=mode, cache=False) - image_handler = get_image_meta_handler(version=version, group_handler=group_handler) + group_handler = ZarrGroupHandler( + store=store, mode=mode, cache=False, zarr_format=meta.zarr_format + ) + image_handler = get_image_meta_handler( + version=ngff_version, group_handler=group_handler + ) image_handler.write_meta(meta) init_empty_pyramid( @@ -266,10 +275,12 @@ def create_empty_image_container( scaling_factors=scaling_factors, ref_shape=shape, chunks=chunks, + axes=axes_names, dtype=dtype, mode="a", dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, + zarr_format=meta.zarr_format, ) group_handler._mode = "r+" diff --git a/src/ngio/images/_create_synt_container.py b/src/ngio/images/_create_synt_container.py index 8f5901a8..cdb1cdf7 100644 --- a/src/ngio/images/_create_synt_container.py +++ b/src/ngio/images/_create_synt_container.py @@ -1,10 +1,11 @@ """Abstract class for handling OME-NGFF images.""" from collections.abc import Sequence +from typing import Literal import numpy as np import PIL.Image -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike from ngio.common._synt_images_utils import fit_to_shape from ngio.images._ome_zarr_container import OmeZarrContainer, create_ome_zarr_from_array @@ -30,16 +31,16 @@ def create_synthetic_ome_zarr( xy_scaling_factor: float = 2, z_scaling_factor: float = 1.0, axes_names: Sequence[str] | None = None, - chunks: Sequence[int] | None = None, + chunks: Sequence[int] | Literal["auto"] = "auto", channel_labels: list[str] | None = None, channel_wavelengths: list[str] | None = None, channel_colors: Sequence[str] | None = None, channel_active: Sequence[bool] | None = None, table_backend: TableBackend = DefaultTableBackend, - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor="default", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", overwrite: bool = False, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> OmeZarrContainer: """Create an empty OME-Zarr image with the given shape and metadata. @@ -55,8 +56,8 @@ def create_synthetic_ome_zarr( Defaults to 1.0. axes_names (Sequence[str] | None, optional): The names of the axes. If None the canonical names are used. Defaults to None. - chunks (Sequence[int] | None, optional): The chunk shape. If None the shape - is used. Defaults to None. + chunks (Sequence[int] | Literal["auto"]): The chunk shape. If None the shape + is used. Defaults to "auto". channel_labels (list[str] | None, optional): The labels of the channels. Defaults to None. channel_wavelengths (list[str] | None, optional): The wavelengths of the @@ -68,10 +69,10 @@ def create_synthetic_ome_zarr( table_backend (TableBackend): Table backend to be used to store tables dimension_separator (DIMENSION_SEPARATOR): The separator to use for dimensions. Defaults to "/". - compressor: The compressor to use. Defaults to "default". + compressors (CompressorLike): The compressors to use. Defaults to "auto". overwrite (bool, optional): Whether to overwrite an existing image. Defaults to True. 
- version (NgffVersion, optional): The version of the OME-Zarr specification. + ngff_version (NgffVersion, optional): The version of the OME-Zarr specification. Defaults to DefaultNgffVersion. """ if isinstance(reference_sample, str): @@ -103,8 +104,8 @@ def create_synthetic_ome_zarr( chunks=chunks, overwrite=overwrite, dimension_separator=dimension_separator, - compressor=compressor, - version=version, + compressors=compressors, + ngff_version=ngff_version, ) image = ome_zarr.get_image() diff --git a/src/ngio/images/_image.py b/src/ngio/images/_image.py index ba6aea78..7c8cc376 100644 --- a/src/ngio/images/_image.py +++ b/src/ngio/images/_image.py @@ -6,7 +6,7 @@ import dask.array as da import numpy as np from pydantic import BaseModel, model_validator -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike from ngio.common import ( Dimensions, @@ -32,6 +32,7 @@ ChannelVisualisation, DefaultSpaceUnit, DefaultTimeUnit, + NgffVersions, SpaceUnits, TimeUnits, ) @@ -40,6 +41,7 @@ StoreOrGroup, ZarrGroupHandler, ) +from ngio.utils._zarr_utils import find_dimension_separator class ChannelSelectionModel(BaseModel): @@ -604,8 +606,9 @@ def derive( name: str | None = None, chunks: Sequence[int] | None = None, dtype: str | None = None, - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor: str | None = None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, + ngff_version: NgffVersions | None = None, overwrite: bool = False, ) -> "ImagesContainer": """Create an empty OME-Zarr image from an existing image. @@ -619,12 +622,13 @@ def derive( pixel_size (PixelSize | None): The pixel size of the new image. axes_names (Sequence[str] | None): The axes names of the new image. name (str | None): The name of the new image. - chunks (Sequence[int] | None): The chunk shape of the new image. + chunks (Sequence[int] | Literal["auto"]): The chunk shape of the new image. dimension_separator (DIMENSION_SEPARATOR | None): The separator to use for dimensions. If None it will use the same as the reference image. - compressor (str | None): The compressor to use. If None it will use + compressors: The compressor to use. If None it will use the same as the reference image. dtype (str | None): The data type of the new image. + ngff_version (NgffVersions): The NGFF version to use. overwrite (bool): Whether to overwrite an existing image. Returns: @@ -642,7 +646,8 @@ def derive( chunks=chunks, dtype=dtype, dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, + ngff_version=ngff_version, overwrite=overwrite, ) @@ -725,8 +730,9 @@ def derive_image_container( name: str | None = None, chunks: Sequence[int] | None = None, dtype: str | None = None, - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor=None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, + ngff_version: NgffVersions | None = None, overwrite: bool = False, ) -> ImagesContainer: """Create an empty OME-Zarr image from an existing image. @@ -743,8 +749,9 @@ def derive_image_container( chunks (Sequence[int] | None): The chunk shape of the new image. dimension_separator (DIMENSION_SEPARATOR | None): The separator to use for dimensions. If None it will use the same as the reference image. - compressor: The compressor to use. If None it will use + compressors (CompressorLike | None): The compressors to use. If None it will use the same as the reference image. 
+ ngff_version (NgffVersions): The NGFF version to use. dtype (str | None): The data type of the new image. overwrite (bool): Whether to overwrite an existing image. @@ -790,10 +797,13 @@ def derive_image_container( dtype = ref_image.dtype if dimension_separator is None: - dimension_separator = ref_image.zarr_array._dimension_separator # type: ignore + dimension_separator = find_dimension_separator(ref_image.zarr_array) - if compressor is None: - compressor = ref_image.zarr_array.compressor # type: ignore + if compressors is None: + compressors = ref_image.zarr_array.compressors # type: ignore + + if ngff_version is None: + ngff_version = ref_meta.version handler = create_empty_image_container( store=store, @@ -810,10 +820,10 @@ def derive_image_container( name=name, chunks=chunks, dtype=dtype, - dimension_separator=dimension_separator, # type: ignore - compressor=compressor, # type: ignore + dimension_separator=dimension_separator, + compressors=compressors, overwrite=overwrite, - version=ref_meta.version, + ngff_version=ngff_version, ) image_container = ImagesContainer(handler) diff --git a/src/ngio/images/_label.py b/src/ngio/images/_label.py index 427d356b..ea71d1e2 100644 --- a/src/ngio/images/_label.py +++ b/src/ngio/images/_label.py @@ -3,7 +3,7 @@ from collections.abc import Sequence from typing import Literal -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike from ngio.common import compute_masking_roi from ngio.images._abstract_image import AbstractImage @@ -28,6 +28,7 @@ StoreOrGroup, ZarrGroupHandler, ) +from ngio.utils._zarr_utils import find_dimension_separator class Label(AbstractImage[LabelMetaHandler]): @@ -107,7 +108,6 @@ class LabelsContainer: def __init__(self, group_handler: ZarrGroupHandler) -> None: """Initialize the LabelGroupHandler.""" self._group_handler = group_handler - # Validate the group # Either contains a labels attribute or is empty attrs = self._group_handler.load_attrs() @@ -169,8 +169,8 @@ def derive( axes_names: Sequence[str] | None = None, chunks: Sequence[int] | None = None, dtype: str = "uint32", - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor=None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, overwrite: bool = False, ) -> "Label": """Create an empty OME-Zarr label from a reference image. @@ -190,8 +190,8 @@ def derive( dtype (str): The data type of the new label. dimension_separator (DIMENSION_SEPARATOR | None): The separator to use for dimensions. If None it will use the same as the reference image. - compressor: The compressor to use. If None it will use - the same as the reference image. + compressors (CompressorLike | None): The compressors to use. If None it will + use the same as the reference image. overwrite (bool): Whether to overwrite an existing image. Returns: @@ -217,7 +217,7 @@ def derive( chunks=chunks, dtype=dtype, dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, overwrite=overwrite, ) @@ -236,8 +236,8 @@ def derive_label( pixel_size: PixelSize | None = None, axes_names: Sequence[str] | None = None, chunks: Sequence[int] | None = None, - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor=None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, dtype: str = "uint32", overwrite: bool = False, ) -> None: @@ -256,7 +256,7 @@ def derive_label( dtype (str): The data type of the new label. 
dimension_separator (DIMENSION_SEPARATOR | None): The separator to use for dimensions. If None it will use the same as the reference image. - compressor: The compressor to use. If None it will use + compressors (CompressorLike | None): The compressor to use. If None it will use the same as the reference image. overwrite (bool): Whether to overwrite an existing image. @@ -308,9 +308,9 @@ def derive_label( axes_names = axes_names[:c_axis] + axes_names[c_axis + 1 :] if dimension_separator is None: - dimension_separator = ref_image.zarr_array._dimension_separator # type: ignore - if compressor is None: - compressor = ref_image.zarr_array.compressor # type: ignore + dimension_separator = find_dimension_separator(ref_image.zarr_array) + if compressors is None: + compressors = ref_image.zarr_array.compressors # type: ignore _ = create_empty_label_container( store=store, @@ -326,10 +326,10 @@ def derive_label( axes_names=axes_names, chunks=chunks, dtype=dtype, - dimension_separator=dimension_separator, # type: ignore - compressor=compressor, # type: ignore + dimension_separator=dimension_separator, + compressors=compressors, overwrite=overwrite, - version=ref_meta.version, + ngff_version=ref_meta.version, name=name, ) return None diff --git a/src/ngio/images/_ome_zarr_container.py b/src/ngio/images/_ome_zarr_container.py index c4050e73..2f1fb8f1 100644 --- a/src/ngio/images/_ome_zarr_container.py +++ b/src/ngio/images/_ome_zarr_container.py @@ -2,9 +2,10 @@ import warnings from collections.abc import Sequence +from typing import Literal import numpy as np -from zarr.types import DIMENSION_SEPARATOR +from zarr.core.array import CompressorLike from ngio.images._create import create_empty_image_container from ngio.images._image import Image, ImagesContainer @@ -409,10 +410,11 @@ def derive_image( name: str | None = None, chunks: Sequence[int] | None = None, dtype: str | None = None, - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor=None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, copy_labels: bool = False, copy_tables: bool = False, + ngff_version: NgffVersions | None = None, overwrite: bool = False, ) -> "OmeZarrContainer": """Create an empty OME-Zarr container from an existing image. @@ -431,17 +433,18 @@ def derive_image( dimension_separator (DIMENSION_SEPARATOR | None): The dimension separator to use. If None, the dimension separator of the reference image will be used. - compressor: The compressor to use. If None, the compressor of the - reference image will be used. + compressors (CompressorLike | None): The compressors to use. If None, + the compressors of the reference image will be used. copy_labels (bool): Whether to copy the labels from the reference image. copy_tables (bool): Whether to copy the tables from the reference image. + ngff_version (NgffVersions): The NGFF version to use. overwrite (bool): Whether to overwrite an existing image. Returns: OmeZarrContainer: The new image container. 
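+
+        Example:
+            A minimal, illustrative call on an existing OmeZarrContainer
+            instance (the store path and argument values below are
+            hypothetical placeholders)::
+
+                derived = ome_zarr.derive_image(
+                    store="derived_image.zarr",
+                    ref_path="0",
+                    compressors="auto",
+                    ngff_version="0.5",
+                    overwrite=True,
+                )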
""" - _ = self._images_container.derive( + new_container = self._images_container.derive( store=store, ref_path=ref_path, shape=shape, @@ -452,16 +455,13 @@ def derive_image( chunks=chunks, dtype=dtype, dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, + ngff_version=ngff_version, overwrite=overwrite, ) - handler = ZarrGroupHandler( - store, cache=self._group_handler.use_cache, mode=self._group_handler.mode - ) - new_ome_zarr = OmeZarrContainer( - group_handler=handler, + group_handler=new_container._group_handler, validate_paths=False, ) @@ -693,8 +693,8 @@ def derive_label( axes_names: Sequence[str] | None = None, chunks: Sequence[int] | None = None, dtype: str = "uint32", - dimension_separator: DIMENSION_SEPARATOR | None = None, - compressor=None, + dimension_separator: Literal[".", "/"] | None = None, + compressors: CompressorLike | None = None, overwrite: bool = False, ) -> "Label": """Create an empty OME-Zarr label from a reference image. @@ -714,8 +714,8 @@ def derive_label( dimension_separator (DIMENSION_SEPARATOR | None): The dimension separator to use. If None, the dimension separator of the reference image will be used. - compressor: The compressor to use. If None, the compressor of the - reference image will be used. + compressors (CompressorLike | None): The compressors to use. If None, + the compressors of the reference image will be used. overwrite (bool): Whether to overwrite an existing image. Returns: @@ -733,7 +733,7 @@ def derive_label( chunks=chunks, dtype=dtype, dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, overwrite=overwrite, ) @@ -773,7 +773,7 @@ def open_image( mode (AccessModeLiteral): The access mode for the image. Defaults to "r+". """ - group_handler = ZarrGroupHandler(store, cache, mode) + group_handler = ZarrGroupHandler(store=store, cache=cache, mode=mode) images_container = ImagesContainer(group_handler) return images_container.get( path=path, @@ -806,7 +806,7 @@ def open_label( mode (AccessModeLiteral): The access mode for the image. Defaults to "r+". """ - group_handler = ZarrGroupHandler(store, cache, mode) + group_handler = ZarrGroupHandler(store=store, cache=cache, mode=mode) if name is None: label_meta_handler = find_label_meta_handler(group_handler) path = label_meta_handler.meta.get_dataset( @@ -836,16 +836,16 @@ def create_empty_ome_zarr( time_unit: TimeUnits = DefaultTimeUnit, axes_names: Sequence[str] | None = None, name: str | None = None, - chunks: Sequence[int] | None = None, + chunks: Sequence[int] | Literal["auto"] = "auto", dtype: str = "uint16", - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor="default", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", channel_labels: list[str] | None = None, channel_wavelengths: list[str] | None = None, channel_colors: Sequence[str] | None = None, channel_active: Sequence[bool] | None = None, overwrite: bool = False, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> OmeZarrContainer: """Create an empty OME-Zarr image with the given shape and metadata. @@ -874,7 +874,7 @@ def create_empty_ome_zarr( dtype (str, optional): The data type of the image. Defaults to "uint16". dimension_separator (DIMENSION_SEPARATOR): The dimension separator to use. Defaults to "/". - compressor: The compressor to use. Defaults to "default". + compressors (CompressorLike): The compressor to use. Defaults to "auto". 
channel_labels (list[str] | None, optional): The labels of the channels. Defaults to None. channel_wavelengths (list[str] | None, optional): The wavelengths of the @@ -885,7 +885,7 @@ def create_empty_ome_zarr( active. Defaults to None. overwrite (bool, optional): Whether to overwrite an existing image. Defaults to True. - version (NgffVersion, optional): The version of the OME-Zarr specification. + ngff_version (NgffVersion, optional): The version of the OME-Zarr specification. Defaults to DefaultNgffVersion. """ handler = create_empty_image_container( @@ -904,9 +904,9 @@ def create_empty_ome_zarr( chunks=chunks, dtype=dtype, dimension_separator=dimension_separator, - compressor=compressor, + compressors=compressors, overwrite=overwrite, - version=version, + ngff_version=ngff_version, ) ome_zarr = OmeZarrContainer(group_handler=handler) @@ -938,11 +938,11 @@ def create_ome_zarr_from_array( channel_colors: Sequence[str] | None = None, channel_active: Sequence[bool] | None = None, name: str | None = None, - chunks: Sequence[int] | None = None, - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor: str = "default", + chunks: Sequence[int] | Literal["auto"] = "auto", + dimension_separator: Literal[".", "/"] = "/", + compressors: CompressorLike = "auto", overwrite: bool = False, - version: NgffVersions = DefaultNgffVersion, + ngff_version: NgffVersions = DefaultNgffVersion, ) -> OmeZarrContainer: """Create an OME-Zarr image from a numpy array. @@ -980,10 +980,10 @@ def create_ome_zarr_from_array( active. Defaults to None. dimension_separator (DIMENSION_SEPARATOR): The separator to use for dimensions. Defaults to "/". - compressor: The compressor to use. Defaults to "default". + compressors (CompressorLike): The compressors to use. Defaults to "auto". overwrite (bool, optional): Whether to overwrite an existing image. Defaults to True. - version (str, optional): The version of the OME-Zarr specification. + ngff_version (str, optional): The version of the OME-Zarr specification. Defaults to DefaultNgffVersion. 
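+
+    Example:
+        A minimal, illustrative call (the store path and array are hypothetical,
+        and any parameters of create_ome_zarr_from_array not shown in this hunk
+        are assumed to keep their defaults)::
+
+            data = np.zeros((1, 8, 256, 256), dtype="uint16")
+            ome_zarr = create_ome_zarr_from_array(
+                store="new_image.zarr",
+                array=data,
+                chunks="auto",
+                compressors="auto",
+                ngff_version="0.5",
+            )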
""" handler = create_empty_image_container( @@ -1003,8 +1003,8 @@ def create_ome_zarr_from_array( dtype=str(array.dtype), overwrite=overwrite, dimension_separator=dimension_separator, - compressor=compressor, - version=version, + compressors=compressors, + ngff_version=ngff_version, ) ome_zarr = OmeZarrContainer(group_handler=handler) diff --git a/src/ngio/ome_zarr_meta/_meta_handlers.py b/src/ngio/ome_zarr_meta/_meta_handlers.py index 9f37d552..203cf196 100644 --- a/src/ngio/ome_zarr_meta/_meta_handlers.py +++ b/src/ngio/ome_zarr_meta/_meta_handlers.py @@ -21,6 +21,16 @@ v04_to_ngio_plate_meta, v04_to_ngio_well_meta, ) +from ngio.ome_zarr_meta.v05 import ( + ngio_to_v05_image_meta, + ngio_to_v05_label_meta, + ngio_to_v05_plate_meta, + ngio_to_v05_well_meta, + v05_to_ngio_image_meta, + v05_to_ngio_label_meta, + v05_to_ngio_plate_meta, + v05_to_ngio_well_meta, +) from ngio.utils import ( NgioValidationError, NgioValueError, @@ -678,17 +688,36 @@ def register_plate_ie( importer=v04_to_ngio_image_meta, exporter=ngio_to_v04_image_meta, ) + +ImplementedMetaImporterExporter().register_label_ie( + version="0.5", + importer=v05_to_ngio_label_meta, + exporter=ngio_to_v05_label_meta, +) + ImplementedMetaImporterExporter().register_label_ie( version="0.4", importer=v04_to_ngio_label_meta, exporter=ngio_to_v04_label_meta, ) +ImplementedMetaImporterExporter().register_image_ie( + version="0.5", + importer=v05_to_ngio_image_meta, + exporter=ngio_to_v05_image_meta, +) + ImplementedMetaImporterExporter().register_well_ie( version="0.4", importer=v04_to_ngio_well_meta, exporter=ngio_to_v04_well_meta ) +ImplementedMetaImporterExporter().register_well_ie( + version="0.5", importer=v05_to_ngio_well_meta, exporter=ngio_to_v05_well_meta +) ImplementedMetaImporterExporter().register_plate_ie( version="0.4", importer=v04_to_ngio_plate_meta, exporter=ngio_to_v04_plate_meta ) +ImplementedMetaImporterExporter().register_plate_ie( + version="0.5", importer=v05_to_ngio_plate_meta, exporter=ngio_to_v05_plate_meta +) ########################################################################### diff --git a/src/ngio/ome_zarr_meta/ngio_specs/_ngio_image.py b/src/ngio/ome_zarr_meta/ngio_specs/_ngio_image.py index d64a7b04..4a792ae2 100644 --- a/src/ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +++ b/src/ngio/ome_zarr_meta/ngio_specs/_ngio_image.py @@ -25,7 +25,7 @@ from ngio.utils import NgioValidationError, NgioValueError T = TypeVar("T") -NgffVersions = Literal["0.4"] +NgffVersions = Literal["0.4", "0.5"] DefaultNgffVersion: Literal["0.4"] = "0.4" @@ -146,6 +146,17 @@ def version(self) -> NgffVersions: """Version of the OME-NFF metadata used to build the object.""" return self._version # type: ignore (version is a Literal type) + @property + def zarr_format(self) -> Literal[2, 3]: + """Zarr version used to store the data.""" + match self.version: + case "0.4": + return 2 + case "0.5": + return 3 + case _: + raise NgioValueError(f"Unsupported NGFF version: {self.version}") + @property def name(self) -> str | None: """Name of the image.""" diff --git a/src/ngio/ome_zarr_meta/v04/_v04_spec_utils.py b/src/ngio/ome_zarr_meta/v04/_v04_spec_utils.py index 0f03c9dd..d1a70129 100644 --- a/src/ngio/ome_zarr_meta/v04/_v04_spec_utils.py +++ b/src/ngio/ome_zarr_meta/v04/_v04_spec_utils.py @@ -9,7 +9,6 @@ - A function to convert a ngio image metadata to a v04 image metadata. 
""" -from ome_zarr_models.common.multiscales import ValidTransform as ValidTransformV04 from ome_zarr_models.v04.axes import Axis as AxisV04 from ome_zarr_models.v04.coordinate_transformations import VectorScale as VectorScaleV04 from ome_zarr_models.v04.coordinate_transformations import ( @@ -20,6 +19,7 @@ from ome_zarr_models.v04.image_label import ImageLabelAttrs as LabelAttrsV04 from ome_zarr_models.v04.multiscales import Dataset as DatasetV04 from ome_zarr_models.v04.multiscales import Multiscale as MultiscaleV04 +from ome_zarr_models.v04.multiscales import ValidTransform as ValidTransformV04 from ome_zarr_models.v04.omero import Channel as ChannelV04 from ome_zarr_models.v04.omero import Omero as OmeroV04 from ome_zarr_models.v04.omero import Window as WindowV04 @@ -169,7 +169,7 @@ def _v04_to_ngio_datasets( unit = str(unit) axes.append( Axis( - name=v04_axis.name, + name=str(v04_axis.name), axis_type=AxisType(v04_axis.type), # (for some reason the type is a generic JsonValue, # but it should be a string or None) diff --git a/src/ngio/ome_zarr_meta/v05/__init__.py b/src/ngio/ome_zarr_meta/v05/__init__.py new file mode 100644 index 00000000..76ba934e --- /dev/null +++ b/src/ngio/ome_zarr_meta/v05/__init__.py @@ -0,0 +1,23 @@ +"""Utility to read/write OME-Zarr metadata v0.4.""" + +from ngio.ome_zarr_meta.v05._v05_spec_utils import ( + ngio_to_v05_image_meta, + ngio_to_v05_label_meta, + ngio_to_v05_plate_meta, + ngio_to_v05_well_meta, + v05_to_ngio_image_meta, + v05_to_ngio_label_meta, + v05_to_ngio_plate_meta, + v05_to_ngio_well_meta, +) + +__all__ = [ + "ngio_to_v05_image_meta", + "ngio_to_v05_label_meta", + "ngio_to_v05_plate_meta", + "ngio_to_v05_well_meta", + "v05_to_ngio_image_meta", + "v05_to_ngio_label_meta", + "v05_to_ngio_plate_meta", + "v05_to_ngio_well_meta", +] diff --git a/src/ngio/ome_zarr_meta/v05/_custom_models.py b/src/ngio/ome_zarr_meta/v05/_custom_models.py new file mode 100644 index 00000000..0019fbbd --- /dev/null +++ b/src/ngio/ome_zarr_meta/v05/_custom_models.py @@ -0,0 +1,18 @@ +from typing import Annotated + +from ome_zarr_models.v05.well import WellAttrs as WellAttrs05 +from ome_zarr_models.v05.well_types import WellImage as WellImage05 +from ome_zarr_models.v05.well_types import WellMeta as WellMeta05 +from pydantic import SkipValidation + + +class CustomWellImage(WellImage05): + path: Annotated[str, SkipValidation] + + +class CustomWellMeta(WellMeta05): + images: list[CustomWellImage] # type: ignore[valid-type] + + +class CustomWellAttrs(WellAttrs05): + well: CustomWellMeta # type: ignore[valid-type] diff --git a/src/ngio/ome_zarr_meta/v05/_v05_spec_utils.py b/src/ngio/ome_zarr_meta/v05/_v05_spec_utils.py new file mode 100644 index 00000000..04a560c0 --- /dev/null +++ b/src/ngio/ome_zarr_meta/v05/_v05_spec_utils.py @@ -0,0 +1,518 @@ +"""Utilities for OME-Zarr v05 specs. + +This module provides a set of classes to internally handle the metadata +of the OME-Zarr v05 specification. + +For Images and Labels implements the following functionalities: +- A function to find if a dict view of the metadata is a valid OME-Zarr v05 metadata. +- A function to convert a v05 image metadata to a ngio image metadata. +- A function to convert a ngio image metadata to a v05 image metadata. 
+""" + +from ome_zarr_models.common.omero import Channel as ChannelV05 +from ome_zarr_models.common.omero import Omero as OmeroV05 +from ome_zarr_models.common.omero import Window as WindowV05 +from ome_zarr_models.v05.axes import Axis as AxisV05 +from ome_zarr_models.v05.coordinate_transformations import VectorScale as VectorScaleV05 +from ome_zarr_models.v05.coordinate_transformations import ( + VectorTranslation as VectorTranslationV05, +) +from ome_zarr_models.v05.hcs import HCSAttrs as HCSAttrsV05 +from ome_zarr_models.v05.image import ImageAttrs as ImageAttrsV05 +from ome_zarr_models.v05.image_label import ImageLabelAttrs as LabelAttrsV05 +from ome_zarr_models.v05.multiscales import Dataset as DatasetV05 +from ome_zarr_models.v05.multiscales import Multiscale as MultiscaleV05 +from ome_zarr_models.v05.multiscales import ValidTransform as ValidTransformV05 +from pydantic import BaseModel, ValidationError + +from ngio.ome_zarr_meta.ngio_specs import ( + AxesHandler, + AxesSetup, + Axis, + AxisType, + Channel, + ChannelsMeta, + ChannelVisualisation, + Dataset, + ImageLabelSource, + NgioImageMeta, + NgioLabelMeta, + NgioPlateMeta, + NgioWellMeta, + default_channel_name, +) +from ngio.ome_zarr_meta.v05._custom_models import CustomWellAttrs as WellAttrsV05 + + +class ImageV05AttrsWithOmero(ImageAttrsV05): + omero: OmeroV05 | None = None + + +class ImageV05WithOmero(BaseModel): + ome: ImageV05AttrsWithOmero + + +class ImageLabelV05(BaseModel): + ome: LabelAttrsV05 + + +def _is_v05_image_meta(metadata: dict) -> ImageV05WithOmero | ValidationError: + """Check if the metadata is a valid OME-Zarr v05 metadata. + + Args: + metadata (dict): The metadata to check. + + Returns: + bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise. + """ + try: + return ImageV05WithOmero(**metadata) + except ValidationError as e: + return e + + +def _is_v05_label_meta(metadata: dict) -> ImageLabelV05 | ValidationError: + """Check if the metadata is a valid OME-Zarr v05 metadata. + + Args: + metadata (dict): The metadata to check. + + Returns: + bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise. 
+ """ + try: + return ImageLabelV05(**metadata) + except ValidationError as e: + return e + + +def _v05_omero_to_channels(v05_omero: OmeroV05 | None) -> ChannelsMeta | None: + if v05_omero is None: + return None + + ngio_channels = [] + for idx, v05_channel in enumerate(v05_omero.channels): + channel_extra = v05_channel.model_extra + + if channel_extra is None: + channel_extra = {} + + if "label" in channel_extra: + label = channel_extra.pop("label") + else: + label = default_channel_name(idx) + + if "wavelength_id" in channel_extra: + wavelength_id = channel_extra.pop("wavelength_id") + else: + wavelength_id = label + + if "active" in channel_extra: + active = channel_extra.pop("active") + else: + active = True + + channel_visualisation = ChannelVisualisation( + color=v05_channel.color, + start=v05_channel.window.start, + end=v05_channel.window.end, + min=v05_channel.window.min, + max=v05_channel.window.max, + active=active, + **channel_extra, + ) + + ngio_channels.append( + Channel( + label=label, + wavelength_id=wavelength_id, + channel_visualisation=channel_visualisation, + ) + ) + + v05_omero_extra = v05_omero.model_extra if v05_omero.model_extra is not None else {} + return ChannelsMeta(channels=ngio_channels, **v05_omero_extra) + + +def _compute_scale_translation( + v05_transforms: ValidTransformV05, + scale: list[float], + translation: list[float], +) -> tuple[list[float], list[float]]: + for v05_transform in v05_transforms: + if isinstance(v05_transform, VectorScaleV05): + scale = [t1 * t2 for t1, t2 in zip(scale, v05_transform.scale, strict=True)] + + elif isinstance(v05_transform, VectorTranslationV05): + translation = [ + t1 + t2 + for t1, t2 in zip(translation, v05_transform.translation, strict=True) + ] + else: + raise NotImplementedError( + f"Coordinate transformation {v05_transform} is not supported." 
+ ) + return scale, translation + + +def _v05_to_ngio_datasets( + v05_multiscale: MultiscaleV05, + axes_setup: AxesSetup, + allow_non_canonical_axes: bool = False, + strict_canonical_order: bool = True, +) -> list[Dataset]: + """Convert a v05 multiscale to a list of ngio datasets.""" + datasets = [] + + global_scale = [1.0] * len(v05_multiscale.axes) + global_translation = [0.0] * len(v05_multiscale.axes) + + if v05_multiscale.coordinateTransformations is not None: + global_scale, global_translation = _compute_scale_translation( + v05_multiscale.coordinateTransformations, global_scale, global_translation + ) + + # Prepare axes handler + axes = [] + for v05_axis in v05_multiscale.axes: + unit = v05_axis.unit + if unit is not None and not isinstance(unit, str): + unit = str(unit) + axes.append( + Axis( + name=str(v05_axis.name), + axis_type=AxisType(v05_axis.type), + # (for some reason the type is a generic JsonValue, + # but it should be a string or None) + unit=v05_axis.unit, # type: ignore + ) + ) + axes_handler = AxesHandler( + axes=axes, + axes_setup=axes_setup, + allow_non_canonical_axes=allow_non_canonical_axes, + strict_canonical_order=strict_canonical_order, + ) + + for v05_dataset in v05_multiscale.datasets: + _scale, _translation = _compute_scale_translation( + v05_dataset.coordinateTransformations, global_scale, global_translation + ) + datasets.append( + Dataset( + path=v05_dataset.path, + axes_handler=axes_handler, + scale=_scale, + translation=_translation, + ) + ) + return datasets + + +def v05_to_ngio_image_meta( + metadata: dict, + axes_setup: AxesSetup | None = None, + allow_non_canonical_axes: bool = False, + strict_canonical_order: bool = True, +) -> tuple[bool, NgioImageMeta | ValidationError]: + """Convert a v05 image metadata to a ngio image metadata. + + Args: + metadata (dict): The v05 image metadata. + axes_setup (AxesSetup, optional): The axes setup. This is + required to convert image with non-canonical axes names. + allow_non_canonical_axes (bool, optional): Allow non-canonical axes. + strict_canonical_order (bool, optional): Strict canonical order. + + Returns: + NgioImageMeta: The ngio image metadata. + """ + v05_image = _is_v05_image_meta(metadata) + if isinstance(v05_image, ValidationError): + return False, v05_image + v05_image = v05_image.ome + if len(v05_image.multiscales) > 1: + raise NotImplementedError( + "Multiple multiscales in a single image are not supported in ngio." + ) + + v05_multiscale = v05_image.multiscales[0] + + channels_meta = _v05_omero_to_channels(v05_image.omero) + axes_setup = axes_setup if axes_setup is not None else AxesSetup() + datasets = _v05_to_ngio_datasets( + v05_multiscale, + axes_setup=axes_setup, + allow_non_canonical_axes=allow_non_canonical_axes, + strict_canonical_order=strict_canonical_order, + ) + + name = v05_multiscale.name + if name is not None and not isinstance(name, str): + name = str(name) + return True, NgioImageMeta( + version="0.5", + name=name, + datasets=datasets, + channels=channels_meta, + ) + + +def v05_to_ngio_label_meta( + metadata: dict, + axes_setup: AxesSetup | None = None, + allow_non_canonical_axes: bool = False, + strict_canonical_order: bool = True, +) -> tuple[bool, NgioLabelMeta | ValidationError]: + """Convert a v05 image metadata to a ngio image metadata. + + Args: + metadata (dict): The v05 image metadata. + axes_setup (AxesSetup, optional): The axes setup. This is + required to convert image with non-canonical axes names. 
+ allow_non_canonical_axes (bool, optional): Allow non-canonical axes. + strict_canonical_order (bool, optional): Strict canonical order. + + Returns: + NgioImageMeta: The ngio image metadata. + """ + v05_label = _is_v05_label_meta(metadata) + if isinstance(v05_label, ValidationError): + return False, v05_label + v05_label = v05_label.ome + + if len(v05_label.multiscales) > 1: + raise NotImplementedError( + "Multiple multiscales in a single image are not supported in ngio." + ) + + v05_multiscale = v05_label.multiscales[0] + + axes_setup = axes_setup if axes_setup is not None else AxesSetup() + datasets = _v05_to_ngio_datasets( + v05_multiscale, + axes_setup=axes_setup, + allow_non_canonical_axes=allow_non_canonical_axes, + strict_canonical_order=strict_canonical_order, + ) + + if v05_label.image_label is not None: + source = v05_label.image_label.source + if source is None: + image_label_source = None + else: + source = v05_label.image_label.source + if source is None: + image_label_source = None + else: + image_label_source = source.image + image_label_source = ImageLabelSource( + version="0.5", + source={"image": image_label_source}, + ) + else: + image_label_source = None + name = v05_multiscale.name + if name is not None and not isinstance(name, str): + name = str(name) + + return True, NgioLabelMeta( + version="0.5", + name=name, + datasets=datasets, + image_label=image_label_source, + ) + + +def _ngio_to_v05_multiscale(name: str | None, datasets: list[Dataset]) -> MultiscaleV05: + """Convert a ngio multiscale to a v05 multiscale. + + Args: + name (str | None): The name of the multiscale. + datasets (list[Dataset]): The ngio datasets. + + Returns: + MultiscaleV05: The v05 multiscale. + """ + ax_mapper = datasets[0].axes_handler + v05_axes = [] + for axis in ax_mapper.axes: + v05_axes.append( + AxisV05( + name=axis.name, + type=axis.axis_type.value if axis.axis_type is not None else None, + unit=axis.unit if axis.unit is not None else None, + ) + ) + + v05_datasets = [] + for dataset in datasets: + transform = [VectorScaleV05(type="scale", scale=list(dataset._scale))] + if sum(dataset._translation) > 0: + transform = ( + VectorScaleV05(type="scale", scale=list(dataset._scale)), + VectorTranslationV05( + type="translation", translation=list(dataset._translation) + ), + ) + else: + transform = (VectorScaleV05(type="scale", scale=list(dataset._scale)),) + + v05_datasets.append( + DatasetV05(path=dataset.path, coordinateTransformations=transform) + ) + return MultiscaleV05(axes=v05_axes, datasets=tuple(v05_datasets), name=name) + + +def _ngio_to_v05_omero(channels: ChannelsMeta | None) -> OmeroV05 | None: + """Convert a ngio channels to a v05 omero.""" + if channels is None: + return None + + v05_channels = [] + for channel in channels.channels: + _model_extra = { + "label": channel.label, + "wavelength_id": channel.wavelength_id, + "active": channel.channel_visualisation.active, + } + if channel.channel_visualisation.model_extra is not None: + _model_extra.update(channel.channel_visualisation.model_extra) + + v05_channels.append( + ChannelV05( + color=channel.channel_visualisation.valid_color, + window=WindowV05( + start=channel.channel_visualisation.start, + end=channel.channel_visualisation.end, + min=channel.channel_visualisation.min, + max=channel.channel_visualisation.max, + ), + **_model_extra, + ) + ) + + _model_extra = channels.model_extra if channels.model_extra is not None else {} + return OmeroV05(channels=v05_channels, **_model_extra) + + +def ngio_to_v05_image_meta(metadata: 
NgioImageMeta) -> dict: + """Convert a ngio image metadata to a v05 image metadata. + + Args: + metadata (NgioImageMeta): The ngio image metadata. + + Returns: + dict: The v05 image metadata. + """ + v05_muliscale = _ngio_to_v05_multiscale( + name=metadata.name, datasets=metadata.datasets + ) + v05_omero = _ngio_to_v05_omero(metadata._channels_meta) + + v05_image_attrs = ImageV05AttrsWithOmero( + multiscales=[v05_muliscale], omero=v05_omero, version="0.5" + ) + v05_image = ImageV05WithOmero( + ome=v05_image_attrs, + ) + return v05_image.model_dump(exclude_none=True, by_alias=True) + + +def ngio_to_v05_label_meta(metadata: NgioLabelMeta) -> dict: + """Convert a ngio image metadata to a v05 image metadata. + + Args: + metadata (NgioImageMeta): The ngio image metadata. + + Returns: + dict: The v05 image metadata. + """ + v05_muliscale = _ngio_to_v05_multiscale( + name=metadata.name, datasets=metadata.datasets + ) + labels_meta = { + "multiscales": [v05_muliscale], + "image-label": metadata.image_label.model_dump(), + } + v05_label = LabelAttrsV05(**labels_meta, version="0.5") + v05_label = ImageLabelV05( + ome=v05_label, + ) + return v05_label.model_dump(exclude_none=True, by_alias=True) + + +class WellV05(BaseModel): + ome: WellAttrsV05 + + +class HCSV05(BaseModel): + ome: HCSAttrsV05 + + +def v05_to_ngio_well_meta( + metadata: dict, +) -> tuple[bool, NgioWellMeta | ValidationError]: + """Convert a v05 well metadata to a ngio well metadata. + + Args: + metadata (dict): The v05 well metadata. + + Returns: + result (bool): True if the conversion was successful, False otherwise. + ngio_well_meta (NgioWellMeta): The ngio well metadata. + """ + try: + v05_well = WellV05(**metadata) + except ValidationError as e: + return False, e + + return True, NgioWellMeta(**v05_well.ome.model_dump()) + + +def v05_to_ngio_plate_meta( + metadata: dict, +) -> tuple[bool, NgioPlateMeta | ValidationError]: + """Convert a v05 plate metadata to a ngio plate metadata. + + Args: + metadata (dict): The v05 plate metadata. + + Returns: + result (bool): True if the conversion was successful, False otherwise. + ngio_plate_meta (NgioPlateMeta): The ngio plate metadata. + """ + try: + v05_plate = HCSV05(**metadata) + except ValidationError as e: + return False, e + + return True, NgioPlateMeta(**v05_plate.ome.model_dump()) + + +def ngio_to_v05_well_meta(metadata: NgioWellMeta) -> dict: + """Convert a ngio well metadata to a v05 well metadata. + + Args: + metadata (NgioWellMeta): The ngio well metadata. + + Returns: + dict: The v05 well metadata. + """ + v05_well = WellAttrsV05(**metadata.model_dump()) + v05_well = WellV05(ome=v05_well) + return v05_well.model_dump(exclude_none=True, by_alias=True) + + +def ngio_to_v05_plate_meta(metadata: NgioPlateMeta) -> dict: + """Convert a ngio plate metadata to a v05 plate metadata. + + Args: + metadata (NgioPlateMeta): The ngio plate metadata. + + Returns: + dict: The v05 plate metadata. 
+ """ + v05_plate = HCSAttrsV05(**metadata.model_dump()) + v05_plate = HCSV05(ome=v05_plate) + return v05_plate.model_dump(exclude_none=True, by_alias=True) diff --git a/src/ngio/tables/backends/_abstract_backend.py b/src/ngio/tables/backends/_abstract_backend.py index ad02bcc5..906980ea 100644 --- a/src/ngio/tables/backends/_abstract_backend.py +++ b/src/ngio/tables/backends/_abstract_backend.py @@ -198,6 +198,13 @@ def write_metadata(self, metadata: dict | None = None) -> None: if metadata is None: metadata = {} + attrs = self._group_handler.reopen_group().attrs.asdict() + # This is required by anndata to identify the format + if "encoding-type" in attrs: + metadata["encoding-type"] = attrs["encoding-type"] + if "encoding-version" in attrs: + metadata["encoding-version"] = attrs["encoding-version"] + backend_metadata = BackendMeta( backend=self.backend_name(), index_key=self.index_key, diff --git a/src/ngio/tables/backends/_anndata.py b/src/ngio/tables/backends/_anndata.py index fad51982..56495b6f 100644 --- a/src/ngio/tables/backends/_anndata.py +++ b/src/ngio/tables/backends/_anndata.py @@ -1,4 +1,5 @@ from anndata import AnnData +from anndata._settings import settings from pandas import DataFrame from polars import DataFrame as PolarsDataFrame from polars import LazyFrame @@ -40,6 +41,7 @@ def implements_polars() -> bool: def load_as_anndata(self) -> AnnData: """Load the table as an AnnData object.""" + settings.zarr_write_format = self._group_handler.zarr_format anndata = custom_anndata_read_zarr(self._group_handler._group) anndata = normalize_anndata(anndata, index_key=self.index_key) return anndata @@ -58,7 +60,8 @@ def write_from_anndata(self, table: AnnData) -> None: "Please make sure to use a compatible " "store like a zarr.DirectoryStore." 
) - table.write_zarr(full_url) # type: ignore (AnnData writer requires a str path) + settings.zarr_write_format = self._group_handler.zarr_format + table.write_zarr(full_url) def write_from_pandas(self, table: DataFrame) -> None: """Serialize the table from a pandas DataFrame.""" diff --git a/src/ngio/tables/backends/_anndata_utils.py b/src/ngio/tables/backends/_anndata_utils.py index 86c5968d..5e7bfcc9 100644 --- a/src/ngio/tables/backends/_anndata_utils.py +++ b/src/ngio/tables/backends/_anndata_utils.py @@ -9,6 +9,7 @@ from anndata._io.zarr import read_dataframe from anndata.compat import _clean_uns from anndata.experimental import read_dispatched +from zarr.storage import LocalStore from ngio.utils import ( NgioValueError, @@ -35,7 +36,7 @@ def custom_anndata_read_zarr( """ group = open_group_wrapper(store=store, mode="r") - if not isinstance(group.store, zarr.DirectoryStore): + if not isinstance(group.store, LocalStore): elem_to_read = ["X", "obs", "var"] if elem_to_read is None: @@ -87,6 +88,8 @@ def callback(func: Callable, elem_name: str, elem: Any, iospec: Any) -> Any: if isinstance(group["obs"], zarr.Array): _clean_uns(adata) + if isinstance(adata, dict): + adata = AnnData(**adata) if not isinstance(adata, AnnData): raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}") return adata diff --git a/src/ngio/tables/backends/_non_zarr_backends.py b/src/ngio/tables/backends/_non_zarr_backends.py index 155aa889..ae60039a 100644 --- a/src/ngio/tables/backends/_non_zarr_backends.py +++ b/src/ngio/tables/backends/_non_zarr_backends.py @@ -5,7 +5,7 @@ from pandas import DataFrame from polars import DataFrame as PolarsDataFrame from polars import LazyFrame -from zarr.storage import DirectoryStore, FSStore +from zarr.storage import FsspecStore, LocalStore from ngio.tables.backends._abstract_backend import AbstractTableBackend from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf @@ -88,9 +88,9 @@ def _load_from_fs_store_lf(self, reader): def load_as_pandas_df(self) -> DataFrame: """Load the table as a pandas DataFrame.""" store = self._group_handler.store - if isinstance(store, DirectoryStore): + if isinstance(store, LocalStore): dataframe = self._load_from_directory_store(reader=self.df_reader) - elif isinstance(store, FSStore): + elif isinstance(store, FsspecStore): dataframe = self._load_from_fs_store_df(reader=self.df_reader) else: ext = self.table_name.split(".")[-1] @@ -117,9 +117,9 @@ def load(self) -> DataFrame: def load_as_polars_lf(self) -> LazyFrame: """Load the table as a polars LazyFrame.""" store = self._group_handler.store - if isinstance(store, DirectoryStore): + if isinstance(store, LocalStore): lazy_frame = self._load_from_directory_store(reader=self.lf_reader) - elif isinstance(store, FSStore): + elif isinstance(store, FsspecStore): lazy_frame = self._load_from_fs_store_lf(reader=self.lf_reader) else: ext = self.table_name.split(".")[-1] @@ -146,7 +146,7 @@ def load_as_polars_lf(self) -> LazyFrame: def _get_store_url(self) -> str: """Get the store URL.""" store = self._group_handler.store - if isinstance(store, DirectoryStore): + if isinstance(store, LocalStore): full_url = self._group_handler.full_url else: ext = self.table_name.split(".")[-1] diff --git a/src/ngio/tables/backends/_utils.py b/src/ngio/tables/backends/_utils.py index e3698854..101483cf 100644 --- a/src/ngio/tables/backends/_utils.py +++ b/src/ngio/tables/backends/_utils.py @@ -403,7 +403,7 @@ def convert_anndata_to_pandas( DataFrame: Converted and normalized 
pandas DataFrame. """ pandas_df = anndata.to_df() - pandas_df[anndata.obs_keys()] = anndata.obs + pandas_df[anndata.obs.columns.to_list()] = anndata.obs pandas_df = normalize_pandas_df( pandas_df, index_key=index_key, diff --git a/src/ngio/utils/_zarr_utils.py b/src/ngio/utils/_zarr_utils.py index a94ca622..2a35729e 100644 --- a/src/ngio/utils/_zarr_utils.py +++ b/src/ngio/utils/_zarr_utils.py @@ -6,9 +6,10 @@ import fsspec import zarr from filelock import BaseFileLock, FileLock -from zarr.errors import ContainsGroupError, GroupNotFoundError -from zarr.storage import DirectoryStore, FSStore, MemoryStore, Store, StoreLike -from zarr.types import DIMENSION_SEPARATOR +from zarr.abc.store import Store +from zarr.core.array import CompressorLike +from zarr.errors import ContainsGroupError +from zarr.storage import FsspecStore, LocalStore, MemoryStore from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError from ngio.utils._errors import NgioError @@ -18,7 +19,7 @@ # but to make sure we can handle the store correctly # we need to be more restrictive NgioSupportedStore = ( - str | Path | fsspec.mapping.FSMap | FSStore | DirectoryStore | MemoryStore + str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | LocalStore ) GenericStore = Store | NgioSupportedStore StoreOrGroup = GenericStore | zarr.Group @@ -37,25 +38,29 @@ def _check_store(store) -> NgioSupportedStore: def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group: """Check the group and return a valid group.""" - is_read_only = getattr(group, "_read_only", False) - if is_read_only and mode in ["w", "w-"]: + if group.read_only and mode in ["w", "w-"]: raise NgioValueError( "The group is read only. Cannot open in write mode ['w', 'w-']" ) - if mode == "r" and not is_read_only: + if mode == "r" and not group.read_only: # let's make sure we don't accidentally write to the group group = zarr.open_group(store=group.store, path=group.path, mode="r") return group -def open_group_wrapper(store: StoreOrGroup, mode: AccessModeLiteral) -> zarr.Group: +def open_group_wrapper( + store: StoreOrGroup, + mode: AccessModeLiteral, + zarr_format: Literal[2, 3] | None = None, +) -> zarr.Group: """Wrapper around zarr.open_group with some additional checks. Args: store (StoreOrGroup): The store or group to open. - mode (ReadOrEdirLiteral): The mode to open the group in. + mode (AccessModeLiteral): The mode to open the group in. + zarr_format (int): The Zarr format version to use. Returns: zarr.Group: The opened Zarr group. @@ -67,16 +72,21 @@ def open_group_wrapper(store: StoreOrGroup, mode: AccessModeLiteral) -> zarr.Gro try: _check_store(store) - group = zarr.open_group(store=store, mode=mode) + group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format) - except ContainsGroupError as e: + except FileExistsError as e: raise NgioFileExistsError( f"A Zarr group already exists at {store}, consider setting overwrite=True." ) from e - except GroupNotFoundError as e: + except FileNotFoundError as e: raise NgioFileNotFoundError(f"No Zarr group found at {store}") from e + except ContainsGroupError as e: + raise NgioFileExistsError( + f"A Zarr group already exists at {store}, consider setting overwrite=True." 
+ ) from e + return group @@ -86,6 +96,7 @@ class ZarrGroupHandler: def __init__( self, store: StoreOrGroup, + zarr_format: Literal[2, 3] | None = None, cache: bool = False, mode: AccessModeLiteral = "a", parallel_safe: bool = False, @@ -96,6 +107,7 @@ def __init__( Args: store (StoreOrGroup): The Zarr store or group containing the image data. meta_mode (str): The mode of the metadata handler. + zarr_format (int): The Zarr format version to use. cache (bool): Whether to cache the metadata. mode (str): The mode of the store. parallel_safe (bool): If True, the handler will create a lock file to make @@ -112,20 +124,24 @@ def __init__( "If you want to use the lock mechanism, you should not use the cache." ) - group = open_group_wrapper(store, mode) + group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format) _store = group.store # Make sure the cache is set in the attrs # in the same way as the cache in the handler - group.attrs.cache = cache + + ## TODO + # Figure out how to handle the cache in the new zarr version + # group.attrs.cache = cache if parallel_safe: - if not isinstance(_store, DirectoryStore): + if not isinstance(_store, LocalStore): raise NgioValueError( - "The store needs to be a DirectoryStore to use the lock mechanism. " + "The store needs to be a LocalStore to use the lock mechanism. " f"Instead, got {_store.__class__.__name__}." ) - store_path = Path(_store.path) / group.path + + store_path = _store.root / group.path self._lock_path = store_path.with_suffix(".lock") self._lock = FileLock(self._lock_path, timeout=10) @@ -148,23 +164,28 @@ def __repr__(self) -> str: ) @property - def store(self) -> StoreLike: + def store(self) -> Store: """Return the store of the group.""" - return self.group.store + return self._group.store @property def full_url(self) -> str | None: """Return the store path.""" - if isinstance(self.store, DirectoryStore | FSStore): - _store_path = str(self.store.path) - _store_path = _store_path.rstrip("/") - return f"{self.store.path}/{self._group.path}" + if isinstance(self.store, LocalStore): + return (self.store.root / self.group.path).as_posix() + if isinstance(self.store, FsspecStore): + return self.store.fs.map.root_path return None + @property + def zarr_format(self) -> Literal[2, 3]: + """Return the Zarr format version.""" + return self._group.metadata.zarr_format + @property def mode(self) -> AccessModeLiteral: """Return the mode of the group.""" - return self._mode # type: ignore (return type is Literal) + return self._mode # type: ignore @property def lock(self) -> BaseFileLock: @@ -195,9 +216,30 @@ def remove_lock(self) -> None: raise NgioValueError("The lock is still in use. Cannot remove it.") + def reopen_group(self) -> zarr.Group: + """Reopen the group. + + This is useful when the group has been modified + outside of the handler. 
+ """ + if self.mode == "r": + mode = "r" + else: + mode = "r+" + return zarr.open_group( + store=self._group.store, + path=self._group.path, + mode=mode, + zarr_format=self._group.metadata.zarr_format, + ) + @property def group(self) -> zarr.Group: """Return the group.""" + if self._parallel_safe: + # If we are parallel safe, we need to reopen the group + # to make sure that the attributes are up to date + return self.reopen_group() return self._group def add_to_cache(self, key: str, value: object) -> None: @@ -229,8 +271,7 @@ def load_attrs(self) -> dict: def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None: """Write the metadata to the store.""" - is_read_only = getattr(self._group, "_read_only", False) - if is_read_only: + if self.group.read_only: raise NgioValueError("The group is read only. Cannot write metadata.") # we need to invalidate the current attrs cache @@ -342,23 +383,34 @@ def create_array( path: str, shape: tuple[int, ...], dtype: str, - chunks: tuple[int, ...] | None = None, - dimension_separator: DIMENSION_SEPARATOR = "/", - compressor: str = "default", + chunks: tuple[int, ...] | Literal["auto"] = "auto", + compressors: CompressorLike = "auto", + separator: Literal[".", "/"] = "/", overwrite: bool = False, ) -> zarr.Array: if self.mode == "r": raise NgioValueError("Cannot create an array in read only mode.") + if self.zarr_format == 2: + chunks_encoding = { + "name": "v2", + "separator": separator, + } + else: + chunks_encoding = { + "name": "default", + "separator": separator, + } + try: - return self.group.zeros( + return self.group.create_array( name=path, shape=shape, dtype=dtype, chunks=chunks, - dimension_separator=dimension_separator, - compressor=compressor, + chunk_key_encoding=chunks_encoding, overwrite=overwrite, + compressors=compressors, ) except ContainsGroupError as e: raise NgioFileExistsError( @@ -382,6 +434,7 @@ def derive_handler( group = self.get_group(path, create_mode=True, overwrite=overwrite) return ZarrGroupHandler( store=group, + zarr_format=self.zarr_format, cache=self.use_cache, mode=self.mode, parallel_safe=self._parallel_safe, @@ -413,3 +466,26 @@ def copy_handler(self, handler: "ZarrGroupHandler") -> None: f"Error copying group to {handler.full_url}, " f"#{n_skipped} files where skipped." ) + + +def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]: + """Find the dimension separator used in the Zarr store. + + Args: + array (zarr.Array): The Zarr array to check. + + Returns: + Literal[".", "/"]: The dimension separator used in the store. + """ + from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding + + if array.metadata.zarr_format == 2: + separator = array.metadata.dimension_separator + else: + separator = array.metadata.chunk_key_encoding + if not isinstance(separator, DefaultChunkKeyEncoding): + raise ValueError( + "Only DefaultChunkKeyEncoding is supported in this example." 
+            )
+        separator = separator.separator
+    return separator
diff --git a/tests/unit/common/test_pyramid.py b/tests/unit/common/test_pyramid.py
index 815e55fd..9329be17 100644
--- a/tests/unit/common/test_pyramid.py
+++ b/tests/unit/common/test_pyramid.py
@@ -22,9 +22,9 @@ def test_on_disk_zooms(
     tmp_path: Path, order: InterpolationOrder, mode: Literal["dask", "numpy", "coarsen"]
 ):
     source = tmp_path / "source.zarr"
-    source_array = zarr.open_array(source, shape=(16, 128, 128), dtype="uint8")
+    source_array = zarr.create_array(source, shape=(16, 128, 128), dtype="uint8")
     target = tmp_path / "target.zarr"
-    target_array = zarr.open_array(target, shape=(16, 64, 64), dtype="uint8")
+    target_array = zarr.create_array(target, shape=(16, 64, 64), dtype="uint8")
 
     on_disk_zoom(source_array, target_array, order=order, mode=mode)
 
diff --git a/tests/unit/tables/test_backends.py b/tests/unit/tables/test_backends.py
index 71dc3690..7663d503 100644
--- a/tests/unit/tables/test_backends.py
+++ b/tests/unit/tables/test_backends.py
@@ -155,7 +155,7 @@ def test_parquet_backend(tmp_path: Path):
 
 def test_anndata_backend(tmp_path: Path):
     store = tmp_path / "test_anndata_backend.zarr"
-    handler = ZarrGroupHandler(store=store, cache=True, mode="a")
+    handler = ZarrGroupHandler(store=store, cache=True, mode="a", zarr_format=2)
 
     backend = AnnDataBackend()
     backend.set_group_handler(handler, index_type="int")
diff --git a/tests/unit/utils/test_zarr_utils.py b/tests/unit/utils/test_zarr_utils.py
index 772d5937..f42ad7ac 100644
--- a/tests/unit/utils/test_zarr_utils.py
+++ b/tests/unit/utils/test_zarr_utils.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 import zarr
+from zarr.storage import LocalStore
 
 from ngio.utils import (
     NgioFileExistsError,
@@ -23,8 +24,8 @@ def test_group_handler_creation(tmp_path: Path, cache: bool):
     handler = ZarrGroupHandler(store=store, cache=cache, mode="a")
 
     _store = handler.group.store
-    assert isinstance(_store, zarr.DirectoryStore)
-    assert Path(_store.path) == store
+    assert isinstance(_store, LocalStore)
+    assert Path(_store.root.as_posix()) == store
     assert handler.use_cache == cache
 
     attrs = handler.load_attrs()
@@ -65,12 +66,12 @@ def test_group_handler_from_group(tmp_path: Path):
 def test_group_handler_read(tmp_path: Path):
     store = tmp_path / "test_group_handler_read.zarr"
 
-    group = zarr.group(store=store, overwrite=True)
+    group = zarr.create_group(store=store, overwrite=True)
     input_attrs = {"a": 1, "b": 2, "c": 3}
     group.attrs.update(input_attrs)
 
     group.create_group("group1")
-    group.create_dataset("array1", shape=(10, 10), dtype="int32")
+    group.create_array("array1", shape=(10, 10), dtype="int32")
 
     handler = ZarrGroupHandler(store=store, cache=True, mode="r")
 
@@ -97,10 +98,10 @@ def test_group_handler_read(tmp_path: Path):
 
 def test_open_fail(tmp_path: Path):
     store = tmp_path / "test_open_fail.zarr"
 
-    group = zarr.group(store=store, overwrite=True)
+    group = zarr.create_group(store=store, overwrite=True)
     read_only_group = open_group_wrapper(store=group, mode="r")
-    assert read_only_group._read_only
+    assert read_only_group.read_only
 
     with pytest.raises(NgioFileExistsError):
         open_group_wrapper(store=store, mode="w-")
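
A minimal sketch of the zarr-python v3 calls that the chunk_key_encoding branches in init_empty_pyramid and ZarrGroupHandler.create_array reduce to. The store paths, shape, chunks, dtype, and axis names below are illustrative assumptions, not values used anywhere in ngio.

# Illustrative only: how the old zarr v2 keywords (dimension_separator,
# compressor) map onto the zarr-python v3 create_array API used in this patch.
import zarr

# Writing zarr v2 metadata with the v3 library: the "/" separator is passed
# through chunk_key_encoding with name "v2" instead of dimension_separator.
root_v2 = zarr.create_group("example_v2.zarr", zarr_format=2, overwrite=True)
root_v2.create_array(
    name="0",
    shape=(16, 128, 128),
    dtype="uint16",
    chunks=(8, 64, 64),
    chunk_key_encoding={"name": "v2", "separator": "/"},
    compressors="auto",  # replaces the old compressor="default"
    overwrite=True,
)

# For zarr_format=3 the encoding name switches to "default", mirroring the
# else-branch in init_empty_pyramid and ZarrGroupHandler.create_array.
root_v3 = zarr.create_group("example_v3.zarr", zarr_format=3, overwrite=True)
root_v3.create_array(
    name="0",
    shape=(16, 128, 128),
    dtype="uint16",
    chunks=(8, 64, 64),
    chunk_key_encoding={"name": "default", "separator": "/"},
    compressors="auto",
    dimension_names=("z", "y", "x"),  # zarr format 3 only; hypothetical axes
    overwrite=True,
)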