diff --git a/pyproject.toml b/pyproject.toml index 1143cab94..028d37cc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,8 @@ dependencies = [ "tifffile!=2022.4.22", "tqdm>=4.50.2", "validators>=0.18.2", + "centrosome>=1.2.3", + "cp-measure>=0.1.4" "xarray>=2024.10.0", "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", diff --git a/src/squidpy/im/__init__.py b/src/squidpy/im/__init__.py index fcf54cf94..e0de6618e 100644 --- a/src/squidpy/im/__init__.py +++ b/src/squidpy/im/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations from squidpy.im._container import ImageContainer -from squidpy.im._feature import calculate_image_features +from squidpy.im._feature import calculate_image_features, quantify_morphology from squidpy.im._process import process from squidpy.im._segment import ( SegmentationCustom, diff --git a/src/squidpy/im/_feature.py b/src/squidpy/im/_feature.py index 9694ac48e..3c2a76877 100644 --- a/src/squidpy/im/_feature.py +++ b/src/squidpy/im/_feature.py @@ -1,24 +1,142 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +import typing +import warnings +from collections.abc import Generator, Mapping, Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Callable, TypeVar, Union +import numpy as np +import numpy.typing as npt import pandas as pd +import spatialdata as sd +import xarray as xr from anndata import AnnData from scanpy import logging as logg +from skimage.measure import perimeter, regionprops +from spatialdata import SpatialData +from spatialdata.models import Image2DModel, Labels2DModel, TableModel +import squidpy._utils from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, SigQueue, _get_n_cores, parallelize from squidpy.gr._utils import _save_data +from squidpy.im import _measurements from squidpy.im._container import ImageContainer -__all__ = ["calculate_image_features"] +__all__ = ["calculate_image_features", "quantify_morphology"] + +IntegerNDArrayType = TypeVar("IntegerNDArrayType", bound=npt.NDArray[np.integer[Any]]) +FloatNDArrayType = TypeVar("FloatNDArrayType", bound=npt.NDArray[np.floating[Any]]) +RegionPropsCallableType = Callable[ + [IntegerNDArrayType, FloatNDArrayType], Union[Union[int, float, list[Union[int, float]]]] +] + +RegionPropsImageCallableType = Callable[[IntegerNDArrayType, FloatNDArrayType], dict[str, Union[int, float]]] + + +def circularity(regionmask: IntegerNDArrayType) -> float: + """ + Calculate the circularity of the region. + + :param regionmask: Region properties object + :return: circularity of the region + """ + perim = perimeter(regionmask) + if perim == 0: + return 0 + area = np.sum(regionmask) + return float((4 * np.pi * area) / (perim**2)) + + +def _get_region_props( + label_element: xr.DataArray, + image_element: xr.DataArray, + props: list[str] | None = None, + extra_methods: list[RegionPropsCallableType] | None = None, +) -> pd.DataFrame: + if not extra_methods: + extra_methods = [] + if props is None: + # if we didn't get any properties, we'll do the bare minimum + props = ["label"] + + np_rgb_image = image_element.values.transpose(1, 2, 0) # (c, y, x) -> (y, x, c) + + # Add custom extra methods here + # Add additional measurements here that handle individual regions + rp_extra_methods = [ + circularity, + _measurements.granularity, + _measurements.radial_distribution, + _measurements.calculate_image_texture, + _measurements.calculate_histogram, + _measurements.calculate_quantiles, + ] + extra_methods + + # Add additional measurements here that calculate on the entire label image + image_extra_methods = [ + _measurements.border_occupied_factor, + _measurements.zernike, + ] # type: list[RegionPropsImageCallableType] + image_extra_methods = {method.__name__: method for method in image_extra_methods} + + # can't use regionprops_table because it only returns int + regions = regionprops( + label_image=label_element.values, + intensity_image=np_rgb_image, + extra_properties=rp_extra_methods, + ) + # dynamically extract specified properties and create a df + extracted_props = {prop: [] for prop in props + [e.__name__ for e in extra_methods]} # type: dict[str, list[int | float]] + for prop in props + [e.__name__ for e in extra_methods]: + if prop in image_extra_methods: + im_extra_result = image_extra_methods[prop](label_element.values, np_rgb_image) + for region in regions: + extracted_props[prop].append(im_extra_result.get(region.label)) + continue + + for region in regions: + try: + extracted_props[prop].append(getattr(region, prop)) + except AttributeError as e: + raise ValueError(f"Property '{prop}' is not available in the region properties.") from e + + return pd.DataFrame(extracted_props) + + +def _subset_image_using_label( + label_element: xr.DataArray, image_element: xr.DataArray +) -> Generator[tuple[int, xr.DataArray], None, None]: + """ + A generator that extracts subsets of the RGB image based on the bitmap. + + :param label_element: xarray.DataArray with cell identifiers + :param image_element: xarray.DataArray with RGB image data + :yield: Subsets of the RGB image corresponding to each cell in the bitmap + """ + unique_cells = np.unique(label_element.values) + + for cell_id in unique_cells: + if cell_id == 0: + # Assuming 0 is the background or non-cell area, skip it + continue + + cell_mask = xr.DataArray( + label_element.values == cell_id, + dims=label_element.dims, + coords=label_element.coords, + ) + + subset = image_element.where(cell_mask, drop=True) + + yield cell_id, subset @d.dedent @inject_docs(f=ImageFeature) +@squidpy._utils.deprecated def calculate_image_features( adata: AnnData, img: ImageContainer, @@ -94,7 +212,15 @@ def calculate_image_features( n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, - )(adata, img, layer=layer, library_id=library_id, features=features, features_kwargs=features_kwargs, **kwargs) + )( + adata, + img, + layer=layer, + library_id=library_id, + features=features, + features_kwargs=features_kwargs, + **kwargs, + ) if copy: logg.info("Finish", time=start) @@ -103,6 +229,270 @@ def calculate_image_features( _save_data(adata, attr="obsm", key=key_added, data=res, time=start) +def sdata_image_features_helper( + adata: AnnData, + img: ImageContainer, + layer: str | None = None, + library_id: str | Sequence[str] | None = None, + features: str | Sequence[str] = ImageFeature.SUMMARY.s, + features_kwargs: Mapping[str, Mapping[str, Any]] = MappingProxyType({}), + key_added: str = "img_features", + copy: bool = False, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True, + return_results_as_new_adata: bool = False, + **kwargs: Any, +) -> pd.DataFrame | AnnData | None: + segmentation_kwargs = features_kwargs.get("segmentation", None) + if segmentation_kwargs is None: + raise ValueError( + "A segmentation layer with its " + "`features_kwargs['segmentation']['label_layer']` " + "is necessary for the morphology quantification" + ) + + label_layer = segmentation_kwargs.get("label_layer", None) + if label_layer is None: + raise ValueError( + "`features_kwargs['segmentation']['label_layer']` " "is necessary for the morphology quantification" + ) + + image_layer = layer + if image_layer is None: + raise ValueError( + "A `layer` name for the image layer in the ImageContainer " "is necessary for the morphology quantification" + ) + + adata.uns.update( + {"spatialdata_attrs": {"region": label_layer, "region_key": "region", "instance_key": "instance_id"}} + ) + + adata.obs["region"] = label_layer + adata.obs["instance_id"] = -1 + # adata.uns["spatialdata_attrs"]["region"] = label_layer + # adata.uns["spatialdata_attrs"]["region_key"] = "region" + # adata.uns["spatialdata_attrs"]["instance_key"] = "instance_id" + + try: + sdata = SpatialData( + images={ + image_layer: Image2DModel.parse( + data=img[image_layer].rename({"channels": "c"}).transpose("z", "c", "y", "x")[0, ...] + ), + }, + labels={ + label_layer: Labels2DModel.parse( + data=img[label_layer].rename({"channels:0": "c"}).transpose("z", "c", "y", "x")[0, 0, ...] + ), + }, + tables={"table": TableModel.parse(adata)}, + ) + except ValueError as exc: + raise ValueError( + "Failed to create SpatialData object, " + "possibly due to dimensions in xarray not being named " + "('c', 'z', 'y', 'x') or them not being in this particular order" + ) from exc + + start = logg.info("Calculating features") + + res = quantify_morphology( + sdata, + label=label_layer, + image=image_layer, + ) + + if copy: + return res + + if return_results_as_new_adata: + return AnnData(res) + + _save_data(adata, attr="obsm", key=key_added, data=res, time=start) + + +def quantify_morphology( + sdata: SpatialData, + label: str, + image: str, + methods: list[str | Callable] | None = None, + split_by_channels: bool = True, + **kwargs: Any, +) -> pd.DataFrame | None: + extra_methods, methods = _validate_methods(methods) + + for element in [label, image]: + if element is not None and element not in sdata: + raise KeyError(f"Key `{element}` not found in `sdata`.") + + table_key = _get_table_key(sdata, label, kwargs) + + region_key = sdata[table_key].uns["spatialdata_attrs"]["region_key"] + if not np.any(sdata[table_key].obs[region_key] == label): + raise ValueError(f"Label '{label}' not found in region key ({region_key}) column of sdata table '{table_key}'") + + instance_key = sdata[table_key].uns["spatialdata_attrs"]["instance_key"] + + image_element, label_element = _apply_transformations(image, label, sdata) + + region_props = _get_region_props( + label_element, + image_element, + props=methods, + extra_methods=extra_methods, + ) + + if split_by_channels: + channels = image_element.c.values + for col in region_props.columns: + if isinstance(region_props[col].values[0], (int, float, np.integer, np.floating)): + continue # ignore single value returns + + is_processed = False + + # did the method return a list of values? + if isinstance(region_props[col].values[0], (list, tuple)): + is_processed = _extract_from_list_like(channels, col, is_processed, region_props) + + if isinstance(region_props[col].values[0], np.ndarray): + is_processed = _extract_from_ndarray(channels, col, is_processed, region_props) + if is_processed: + region_props.drop(columns=[col], inplace=True) + else: + raise NotImplementedError( + f"Result of morphology method '{col}' cannot be interpreted, " + f"as its dtype ({type(region_props[col].values[0])} and shape " + f"does not conform to the expected shapes and dtypes." + ) + + region_props.rename(columns={"label": instance_key}, inplace=True) + region_props[region_key] = label + region_props.set_index([region_key, instance_key], inplace=True) + + results = sdata[table_key].obs[[region_key, instance_key]] + results = results.join(region_props, how="left", on=[region_key, instance_key]) + + # region_props = region_props.set_index("label", drop=True) + # region_props.index.name = None + # region_props.index = region_props.index.astype(str) + + sdata[table_key].obsm["morphology"] = results + + return region_props + + +def _apply_transformations(image, label, sdata): + label_transform = sdata[label].transform + image_transform = sdata[image].transform + for transform in [label_transform, image_transform]: + if len(transform) != 1: + raise ValueError("More than one coordinate system detected") + coord_sys_label = next(iter(label_transform)) + coord_sys_image = next(iter(image_transform)) + if coord_sys_label != coord_sys_image: + raise ValueError(f"Coordinate system do not match! label: {coord_sys_label}, image: {coord_sys_image}") + # from here on we should be certain that we have a label + label_element = sdata.transform_element_to_coordinate_system(label, coord_sys_label) + image_element = sdata.transform_element_to_coordinate_system(image, coord_sys_image) if image is not None else None + return image_element, label_element + + +def _validate_methods(methods): + if methods is None: + # default case but without mutable argument as default value + # noinspection PyProtectedMember + methods = _measurements._all_regionprops_names() + elif isinstance(methods, (str, Callable)): + methods = [methods] + if not isinstance(methods, list): + raise ValueError("Argument `methods` must be a list of strings.") + if not all(isinstance(method, (str, Callable)) for method in methods): + raise ValueError("All elements in `methods` must be strings or callables.") + if "label" not in methods: + methods.append("label") + extra_methods = [] + for method in methods: + if callable(method): + extra_methods.append(method) + methods.remove(method) + return extra_methods, methods + + +def _extract_from_ndarray(channels, col, is_processed, region_props): + shape = region_props[col].values[0].shape + if not all(val.shape == shape for val in region_props[col].values): + raise ValueError(f"The results of the morphology method {col} have different shapes, this cannot be handled") + # Handle cases like centroids which return coordinates for each region + if len(shape) == 1 and shape[0] != len(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + # Handle cases like intensity which return one value per channel for each region + if len(shape) == 1 and shape[0] == len(channels): + for prop_idx in range(shape[0]): + if all(isinstance(val[prop_idx], dict) for val in region_props[col].values): + for key in region_props[col][0][prop_idx].keys(): + region_props[f"{col}_ch{prop_idx}_{key}"] = [ + float(val[prop_idx][key]) for val in region_props[col].values + ] + else: + region_props[f"{col}_ch{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + # Handle cases like granularity which return many values for each channel for each region + if len(shape) == 2: + if not shape[1] == len(channels): + raise ValueError( + f"Number of channels {len(channels)} do not match " + f"the shape of the returned numpy arrays {shape} of the morphology method {col}. " + f"It is expected that shape[1] should be equal to number of channels." + ) + + for channel_idx, channel in enumerate(channels): + for prop_idx in range(shape[0]): + region_props[f"{col}_ch{channel_idx}_{prop_idx}"] = [ + val[prop_idx, channel_idx] for val in region_props[col].values + ] + + is_processed = True + return is_processed + + +def _extract_from_list_like(channels, col, is_processed, region_props): + # are all lists of the length of the channel list? + length = len(region_props[col].values[0]) + if not all(len(val) == length for val in region_props[col].values): + raise ValueError(f"The results of the morphology method {col} have different lengths, this cannot be handled") + if length == len(channels): + for i, channel in enumerate(channels): + region_props[f"{col}_ch{channel}"] = [val[i] for val in region_props[col].values] + is_processed = True + if length != len(channels): + for prop_idx in range(length): + region_props[f"{col}_{prop_idx}"] = [val[prop_idx] for val in region_props[col].values] + is_processed = True + return is_processed + + +def _get_table_key(sdata: sd.SpatialData, label: str, kwargs: dict[str, typing.Any]) -> str: + table_key = kwargs.get("table_key", None) + if table_key is None: + tables = sd.get_element_annotators(sdata, label) + if len(tables) > 1: + raise ValueError( + f"Multiple tables detected in `sdata` for {label}, " + f"please specify a specific table with the `table_key` parameter" + ) + if len(tables) == 0: + raise ValueError( + f"No tables automatically detected in `sdata` for {label}, " + f"please specify a specific table with the `table_key` parameter" + ) + table_key = next(iter(tables)) + return table_key + + +@squidpy._utils.deprecated def _calculate_image_features_helper( obs_ids: Sequence[str], adata: AnnData, @@ -116,7 +506,12 @@ def _calculate_image_features_helper( ) -> pd.DataFrame: features_list = [] for crop in img.generate_spot_crops( - adata, obs_names=obs_ids, library_id=library_id, return_obs=False, as_array=False, **kwargs + adata, + obs_names=obs_ids, + library_id=library_id, + return_obs=False, + as_array=False, + **kwargs, ): if TYPE_CHECKING: assert isinstance(crop, ImageContainer) @@ -135,6 +530,7 @@ def _calculate_image_features_helper( elif feature == ImageFeature.SUMMARY: res = crop.features_summary(layer=layer, **feature_kwargs) elif feature == ImageFeature.SEGMENTATION: + # TODO: Potential bug here, should be label_layer and intensity layer must be specified via feature_kwargs res = crop.features_segmentation(intensity_layer=layer, **feature_kwargs) elif feature == ImageFeature.CUSTOM: res = crop.features_custom(layer=layer, **feature_kwargs) diff --git a/src/squidpy/im/_measurements.py b/src/squidpy/im/_measurements.py new file mode 100644 index 000000000..1a7442900 --- /dev/null +++ b/src/squidpy/im/_measurements.py @@ -0,0 +1,779 @@ +from __future__ import annotations + +import typing +from typing import Protocol + +import centrosome.cpmorphology +import centrosome.propagate +import centrosome.zernike +import numpy as np +import scipy.ndimage +import skimage.morphology +from skimage.feature import graycomatrix, graycoprops +from skimage.util import img_as_ubyte + +from squidpy.im import ImageContainer + + +def _all_regionprops_names() -> list[str]: + names = [ + "area", + "area_bbox", + "area_convex", + "area_filled", + "axis_major_length", + "axis_minor_length", + "centroid", # TODO might drop centroids + "centroid_local", + "centroid_weighted_local", + "eccentricity", + "equivalent_diameter_area", + "euler_number", + "extent", + "feret_diameter_max", + # "inertia_tensor", + "inertia_tensor_eigvals", + "intensity_max", + "intensity_min", + "intensity_mean", + # "intensity_std", # TODO either bump version for skimage to >=0.23 for intensity_std to be included in regionprops or drop std + # "moments", # TODO evaluate if more moments necessary + "moments_hu", + # "moments_normalized", + "num_pixels", + "orientation", + "perimeter", + "perimeter_crofton", + "solidity", + "border_occupied_factor", + "granularity", + "zernike", + "radial_distribution", + "calculate_image_texture", + "calculate_histogram", + "calculate_quantiles", + ] + return names + + +# class CalcImageFeatureCallable(Protocol): +# def __call__(self, mask: np.ndarray, pixels: np.ndarray, **kwargs: dict[str, typing.Any]) -> dict: ... + + +def calculate_image_feature(feature: typing.Callable, mask: np.ndarray, pixels: np.ndarray, *args, **kwargs) -> dict: + result = {} + for label in np.unique(mask): + if label == 0: + continue + if _is_multichannel(pixels): + result[label] = {} + for channel in range(_get_n_channels(pixels)): + result[label][channel] = feature( + mask=mask, pixels=pixels[..., channel][mask[..., 0] == label], **kwargs + ) + else: + result[label] = feature(mask=mask, pixels=pixels[mask == label], **kwargs) + + return result + + +def calculate_image_texture(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict: + distances = kwargs.get("image_texture_distances", (1,)) + angles = kwargs.get("image_texture_angles", (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)) + props = kwargs.get( + "image_texture_graycoprops", + ( + "contrast", + "dissimilarity", + "homogeneity", + "correlation", + "ASM", + ), + ) + if not np.issubdtype(pixels.dtype, np.uint8): + pixels = img_as_ubyte(pixels, force_copy=False) # values must be in [0, 255] + + features = {} + comatrix = graycomatrix(pixels, distances=distances, angles=angles, levels=256) + for prop in props: + tmp_features = graycoprops(comatrix, prop=prop) + for distance_idx, dist in enumerate(distances): + for angle_idx, a in enumerate(angles): + features[f"{prop}_dist-{dist}_angle-{a:.2f}"] = tmp_features[distance_idx, angle_idx] + return features + + +def calculate_histogram(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict: + """ + This is way too slow of an implementation + + + Parameters + ---------- + mask + pixels + kwargs + + Returns + ------- + + """ + bins = kwargs.get("bins", 10) + v_range = kwargs.get("v_range", None) + # if v_range is None, use whole-image range + if v_range is None: + v_range = np.min(pixels), np.max(pixels) + hist, _ = np.histogram( + pixels, + bins=bins, + range=v_range, + weights=None, + density=False, + ) + result = {str(i): count for i, count in enumerate(hist)} + return result + + +def calculate_quantiles(mask: np.ndarray, pixels: np.ndarray, **kwargs) -> dict[str, float]: + quantiles = kwargs.get("quantiles", [0.1, 0.5, 0.9]) + result = {str(quantile): np.quantile(pixels, quantile) for quantile in quantiles} + return result + + +# def calculate_quantiles(mask: np.ndarray, pixels: np.ndarray, *args, **kwargs) -> dict: +# quantiles = kwargs.get("quantiles", [0.1, 0.5, 0.9]) +# +# result = {} +# for label in np.unique(mask): +# if label == 0: +# continue +# if _is_multichannel(pixels): +# result[label] = {} +# for channel in range(_get_n_channels(pixels)): +# # result[label][channel] = _get_quantile(label, mask, pixels[..., channel], quantiles) +# result[label][channel] = {quantile: np.quantile(pixels[..., channel][mask[..., 0] == label], quantile) for quantile in quantiles} +# # current_pixels = pixels[mask == label] +# # result = {} +# # for quantile in quantiles: +# # result[quantile] = np.quantile(current_pixels, quantile) +# # return result +# else: +# result[label] = {quantile: np.quantile(pixels[mask == label], quantile) for quantile in quantiles} +# # result[label] = _get_quantile(label, mask, pixels, quantiles) + + +def _get_n_channels(img: np.ndarray | ImageContainer) -> int: + """ + Returns the number of channels in the image. + Right now, ImageContainer's layer organization is assumed meaning + (y, x, z, channels) + + Parameters + ---------- + img : np.ndarray or ImageContainer The image to test + + Returns + ------- + The number of channels in the image + """ + if len(img.shape) == 4: + return img.shape[3] + else: + raise NotImplementedError(f"Unexpected image shape (got {img.shape}, but expected (y, x, z, channels))") + + +def _is_multichannel(img: np.ndarray) -> bool: + """ + Determines if the image is multichannel, i.e. if it has more than 1 intensity layer. + Right now, ImageContainer's layer organization is assumed meaning + (y, x, z, channels) + + Parameters + ---------- + img : np.ndarray The image to test + + Returns + ------- + True if img has more than one channel + + """ + return _get_n_channels(img) > 1 + + +def border_occupied_factor(mask: np.ndarray, *args, **kwargs) -> dict[int, float]: + """ + Calculates the percentage of border pixels that are in a 4-connected neighborhood of another label + Takes ~1.7s/megapixels to calculate + + Parameters + ---------- + mask: np.ndarray integer grayscale image with the labels + args None + kwargs None + + Returns + ------- + Dict of percentages for each label key + + """ + n_border_pixels = {} + n_border_occupied_pixels = {} + + adjacent_indices = [ + [-1, 0], + [0, -1], + [1, 0], + [0, 1], + ] + + for coordinates, pixel in np.ndenumerate(mask): + if pixel == 0: + continue + is_border = False + is_occupied = False + + for adjacent_index in adjacent_indices: + try: + adjacent_pixel = mask[coordinates[0] + adjacent_index[0], coordinates[1] + adjacent_index[1]] + except IndexError: + # At image border, don't count image borders + continue + if adjacent_pixel == pixel: + continue + + is_border = True + if adjacent_pixel != 0: + is_occupied = True + + if is_border: + try: + n_border_pixels[pixel] += 1 + except KeyError: + n_border_pixels[pixel] = 1 + n_border_occupied_pixels[pixel] = 0 + if is_occupied: + n_border_occupied_pixels[pixel] += 1 + + result = {key: n_border_occupied_pixels[key] / n_border_pixels[key] for key in n_border_pixels.keys()} + + return result + + +# Copied from https://github.com/afermg/cp_measurements/blob/main/src/cp_measure/minimal/measuregranularity.py +__doc__ = """\ +MeasureGranularity +================== +**MeasureGranularity** outputs spectra of size measurements of the +textures in the image. + +Image granularity is a texture measurement that tries to fit a series of +structure elements of increasing size into the texture of the image and outputs a spectrum of measures +based on how well they fit. +Granularity is measured as described by Ilya Ravkin (references below). + +Basically, MeasureGranularity: +1 - Downsamples the image (if you tell it to). This is set in +**Subsampling factor for granularity measurements** or **Subsampling factor for background reduction**. +2 - Background subtracts anything larger than the radius in pixels set in +**Radius of structuring element.** +3 - For as many times as you set in **Range of the granular spectrum**, it gets rid of bright areas +that are only 1 pixel across, reports how much signal was lost by doing that, then repeats. +i.e. The first time it removes one pixel from all bright areas in the image, +(effectively deleting those that are only 1 pixel in size) and then reports what % of the signal was lost. +It then takes the first-iteration image and repeats the removal and reporting (effectively reporting +the amount of signal that is two pixels in size). etc. + +|MeasureGranularity_example| + +As of **CellProfiler 4.0** the settings for this module have been changed to simplify +configuration. A single set of parameters is now applied to all images and objects within the module, +rather than each image needing individual configuration. +Pipelines from older versions will be converted to match this format. If multiple sets of parameters +were defined CellProfiler will apply the first set from the older pipeline version. +Specifying multiple sets of parameters can still be achieved by running multiple copies of this module. + + +| + +============ ============ =============== +Supports 2D? Supports 3D? Respects masks? +============ ============ =============== +YES YES YES +============ ============ =============== + +Measurements made by this module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- *Granularity:* The module returns one measurement for each instance + of the granularity spectrum set in **Range of the granular spectrum**. + +References +^^^^^^^^^^ + +- Serra J. (1989) *Image Analysis and Mathematical Morphology*, Vol. 1. + Academic Press, London +- Maragos P. “Pattern spectrum and multiscale shape representation”, + *IEEE Transactions on Pattern Analysis and Machine Intelligence*, 11, + N 7, pp. 701-716, 1989 +- Vincent L. (2000) “Granulometries and Opening Trees”, *Fundamenta + Informaticae*, 41, No. 1-2, pp. 57-90, IOS Press, 2000. +- Vincent L. (1992) “Morphological Area Opening and Closing for + Grayscale Images”, *Proc. NATO Shape in Picture Workshop*, + Driebergen, The Netherlands, pp. 197-208. +- Ravkin I, Temov V. (1988) “Bit representation techniques and image + processing”, *Applied Informatics*, v.14, pp. 41-90, Finances and + Statistics, Moskow, (in Russian) +""" + + +def granularity( + mask: np.ndarray, + pixels: np.ndarray, + subsample_size: float = 0.25, + image_sample_size: float = 0.25, + element_size: int = 10, + granular_spectrum_length: int = 16, # default 16 +): + """ + Parameters + ---------- + subsample_size : float, optional + Subsampling factor for granularity measurements. + If the textures of interest are larger than a few pixels, we recommend + you subsample the image with a factor <1 to speed up the processing. + Downsampling the image will let you detect larger structures with a + smaller sized structure element. A factor >1 might increase the accuracy + but also require more processing time. Images are typically of higher + resolution than is required for granularity measurements, so the default + value is 0.25. For low-resolution images, increase the subsampling + fraction; for high-resolution images, decrease the subsampling fraction. + Subsampling by 1/4 reduces computation time by (1/4) :sup:`3` because the + size of the image is (1/4) :sup:`2` of original and the range of granular + spectrum can be 1/4 of original. Moreover, the results are sometimes + actually a little better with subsampling, which is probably because + with subsampling the individual granular spectrum components can be used + as features, whereas without subsampling a feature should be a sum of + several adjacent granular spectrum components. The recommendation on the + numerical value cannot be determined in advance; an analysis as in this + reference may be required before running the whole set. See this `pdf`_, + slides 27-31, 49-50. + + .. _pdf: http://www.ravkin.net/presentations/Statistical%20properties%20of%20algorithms%20for%20analysis%20of%20cell%20images.pdf" + + image_sample_size : float, optional + Subsampling factor for background reduction. + It is important to remove low frequency image background variations as + they will affect the final granularity measurement. Any method can be + used as a pre-processing step prior to this module; we have chosen to + simply subtract a highly open image. To do it quickly, we subsample the + image first. The subsampling factor for background reduction is usually + [0.125 – 0.25]. This is highly empirical, but a small factor should be + used if the structures of interest are large. The significance of + background removal in the context of granulometry is that image volume + at certain granular size is normalized by total image volume, which + depends on how the background was removed. + + element_size : int, optional + Radius of structuring element. + This radius should correspond to the radius of the textures of interest + *after* subsampling; i.e., if textures in the original image scale have + a radius of 40 pixels, and a subsampling factor of 0.25 is used, the + structuring element size should be 10 or slightly smaller, and the range + of the spectrum defined below will cover more sizes. + + granular_spectrum_length : int, optional + Range of the granular spectrum. + You may need a trial run to see which granular + spectrum range yields informative measurements. Start by using a wide + spectrum and narrow it down to the informative range to save time. + + Returns + ------- + None + + """ + # + # Downsample the image and mask + # + new_shape = np.array(pixels.shape) + if subsample_size < 1: + new_shape = new_shape * subsample_size + if pixels.ndim == 2: + i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) / subsample_size + pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) + mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 + else: + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size + pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) + mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 + else: + pixels = pixels.copy() + mask = mask.copy() + # + # Remove background pixels using a greyscale tophat filter + # + if image_sample_size < 1: + back_shape = new_shape * image_sample_size + if pixels.ndim == 2: + i, j = np.mgrid[0 : back_shape[0], 0 : back_shape[1]].astype(float) / image_sample_size + back_pixels = scipy.ndimage.map_coordinates(pixels, (i, j), order=1) + back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (i, j)) > 0.9 + else: + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) / subsample_size + back_pixels = scipy.ndimage.map_coordinates(pixels, (k, i, j), order=1) + back_mask = scipy.ndimage.map_coordinates(mask.astype(float), (k, i, j)) > 0.9 + else: + back_pixels = pixels + back_mask = mask + back_shape = new_shape + radius = element_size + if pixels.ndim == 2: + footprint = skimage.morphology.disk(radius, dtype=bool) + else: + footprint = skimage.morphology.ball(radius, dtype=bool) + back_pixels_mask = np.zeros_like(back_pixels) + back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] + back_pixels = skimage.morphology.erosion(back_pixels_mask, footprint=footprint) + back_pixels_mask = np.zeros_like(back_pixels) + back_pixels_mask[back_mask == True] = back_pixels[back_mask == True] + back_pixels = skimage.morphology.dilation(back_pixels_mask, footprint=footprint) + try: + if image_sample_size < 1: + if pixels.ndim == 2: + i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. + # + i *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + j *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (i, j), order=1) + else: + k, i, j = np.mgrid[0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2]].astype(float) + k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) + back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) + # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset + # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html + except ZeroDivisionError: + return _handle_granularity_error(granular_spectrum_length) + pixels -= back_pixels + pixels[pixels < 0] = 0 + + # Transcribed from the Matlab module: granspectr function + # + # CALCULATES GRANULAR SPECTRUM, ALSO KNOWN AS SIZE DISTRIBUTION, + # GRANULOMETRY, AND PATTERN SPECTRUM, SEE REF.: + # J.Serra, Image Analysis and Mathematical Morphology, Vol. 1. Academic Press, London, 1989 + # Maragos,P. "Pattern spectrum and multiscale shape representation", IEEE Transactions on Pattern Analysis and Machine Intelligence, 11, N 7, pp. 701-716, 1989 + # L.Vincent "Granulometries and Opening Trees", Fundamenta Informaticae, 41, No. 1-2, pp. 57-90, IOS Press, 2000. + # L.Vincent "Morphological Area Opening and Closing for Grayscale Images", Proc. NATO Shape in Picture Workshop, Driebergen, The Netherlands, pp. 197-208, 1992. + # I.Ravkin, V.Temov "Bit representation techniques and image processing", Applied Informatics, v.14, pp. 41-90, Finances and Statistics, Moskow, 1988 (in Russian) + # THIS IMPLEMENTATION INSTEAD OF OPENING USES EROSION FOLLOWED BY RECONSTRUCTION + # + ng = granular_spectrum_length + startmean = np.mean(pixels[mask]) + ero = pixels.copy() + # Mask the test image so that masked pixels will have no effect + # during reconstruction + # + ero[~mask] = 0 + currentmean = startmean + startmean = max(startmean, np.finfo(float).eps) + + if pixels.ndim == 2: + footprint = skimage.morphology.disk(1, dtype=bool) + else: + footprint = skimage.morphology.ball(1, dtype=bool) + results = [] + for i in range(1, ng + 1): + prevmean = currentmean + ero_mask = np.zeros_like(ero) + ero_mask[mask == True] = ero[mask == True] + ero = skimage.morphology.erosion(ero_mask, footprint=footprint) + rec = skimage.morphology.reconstruction(ero, pixels, footprint=footprint) + currentmean = np.mean(rec[mask]) + gs = (prevmean - currentmean) * 100 / startmean + + # TODO find better solution for the return + # results[f"Granularity_{str(i)}"] = gs + results.append(gs) + # Restore the reconstructed image to the shape of the + # original image so we can match against object labels + # + orig_shape = pixels.shape + try: + if pixels.ndim == 2: + i, j = np.mgrid[0 : orig_shape[0], 0 : orig_shape[1]].astype(float) + # + # Make sure the mapping only references the index range of + # back_pixels. + # + i *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + j *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + rec = scipy.ndimage.map_coordinates(rec, (i, j), order=1) + else: + k, i, j = np.mgrid[0 : orig_shape[0], 0 : orig_shape[1], 0 : orig_shape[2]].astype(float) + k *= float(new_shape[0] - 1) / float(orig_shape[0] - 1) + i *= float(new_shape[1] - 1) / float(orig_shape[1] - 1) + j *= float(new_shape[2] - 1) / float(orig_shape[2] - 1) + rec = scipy.ndimage.map_coordinates(rec, (k, i, j), order=1) + + # TODO Debug the reason for the ZeroDivisionError when using MIBI-TOF dataeset + # from https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks/examples/technology_mibitof.html + except ZeroDivisionError: + return _handle_granularity_error(granular_spectrum_length) + + # TODO check if this is necessary + # Calculate the means for the objects + # + # for object_record in object_records: + # assert isinstance(object_record, ObjectRecord) + # if object_record.nobjects > 0: + # new_mean = fix( + # scipy.ndimage.mean( + # rec, object_record.labels, object_record.range + # ) + # ) + # gss = ( + # (object_record.current_mean - new_mean) + # * 100 + # / object_record.start_mean + # ) + # object_record.current_mean = new_mean + # else: + # gss = np.zeros((0,)) + # measurements.add_measurement(object_record.name, feature, gss) + + if len(results) == 1: + return results[0] + else: + return results + + +def _handle_granularity_error(granular_spectrum_length: int): + return [None for _ in range(granular_spectrum_length)] + + +# Inspired by https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectsizeshape.py +def zernike(masks: np.ndarray, pixels: np.ndarray, zernike_numbers: int = 9) -> dict[int, np.ndarray]: + unique_indices = np.unique(masks) + unique_indices = unique_indices[unique_indices != 0] + res = centrosome.zernike.zernike( + zernike_indexes=centrosome.zernike.get_zernike_indexes(zernike_numbers + 1), + labels=masks, + indexes=unique_indices, + ) + return {orig_idx: res[res_idx] for orig_idx, res_idx in zip(unique_indices, range(res.shape[0]))} + + +# Copied from https://github.com/afermg/cp_measure/blob/main/src/cp_measure/fast/measureobjectintensitydistribution.py +def radial_distribution( + labels: np.ndarray, + pixels: np.ndarray, + scaled: bool = True, + bin_count: int = 4, + maximum_radius: int = 100, +): + """ + zernike_degree : int + Maximum zernike moment. + + This is the maximum radial moment that will be calculated. There are + increasing numbers of azimuthal moments as you increase the radial + moment, so higher values are increasingly expensive to calculate. + + scaled : bool + Scale the bins? + + When True divide the object radially into the number of bins + that you specify. Otherwise create the number of bins you specify + based on distance. If True, it will use a maximum distance so + that each object will have the same measurements (which might be zero + for small objects) and so that the measurements can be taken without + knowing the maximum object radius before the run starts. + + bin_count : int + Number of bins + + Specify the number of bins that you want to use to measure the distribution. + Radial distribution is measured with respect to a series of concentric + rings starting from the object center (or more generally, between contours + at a normalized distance from the object center). This number specifies + the number of rings into which the distribution is to be divided. + + maximum_radius : int + Maximum radius + + Specify the maximum radius for the unscaled bins. The unscaled binning method + creates the number of bins that you specify and creates equally spaced bin + boundaries up to the maximum radius. Parts of the object that are beyond this + radius will be counted in an overflow bin. The radius is measured in pixels. + """ + + if labels.dtype == bool: + labels = labels.astype(np.integer) + + M_CATEGORY = "RadialDistribution" + F_FRAC_AT_D = "FracAtD" + F_MEAN_FRAC = "MeanFrac" + F_RADIAL_CV = "RadialCV" + + FF_SCALE = "%dof%d" + FF_OVERFLOW = "Overflow" + FF_GENERIC = FF_SCALE + + MF_FRAC_AT_D = "_".join((M_CATEGORY, F_FRAC_AT_D, FF_GENERIC)) + MF_MEAN_FRAC = "_".join((M_CATEGORY, F_MEAN_FRAC, FF_GENERIC)) + MF_RADIAL_CV = "_".join((M_CATEGORY, F_RADIAL_CV, FF_GENERIC)) + OF_FRAC_AT_D = "_".join((M_CATEGORY, F_FRAC_AT_D, FF_OVERFLOW)) + OF_MEAN_FRAC = "_".join((M_CATEGORY, F_MEAN_FRAC, FF_OVERFLOW)) + OF_RADIAL_CV = "_".join((M_CATEGORY, F_RADIAL_CV, FF_OVERFLOW)) + + unique_labels = np.unique(labels) + n_objects = len(unique_labels[unique_labels > 0]) + d_to_edge = centrosome.cpmorphology.distance_to_edge(labels) + + # Find the point in each object farthest away from the edge. + # This does better than the centroid: + # * The center is within the object + # * The center tends to be an interesting point, like the + # center of the nucleus or the center of one or the other + # of two touching cells. + # + # MODIFICATION: Delegated label indices to maximum_position_of_labels + # This should not affect this one-mask/object function + i, j = centrosome.cpmorphology.maximum_position_of_labels( + # d_to_edge, labels, indices=[1] + d_to_edge, + labels, + indices=[1], + ) + + center_labels = np.zeros(labels.shape, int) + + center_labels[i, j] = labels[i, j] + + # + # Use the coloring trick here to process touching objects + # in separate operations + # + colors = centrosome.cpmorphology.color_labels(labels) + + n_colors = np.max(colors) + + d_from_center = np.zeros(labels.shape) + + cl = np.zeros(labels.shape, int) + + for color in range(1, n_colors + 1): + mask = colors == color + l, d = centrosome.propagate.propagate(np.zeros(center_labels.shape), center_labels, mask, 1) + + d_from_center[mask] = d[mask] + + cl[mask] = l[mask] + + good_mask = cl > 0 + + i_center = np.zeros(cl.shape) + + i_center[good_mask] = i[cl[good_mask] - 1] + + j_center = np.zeros(cl.shape) + + j_center[good_mask] = j[cl[good_mask] - 1] + + normalized_distance = np.zeros(labels.shape) + + if scaled: + total_distance = d_from_center + d_to_edge + + normalized_distance[good_mask] = d_from_center[good_mask] / (total_distance[good_mask] + 0.001) + else: + normalized_distance[good_mask] = d_from_center[good_mask] / maximum_radius + + n_good_pixels = np.sum(good_mask) + + good_labels = labels[good_mask] + + bin_indexes = (normalized_distance * bin_count).astype(int) + + bin_indexes[bin_indexes > bin_count] = bin_count + + labels_and_bins = (good_labels - 1, bin_indexes[good_mask]) + + histogram = scipy.sparse.coo_matrix((pixels[good_mask], labels_and_bins), (n_objects, bin_count + 1)).toarray() + + sum_by_object = np.sum(histogram, 1) + + sum_by_object_per_bin = np.dstack([sum_by_object] * (bin_count + 1))[0] + + fraction_at_distance = histogram / sum_by_object_per_bin + + number_at_distance = scipy.sparse.coo_matrix( + (np.ones(n_good_pixels), labels_and_bins), (n_objects, bin_count + 1) + ).toarray() + + sum_by_object = np.sum(number_at_distance, 1) + + sum_by_object_per_bin = np.dstack([sum_by_object] * (bin_count + 1))[0] + + fraction_at_bin = number_at_distance / sum_by_object_per_bin + + mean_pixel_fraction = fraction_at_distance / (fraction_at_bin + np.finfo(float).eps) + + # Anisotropy calculation. Split each cell into eight wedges, then + # compute coefficient of variation of the wedges' mean intensities + # in each ring. + # + # Compute each pixel's delta from the center object's centroid + i, j = np.mgrid[0 : labels.shape[0], 0 : labels.shape[1]] + + i_mask = i[good_mask] > i_center[good_mask] + + j_mask = j[good_mask] > j_center[good_mask] + + abs_mask = abs(i[good_mask] - i_center[good_mask]) > abs(j[good_mask] - j_center[good_mask]) + + radial_index = i_mask.astype(int) + j_mask.astype(int) * 2 + abs_mask.astype(int) * 4 + + results = {} + + for bin_idx in range(bin_count + (0 if scaled else 1)): + bin_mask = good_mask & (bin_indexes == bin_idx) + + bin_pixels = np.sum(bin_mask) + + bin_labels = labels[bin_mask] + + bin_radial_index = radial_index[bin_indexes[good_mask] == bin_idx] + + labels_and_radii = (bin_labels - 1, bin_radial_index) + + radial_values = scipy.sparse.coo_matrix((pixels[bin_mask], labels_and_radii), (n_objects, 8)).toarray() + + pixel_count = scipy.sparse.coo_matrix((np.ones(bin_pixels), labels_and_radii), (n_objects, 8)).toarray() + + mask = pixel_count == 0 + + radial_means = np.ma.masked_array(radial_values / pixel_count, mask) + + radial_cv = np.std(radial_means, 1) / np.mean(radial_means, 1) + + radial_cv[np.sum(~mask, 1) == 0] = 0 + + for measurement, feature, overflow_feature in ( + (fraction_at_distance[:, bin_idx], MF_FRAC_AT_D, OF_FRAC_AT_D), + (mean_pixel_fraction[:, bin_idx], MF_MEAN_FRAC, OF_MEAN_FRAC), + (np.array(radial_cv), MF_RADIAL_CV, OF_RADIAL_CV), + ): + if bin_idx == bin_count: + measurement_name = overflow_feature + else: + measurement_name = feature % (bin_idx + 1, bin_count) + + results[measurement_name] = measurement + + return results diff --git a/tests/image/test_morphology.py b/tests/image/test_morphology.py new file mode 100644 index 000000000..08c0332e2 --- /dev/null +++ b/tests/image/test_morphology.py @@ -0,0 +1,534 @@ +from __future__ import annotations + +import time +import typing +from pyexpat import features + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest +import skimage.measure +import spatialdata as sd +from anndata import AnnData +from skimage.measure import regionprops +from spatialdata import SpatialData +from spatialdata.datasets import blobs, raccoon + +import squidpy as sq + +# noinspection PyProtectedMember +from squidpy.im import ImageContainer, _measurements, quantify_morphology + +# noinspection PyProtectedMember +from squidpy.im._feature import ( + _get_region_props, + _get_table_key, + calculate_image_features, + sdata_image_features_helper, +) + +# noinspection PyProtectedMember +from squidpy.im._measurements import ( + border_occupied_factor, + calculate_histogram, + calculate_image_feature, + calculate_image_texture, + calculate_quantiles, +) + + +@pytest.fixture +def sdata_racoon() -> SpatialData: + return raccoon() + + +@pytest.fixture +def sdata_blobs() -> SpatialData: + return blobs() + + +@pytest.fixture +def morphology_methods() -> list[str]: + return ["label", "area", "eccentricity", "circularity", "granularity", "border_occupied_factor"] + + +@pytest.mark.xfail( + raises=AssertionError, reason="calling skimage.measure.label on segmentation produces additional labels" +) +class TestSkimageBackend: + def test_label_bug_present(self, sdata_blobs): + npt.assert_array_equal( + np.unique(skimage.measure.label(sdata_blobs["blobs_labels"])), + np.unique(sdata_blobs["blobs_labels"]), + ) + + +def dummy_callable(): + pass + + +@pytest.fixture +def malformed_morphology_methods() -> dict[str, typing.Any]: + methods = { + "wrong_container": [("label", "area"), "label,area"], + "wrong_method_type": [["test", dummy_callable, 42.42], ["test", dummy_callable, 42]], + } + return methods + + +class TestMorphology: + # @pytest.mark.parametrize( + # "sdata,methods", + # pytest.param( + # sdata_blobs, ("label", "area"), id="tuple" + # ), + # pytest.param( + # sdata_blobs, "label,area", id="string" + # ), + # ) + def test_sanity_method_list(self, sdata_blobs, malformed_morphology_methods): + with pytest.raises(ValueError, match="Argument `methods` must be a list of strings."): + for methods in malformed_morphology_methods["wrong_container"]: + sq.im.quantify_morphology(sdata=sdata_blobs, label="blobs_labels", image="blobs_image", methods=methods) + + # @pytest.mark.parametrize( + # "sdata,methods", + # pytest.param(sdata_blobs, ["test", dummy_callable, 42.42], id="float"), + # pytest.param(sdata_blobs, ["test", dummy_callable, 42], id="int"), + # ) + def test_sanity_method_list_types(self, sdata_blobs, malformed_morphology_methods): + with pytest.raises(ValueError, match="All elements in `methods` must be strings or callables."): + for methods in malformed_morphology_methods["wrong_method_type"]: + sq.im.quantify_morphology(sdata=sdata_blobs, label="blobs_labels", image="blobs_image", methods=methods) + + def test_get_table_key_no_annotators(self, sdata_blobs): + label = "blobs_multiscale_labels" + with pytest.raises(ValueError, match=f"No tables automatically detected in `sdata` for {label}"): + _get_table_key(sdata=sdata_blobs, label=label, kwargs={}) + + def test_get_table_key_multiple_annotators(self, sdata_blobs): + sdata_blobs.tables["multi_table"] = sd.deepcopy(sdata_blobs["table"]) + label = "blobs_labels" + with pytest.raises(ValueError, match=f"Multiple tables detected in `sdata` for {label}"): + _get_table_key(sdata=sdata_blobs, label=label, kwargs={}) + + def test_quantify_morphology_granularity(self, sdata_blobs): + granular_spectrum_length = 16 + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=["label", "granularity"], + split_by_channels=True, + ) + + columns = sdata_blobs["table"].obsm["morphology"].columns + for channel in range(3): + for granular_spectrum in range(granular_spectrum_length): + assert f"granularity_ch{channel}_{granular_spectrum}" in columns + + def test_quantify_morphology_border_occupied_factor(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=["label", "border_occupied_factor"], + split_by_channels=True, + ) + assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + + def test__get_region_props(self, sdata_blobs): + region_props = _get_region_props( + label_element=sdata_blobs["blobs_labels"], + image_element=sdata_blobs["blobs_image"], + props=["label", "area", "eccentricity"], + ) + assert len(region_props) == len(sdata_blobs["table"].obs) + + def test_quantify_morphology_callables(self, sdata_blobs, morphology_methods): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + methods=morphology_methods, + split_by_channels=True, + ) + + assert "morphology" in sdata_blobs["table"].obsm.keys() + assert isinstance(sdata_blobs["table"].obsm["morphology"], pd.DataFrame) + assert "circularity" in sdata_blobs["table"].obsm["morphology"].columns + assert "border_occupied_factor" in sdata_blobs["table"].obsm["morphology"].columns + + def test_quantify_morphology_all(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + split_by_channels=True, + ) + + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in sdata_blobs["table"].obsm["morphology"].columns]) + # for column in sdata_blobs["table"].obsm["morphology"].columns: + # assert any(column.startswith(name) for name in _measurements._all_regionprops_names()) + + @pytest.mark.xfail( + raises=ValueError, + reason="For the moment, there is no association " "between the multiscale labels and a table in blobs.", + ) + def test_quantify_morphology_multiscale(self, sdata_blobs, morphology_methods): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_multiscale_labels", + image="blobs_multiscale_image", + methods=morphology_methods, + split_by_channels=True, + ) + + def test_quantify_morphology_cp_measure(self, sdata_blobs): + sq.im.quantify_morphology( + sdata=sdata_blobs, + label="blobs_labels", + image="blobs_image", + split_by_channels=True, + methods=["label", "radial_distribution"], + ) + + print(sdata_blobs["table"].obsm["morphology"].columns) + + +# TODO Remove for release +@pytest.fixture +def sdata_merfish() -> SpatialData: + zarr_path = "./data/merfish.zarr" + return sd.read_zarr(zarr_path) + + +# TODO Remove for release +@pytest.fixture +def sdata_mibitof() -> SpatialData: + zarr_path = "./data/mibitof.zarr" + return sd.read_zarr(zarr_path) + + +class TestMorphologyPerformance: + def test_performance(self, sdata_mibitof, morphology_methods): + start_time = time.perf_counter() + sq.im.quantify_morphology( + sdata=sdata_mibitof, + label="point8_labels", + image="point8_image", + methods=None, + split_by_channels=True, + ) + end_time = time.perf_counter() + assert end_time - start_time < 60 + + +class TestMeasurements: + def test_border_occupied_factor(self): + label_image = np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 1, 1, 1, 2, 2, 2, 0], + [0, 0, 3, 3, 2, 2, 2, 0], + [0, 0, 3, 3, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ] + ) + + expected = {1: 3 / 8, 2: 3 / 8, 3: 2 / 4} + actual = border_occupied_factor(label_image) + assert actual == expected + + def test_radial_distribution(self, sdata_mibitof): + pixels = np.random.randint(100, size=64**2).reshape((64, 64)) + mask = np.zeros_like(pixels, dtype=bool) + mask[2:-3, 2:-3] = True + + result = _measurements.radial_distribution( + labels=pixels, + pixels=mask, + ) + + # result = _measurements.radial_distribution( + # labels=np.array(sdata_mibitof["point8_labels"]), + # pixels=np.array(sdata_mibitof["point8_image"]), + # ) + + print(result) + + def test_cp_measure(self, sdata_blobs): + from cp_measure.bulk import get_fast_measurements + + measurements = get_fast_measurements() + + size = 200 + rng = np.random.default_rng(42) + pixels = rng.integers(low=0, high=10, size=(size, size)) + + masks = np.zeros_like(pixels) + masks[5:-6, 5:-6] = 1 + + results = {} + for measurement_name, measurement in measurements.items(): + results[measurement_name] = measurement(masks, pixels) + + print(results) + + +@pytest.fixture() +def blobs_as_image_container(sdata_blobs: SpatialData) -> ImageContainer: + img_layer_name = "blobs_image" + seg_layer_name = "blobs_labels" + img = ImageContainer(sdata_blobs[img_layer_name].to_numpy(), layer=img_layer_name) + img.add_img(sdata_blobs[seg_layer_name].to_numpy(), layer=seg_layer_name) + + return img + + +@pytest.fixture() +def blobs_as_adata(sdata_blobs: SpatialData) -> AnnData: + s_adata = sdata_blobs.tables["table"] + # print(s_adata.uns) + return s_adata + + +@pytest.fixture() +def mibitof_as_image_container(sdata_mibitof: SpatialData) -> ImageContainer: + img_layer_name = "point8_image" + seg_layer_name = "point8_labels" + img = ImageContainer(sdata_mibitof[img_layer_name].to_numpy(), layer=img_layer_name) + img.add_img(sdata_mibitof[seg_layer_name].to_numpy(), layer=seg_layer_name) + return img + + +@pytest.fixture() +def mibitof_as_adata(sdata_mibitof: SpatialData) -> AnnData: + s_adata = sdata_mibitof.tables["table"] + s_adata.uns["spatial"] = {"point8_labels": {"scalefactors": {"spot_diameter_fullres": 7.0}}} + return s_adata + + +@pytest.fixture() +def visium_adata() -> AnnData: + return sq.datasets.visium_fluo_adata_crop() + + +@pytest.fixture() +def visium_img() -> ImageContainer: + img = sq.datasets.visium_fluo_image_crop() + sq.im.segment( + img=img, + layer="image", + layer_added="segmented_watershed", + method="watershed", + channel=0, + ) + return img + + +@pytest.fixture() +def calc_im_features_kwargs() -> dict[str, typing.Any]: + kwargs = { + # "adata": visium_adata, + # "img": visium_img, + # "features": features, + "layer": "image", + # "library_id": "point8_labels", + "key_added": "segmentation_features", + "features_kwargs": { + "segmentation": { + "label_layer": "segmented_watershed", + "props": ["label", "area", "mean_intensity"], + "channels": [1, 2], + } + }, + "copy": True, + "mask_circle": True, + } + return kwargs + + +class TestMorphologyImageFeaturesCompatibility: + def test_quantiles( + self, calc_im_features_kwargs: dict[str, typing.Any], visium_adata: AnnData, visium_img: ImageContainer + ): + calc_im_features_kwargs.update( + { + "features": ["summary"], + "adata": visium_adata, + "img": visium_img, + } + ) + + # expected = calculate_image_features( + # **calc_im_features_kwargs + # ) + actual = calculate_quantiles( + mask=visium_img["segmented_watershed"].to_numpy(), pixels=visium_img["image"].to_numpy() + ) + print(actual) + + def test_calculate_image_features(self, visium_adata: AnnData, visium_img: ImageContainer): + actual = calculate_image_feature( + # feature=calculate_histogram, + feature=calculate_image_texture, + mask=visium_img["segmented_watershed"].to_numpy(), + pixels=visium_img["image"].to_numpy(), + ) + print(actual) + + def test_calculate_image_features_performance(self, visium_adata: AnnData, visium_img: ImageContainer): + start_time = time.perf_counter() + props = regionprops( + label_image=visium_img["segmented_watershed"].to_numpy()[:, :, 0, 0], + intensity_image=visium_img["image"].to_numpy()[:, :, 0, 0], + extra_properties=calculate_histogram, + ) + actual = {prop.label: prop.calculate_histogram for prop in props} + end_time = time.perf_counter() + # calc_im_features_result = calculate_image_feature( + # feature=calculate_histogram, + # mask=visium_img["segmented_watershed"].to_numpy(), + # pixels=visium_img["image"].to_numpy() + # ) + # end_time = time.perf_counter() + # assert end_time - start_time < 60 + + print(end_time - start_time) + + def test_im_features_morphology_equivalence( + self, + # blobs_as_adata: AnnData, blobs_as_image_container: ImageContainer, + # mibitof_as_adata: AnnData, mibitof_as_image_container: ImageContainer, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + # adata: AnnData + ): + # print(adata.uns.spatial) + # expected = calculate_image_features( + # adata=mibitof_as_adata, + # img=mibitof_as_image_container, + # layer="point8_labels", + # library_id="point8_labels", + # features=features, + # key_added="foo", + # copy=True, + # features_kwargs={"segmentation": {"label_layer": "point8_labels", "intensity_layer": "point8_image"}}, + # ) + + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + } + ) + + expected = calculate_image_features(**calc_im_features_kwargs) + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + # morphology = quantify_morphology() + + pd.testing.assert_frame_equal(actual, expected) + + +class TestImageContainerSDataCompatibility: + @pytest.mark.xfail( + raises=AssertionError, + reason="There cannot exist two xarray datasets in one ImageContainer " + "with the same 'channel' name, while SpatialData enforces that dimensions " + "must be named ('c', 'z', 'y', 'x')", + ) + def test_image_container_enforcing_unique_channel_naming( + self, calc_im_features_kwargs: dict[str, typing.Any], visium_img: ImageContainer + ): + from spatialdata.models._utils import _validate_dims + + label_layer = calc_im_features_kwargs["features_kwargs"]["segmentation"]["label_layer"] + image_layer = calc_im_features_kwargs["layer"] + + visium_img[label_layer] = visium_img[label_layer].rename({"channels:0": "c"}) + visium_img[image_layer] = visium_img[image_layer].rename({"channels": "c"}) + + assert visium_img[label_layer].dims[-1] == "c" + assert visium_img[label_layer].dims[-1] == visium_img[image_layer].dims[-1] + # "WARNING: Channel dimension cannot be aligned with an existing one, using `c_0`" + # "c_0" + + +class TestSdataHelper: + def test_sdata_helper_copy( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + } + ) + + # label_layer = calc_im_features_kwargs["features_kwargs"]["segmentation"]["label_layer"] + # image_layer = calc_im_features_kwargs["layer"] + # + # visium_img[label_layer] = visium_img[label_layer].rename({"channels:0": "c"}) + # visium_img[label_layer] = visium_img[label_layer].transpose("c", "z", "y", "x") + # + # visium_img[image_layer] = visium_img[image_layer].rename({"channels": "c"}) + # visium_img[image_layer] = visium_img[image_layer].transpose("c", "z", "y", "x") + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert isinstance(actual, pd.DataFrame) + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in actual.columns]) + + def test_sdata_helper_obsm( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + "copy": False, + } + ) + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert actual is None + for name in _measurements._all_regionprops_names(): + assert any( + [column.startswith(name) for column in visium_adata.obsm[calc_im_features_kwargs["key_added"]].columns] + ) + + def test_sdata_helper_adata( + self, + calc_im_features_kwargs: dict[str, typing.Any], + visium_adata: AnnData, + visium_img: ImageContainer, + ): + calc_im_features_kwargs.update( + { + "features": ["texture", "summary", "histogram", "segmentation"], + "adata": visium_adata, + "img": visium_img, + "copy": False, + } + ) + + actual = sdata_image_features_helper(**calc_im_features_kwargs) + assert isinstance(actual, AnnData) + for name in _measurements._all_regionprops_names(): + assert any([column.startswith(name) for column in actual.var_names])