From 6ce91510d2749906cdc9fed4174e078871c1ea46 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 15:39:37 +0100 Subject: [PATCH 01/15] mvp --- pyproject.toml | 2 + src/squidpy/__init__.py | 2 +- src/squidpy/exp/__init__.py | 11 ++ src/squidpy/exp/_feature.py | 224 ++++++++++++++++++++++++++++++++++++ 4 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 src/squidpy/exp/__init__.py create mode 100644 src/squidpy/exp/_feature.py diff --git a/pyproject.toml b/pyproject.toml index 1143cab94..cd4e7a665 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,8 @@ dependencies = [ "xarray>=2024.10.0", "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", + "centrosome>=1.2.3", + "cp-measure>=0.1.4" ] [project.optional-dependencies] diff --git a/src/squidpy/__init__.py b/src/squidpy/__init__.py index 5fb2b848a..cfba804fd 100644 --- a/src/squidpy/__init__.py +++ b/src/squidpy/__init__.py @@ -3,7 +3,7 @@ from importlib import metadata from importlib.metadata import PackageMetadata -from squidpy import datasets, gr, im, pl, read, tl +from squidpy import datasets, exp, gr, im, pl, read, tl try: md: PackageMetadata = metadata.metadata(__name__) diff --git a/src/squidpy/exp/__init__.py b/src/squidpy/exp/__init__.py new file mode 100644 index 000000000..2dc08d915 --- /dev/null +++ b/src/squidpy/exp/__init__.py @@ -0,0 +1,11 @@ +"""Experimental module for Squidpy. + +This module contains experimental features that are still under development. +These features may change or be removed in future releases. +""" + +from __future__ import annotations + +from squidpy.exp._feature import calculate_image_features + +__all__ = ["calculate_image_features"] diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py new file mode 100644 index 000000000..42fca34a6 --- /dev/null +++ b/src/squidpy/exp/_feature.py @@ -0,0 +1,224 @@ +"""Experimental feature extraction module.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any +import warnings +import anndata as ad +import numpy as np +import pandas as pd +from cp_measure.bulk import get_core_measurements +from spatialdata._logging import logger as logg +from spatialdata import SpatialData + +from squidpy._constants._constants import ImageFeature +from squidpy._docs import d, inject_docs +from squidpy._utils import Signal, _get_n_cores, parallelize +from spatialdata.models import TableModel +__all__ = ["calculate_image_features"] + + +@d.dedent +@inject_docs(f=ImageFeature) +def calculate_image_features( + sdata: SpatialData, + labels_key: str, + image_key: str, + adata_key_added: str = "morphology", + invalid_as_zero: bool = True, + n_jobs: int | None = None, + backend: str = "loky", + show_progress_bar: bool = True, +) -> pd.DataFrame | None: + """ + Calculate features from segmentation masks using CellProfiler measurements. + + This function uses the `cp_measure` package to extract features from + segmentation masks. It supports both basic shape features and + intensity-based features if an intensity image is provided. + + Parameters + ---------- + sdata + The spatial data object containing the segmentation masks. + labels_key + Key in :attr:`spatialdata.SpatialData.labels` containing the + segmentation masks. + image_key + Key in :attr:`spatialdata.SpatialData.images` containing the + intensity image. + adata_key_added + Key to store the AnnData object in the SpatialData object. + %(parallelize)s + + Returns + ------- + A :class:`pandas.DataFrame` with the calculated features. If the image has + multiple channels, features are calculated for each channel separately and + channel names are appended to the feature names. + + Notes + ----- + This is an experimental feature that requires the `cp_measure` package + to be installed. + """ + # Get the image and labels + image = np.asarray(sdata.images[image_key].compute()) + labels = np.asarray(sdata.labels[labels_key].compute()) + + # Check if labels are empty + if labels.size == 0: + raise ValueError("Labels array is empty") + + max_label = int(labels.max()) + if max_label == 0: + raise ValueError("No cells found in labels (max label is 0)") + + # Get channel names if available + channel_names = None + if ( + hasattr(sdata.images[image_key], "coords") + and "c" in sdata.images[image_key].coords + ): + channel_names = sdata.images[image_key].coords["c"].values + + # Handle image dimensions + if image.ndim == 2: + image = image[None, :, :] # Add channel dimension + elif image.ndim != 3: + raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") + + # Check if image and labels have matching dimensions + if image.shape[1:] != labels.shape: + raise ValueError( + f"Image and labels have mismatched dimensions: " + f"image {image.shape[1:]}, labels {labels.shape}" + ) + + # Get core measurements from cp_measure + measurements = get_core_measurements() + + # Process each channel + all_features = [] + n_channels = image.shape[0] + n_jobs = _get_n_cores(n_jobs) + + for ch_idx in range(n_channels): + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + + logg.info( + f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)" + ) + + ch_image = image[ch_idx] + + # Get cell IDs + cell_ids = range(1, max_label + 1) + + # Parallelize feature calculation + res = parallelize( + _calculate_image_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + )(labels, ch_image, measurements, ch_name) + + all_features.append(res) + + # Create AnnData object from results + combined_features = pd.concat(all_features, axis=1) + if invalid_as_zero: + combined_features = combined_features.replace([np.inf, -np.inf], 0) + combined_features = combined_features.fillna(0) + adata = ad.AnnData(X=combined_features) + adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)] + adata.var_names = combined_features.columns + + adata.uns["spatialdata_attrs"] = { + "region": labels_key, + "region_key": "region", + "instance_key": "label_id", + } + adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) + adata.obs["label_id"] = range(1, max_label + 1) + # adata.obs[["region", "spot_id"]] + + # Add the AnnData object to the SpatialData object + sdata.tables[adata_key_added] = TableModel.parse(adata) + + # Combine features from all channels + # return pd.concat(all_features, axis=1) + + +def _calculate_image_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image: np.ndarray, + measurements: dict[str, Any], + channel_name: str | None = None, + queue: Any | None = None, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + features_list = [] + for cell_id in cell_ids: + # Get cell mask + cell_mask = (labels == cell_id).astype(np.uint8) + + # Find bounding box of the cell + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + continue + + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + + # Add padding to ensure we capture the full cell + pad = 5 + y_min = max(0, y_min - pad) + y_max = min(labels.shape[0], y_max + pad) + x_min = max(0, x_min - pad) + x_max = min(labels.shape[1], x_max + pad) + + # Crop both mask and image to the bounding box + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image_cropped = image[y_min:y_max, x_min:x_max] + + cell_features = {} + # Calculate all available features + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + feature_dict = func(cell_mask_cropped, image_cropped) + # Convert numpy arrays to scalars + feature_dict = { + k: ( + float(v[0]) + if isinstance(v, np.ndarray) and v.size == 1 + else v + ) + for k, v in feature_dict.items() + } + # Append channel name to feature names + feature_dict = { + f"{k}_ch{channel_name}": v for k, v in feature_dict.items() + } + cell_features.update(feature_dict) + except Exception as e: + logg.warning( + f"Failed to calculate {name} features for cell {cell_id}: " + f"{str(e)}" + ) + + features_list.append(cell_features) + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame(features_list, index=cell_ids) From 3a82ac8e207c37a4225fca51b6555dac78fd6fe5 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 17:46:33 +0100 Subject: [PATCH 02/15] performance improvement --- src/squidpy/exp/_feature.py | 351 ++++++++++++++++++++++++++---------- 1 file changed, 255 insertions(+), 96 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 42fca34a6..5f7d7b0b3 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -3,33 +3,215 @@ from __future__ import annotations from collections.abc import Sequence -from typing import Any +from typing import Any, Callable import warnings import anndata as ad import numpy as np import pandas as pd -from cp_measure.bulk import get_core_measurements +from cp_measure.bulk import get_core_measurements, get_correlation_measurements from spatialdata._logging import logger as logg from spatialdata import SpatialData +import xarray as xr from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize from spatialdata.models import TableModel + __all__ = ["calculate_image_features"] +def _measurement_wrapper( + func: Callable, + mask: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None = None, +) -> dict[str, Any]: + """Wrapper function to handle both core and correlation measurements. + + Parameters + ---------- + func + The measurement function to call + mask + The cell mask + image1 + First image (or only image for core measurements) + image2 + Second image for correlation measurements. If None, this is a core + measurement. + + Returns + ------- + Dictionary of feature values + """ + return func(mask, image1) if image2 is None else func(image1, image2, mask) + + +def _calculate_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None, + measurements: dict[str, Any], + channel1_name: str | None = None, + channel2_name: str | None = None, + queue: Any | None = None, + verbose: bool = False, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + features_dict = {} + + # Pre-allocate arrays for type conversion + uint8_features = [ + "radial_distribution", + "radial_zernikes", + "intensity", + "sizeshape", + "zernike", + "ferret", + ] + float_features = ["manders_fold", "rwc"] + + # Pre-compute image normalization if needed + if "texture" in measurements: + img1_min = image1.min() + img1_max = image1.max() + img1_range = img1_max - img1_min + 1e-10 + if image2 is not None: + img2_min = image2.min() + img2_max = image2.max() + img2_range = img2_max - img2_min + 1e-10 + + for cell_id in cell_ids: + # Get cell mask and find bounding box in one step + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + continue + + # Get bounding box with padding + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + pad = 5 + y_min = max(0, y_min - pad) + y_max = min(labels.shape[0], y_max + pad) + x_min = max(0, x_min - pad) + x_max = min(labels.shape[1], x_max + pad) + + # Crop all arrays at once + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image1_cropped = image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + # Quick shape check + if cell_mask_cropped.shape != image1_cropped.shape or ( + image2_cropped is not None + and cell_mask_cropped.shape != image2_cropped.shape + ): + if verbose: + logg.warning( + f"Shape mismatch for cell {cell_id}: " + f"mask {cell_mask_cropped.shape}, " + f"image1 {image1_cropped.shape}, " + f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}" + ) + continue + + cell_features = {} + # Calculate all available features + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + # Pre-convert inputs based on feature type + if name in uint8_features: + mask = cell_mask_cropped.astype(np.uint8) + img1 = image1_cropped.astype(np.uint8) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.uint8) + ) + elif name == "texture": + mask = cell_mask_cropped.astype(np.uint8) + img1 = ( + image1_cropped.astype(np.float32) - img1_min + ) / img1_range + img2 = ( + None + if image2_cropped is None + else (image2_cropped.astype(np.float32) - img2_min) + / img2_range + ) + elif name in float_features: + mask = cell_mask_cropped.astype(np.float32) + img1 = image1_cropped.astype(np.float32) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.float32) + ) + else: + mask = cell_mask_cropped.astype(np.float32) + img1 = image1_cropped.astype(np.float32) + img2 = ( + None + if image2_cropped is None + else image2_cropped.astype(np.float32) + ) + + feature_dict = _measurement_wrapper(func, mask, img1, img2) + + for k, v in feature_dict.items(): + if len(v) > 1: + raise ValueError(f"Feature {k} has more than one value.") + else: + feature_dict[k] = float(v[0]) + + # Append channel names efficiently + if image2 is None: + feature_dict = { + f"{k}_ch{channel1_name}": v for k, v in feature_dict.items() + } + else: + feature_dict = { + f"{k}_ch{channel1_name}_ch{channel2_name}": v + for k, v in feature_dict.items() + } + + cell_features.update(feature_dict) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" + ) + + features_dict[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame.from_dict(features_dict, orient="index") + + @d.dedent @inject_docs(f=ImageFeature) def calculate_image_features( sdata: SpatialData, labels_key: str, image_key: str, + scale: str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, backend: str = "loky", show_progress_bar: bool = True, + verbose: bool = False, ) -> pd.DataFrame | None: """ Calculate features from segmentation masks using CellProfiler measurements. @@ -63,9 +245,23 @@ def calculate_image_features( This is an experimental feature that requires the `cp_measure` package to be installed. """ - # Get the image and labels - image = np.asarray(sdata.images[image_key].compute()) - labels = np.asarray(sdata.labels[labels_key].compute()) + + if ( + (isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree)) + and scale is None + ): + raise ValueError("When using multi-scale data, please specify the scale.") + + if scale is not None and not isinstance(scale, str): + raise ValueError("Scale must be a string.") + + if scale is not None: + image = np.asarray(sdata.images[image_key][scale].image.compute()) + labels = np.asarray(sdata.labels[labels_key][scale].image.compute()) + else: + image = np.asarray(sdata.images[image_key].compute()) + labels = np.asarray(sdata.labels[labels_key].compute()) # Check if labels are empty if labels.size == 0: @@ -97,128 +293,91 @@ def calculate_image_features( ) # Get core measurements from cp_measure - measurements = get_core_measurements() + measurements_core = get_core_measurements() + measurements_corr = get_correlation_measurements() + + # Get unique cell IDs from labels, excluding background (0) + cell_ids = np.unique(labels) + cell_ids = cell_ids[cell_ids != 0] # Process each channel all_features = [] n_channels = image.shape[0] n_jobs = _get_n_cores(n_jobs) - for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + logg.info(f"Using '{n_jobs}' core(s).") - logg.info( - f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)" - ) + # First process core measurements for each channel + for ch_idx in range(n_channels): + logg.info(f"Calculating core features for channel '{ch_idx}'.") + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] - # Get cell IDs - cell_ids = range(1, max_label + 1) - - # Parallelize feature calculation res = parallelize( - _calculate_image_features_helper, + _calculate_features_helper, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, - )(labels, ch_image, measurements, ch_name) + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) all_features.append(res) + # Then process correlation measurements between channels + for ch1_idx in range(n_channels): + for ch2_idx in range(ch1_idx + 1, n_channels): + ch1_name = ( + channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" + ) + + logg.info( + f"Calculating correlation features between channels " + f"'{ch1_name}' and '{ch2_name}'." + ) + + ch1_image = image[ch1_idx] + ch2_image = image[ch2_idx] + + # Parallelize feature calculation + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) + + all_features.append(res) + # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) if invalid_as_zero: combined_features = combined_features.replace([np.inf, -np.inf], 0) combined_features = combined_features.fillna(0) + + # Ensure cell IDs are preserved in the correct order + cell_ids = sorted(combined_features.index) + combined_features = combined_features.loc[cell_ids] + adata = ad.AnnData(X=combined_features) - adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)] + adata.obs_names = [f"cell_{i}" for i in cell_ids] adata.var_names = combined_features.columns adata.uns["spatialdata_attrs"] = { "region": labels_key, - "region_key": "region", - "instance_key": "label_id", + "region_key": "region", + "instance_key": "label_id", } adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) - adata.obs["label_id"] = range(1, max_label + 1) - # adata.obs[["region", "spot_id"]] + adata.obs["label_id"] = cell_ids # Add the AnnData object to the SpatialData object sdata.tables[adata_key_added] = TableModel.parse(adata) - - # Combine features from all channels - # return pd.concat(all_features, axis=1) - - -def _calculate_image_features_helper( - cell_ids: Sequence[int], - labels: np.ndarray, - image: np.ndarray, - measurements: dict[str, Any], - channel_name: str | None = None, - queue: Any | None = None, -) -> pd.DataFrame: - """Helper function to calculate features for a subset of cells.""" - features_list = [] - for cell_id in cell_ids: - # Get cell mask - cell_mask = (labels == cell_id).astype(np.uint8) - - # Find bounding box of the cell - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - continue - - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - - # Add padding to ensure we capture the full cell - pad = 5 - y_min = max(0, y_min - pad) - y_max = min(labels.shape[0], y_max + pad) - x_min = max(0, x_min - pad) - x_max = min(labels.shape[1], x_max + pad) - - # Crop both mask and image to the bounding box - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image_cropped = image[y_min:y_max, x_min:x_max] - - cell_features = {} - # Calculate all available features - for name, func in measurements.items(): - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - feature_dict = func(cell_mask_cropped, image_cropped) - # Convert numpy arrays to scalars - feature_dict = { - k: ( - float(v[0]) - if isinstance(v, np.ndarray) and v.size == 1 - else v - ) - for k, v in feature_dict.items() - } - # Append channel name to feature names - feature_dict = { - f"{k}_ch{channel_name}": v for k, v in feature_dict.items() - } - cell_features.update(feature_dict) - except Exception as e: - logg.warning( - f"Failed to calculate {name} features for cell {cell_id}: " - f"{str(e)}" - ) - - features_list.append(cell_features) - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame(features_list, index=cell_ids) From f734634ea86167fdeb093190ca18c28f38e1cf35 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Mar 2025 16:47:29 +0000 Subject: [PATCH 03/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 83 ++++++++++--------------------------- 1 file changed, 23 insertions(+), 60 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 5f7d7b0b3..cf7adbea3 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,21 +2,22 @@ from __future__ import annotations -from collections.abc import Sequence -from typing import Any, Callable import warnings +from collections.abc import Callable, Sequence +from typing import Any + import anndata as ad import numpy as np import pandas as pd +import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from spatialdata._logging import logger as logg from spatialdata import SpatialData -import xarray as xr +from spatialdata._logging import logger as logg +from spatialdata.models import TableModel from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from spatialdata.models import TableModel __all__ = ["calculate_image_features"] @@ -106,8 +107,7 @@ def _calculate_features_helper( # Quick shape check if cell_mask_cropped.shape != image1_cropped.shape or ( - image2_cropped is not None - and cell_mask_cropped.shape != image2_cropped.shape + image2_cropped is not None and cell_mask_cropped.shape != image2_cropped.shape ): if verbose: logg.warning( @@ -129,38 +129,23 @@ def _calculate_features_helper( if name in uint8_features: mask = cell_mask_cropped.astype(np.uint8) img1 = image1_cropped.astype(np.uint8) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.uint8) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8) elif name == "texture": mask = cell_mask_cropped.astype(np.uint8) - img1 = ( - image1_cropped.astype(np.float32) - img1_min - ) / img1_range + img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range img2 = ( None if image2_cropped is None - else (image2_cropped.astype(np.float32) - img2_min) - / img2_range + else (image2_cropped.astype(np.float32) - img2_min) / img2_range ) elif name in float_features: mask = cell_mask_cropped.astype(np.float32) img1 = image1_cropped.astype(np.float32) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.float32) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) else: mask = cell_mask_cropped.astype(np.float32) img1 = image1_cropped.astype(np.float32) - img2 = ( - None - if image2_cropped is None - else image2_cropped.astype(np.float32) - ) + img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) feature_dict = _measurement_wrapper(func, mask, img1, img2) @@ -172,21 +157,14 @@ def _calculate_features_helper( # Append channel names efficiently if image2 is None: - feature_dict = { - f"{k}_ch{channel1_name}": v for k, v in feature_dict.items() - } + feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()} else: - feature_dict = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v - for k, v in feature_dict.items() - } + feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()} cell_features.update(feature_dict) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") features_dict[cell_id] = cell_features @@ -247,10 +225,8 @@ def calculate_image_features( """ if ( - (isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree)) - and scale is None - ): + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) + ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): @@ -273,10 +249,7 @@ def calculate_image_features( # Get channel names if available channel_names = None - if ( - hasattr(sdata.images[image_key], "coords") - and "c" in sdata.images[image_key].coords - ): + if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: channel_names = sdata.images[image_key].coords["c"].values # Handle image dimensions @@ -287,10 +260,7 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: " - f"image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") # Get core measurements from cp_measure measurements_core = get_core_measurements() @@ -329,17 +299,10 @@ def calculate_image_features( # Then process correlation measurements between channels for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - ) - - logg.info( - f"Calculating correlation features between channels " - f"'{ch1_name}' and '{ch2_name}'." - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" + + logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] From 9fb24b407f8680cb743535c7467f0410ccde55ec Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Fri, 28 Mar 2025 22:38:35 +0100 Subject: [PATCH 04/15] added skimage features --- src/squidpy/exp/_feature.py | 457 ++++++++++++++++++++++++++++++------ 1 file changed, 384 insertions(+), 73 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 5f7d7b0b3..5ef09b20a 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,25 +2,199 @@ from __future__ import annotations -from collections.abc import Sequence -from typing import Any, Callable import warnings +from collections.abc import Callable, Sequence +from typing import Any + import anndata as ad +import itertools import numpy as np import pandas as pd +import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements -from spatialdata._logging import logger as logg +from scipy import ndimage +from skimage import measure from spatialdata import SpatialData -import xarray as xr +from spatialdata._logging import logger as logg +from spatialdata.models import TableModel +from skimage.measure import label from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from spatialdata.models import TableModel __all__ = ["calculate_image_features"] +def _get_regionprops_features( + cell_ids: Sequence[int], + labels: np.ndarray, + intensity_image: np.ndarray | None = None, + queue: Any | None = None, +) -> dict[str, float]: + """Calculate regionprops features for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + intensity_image + Optional intensity image for intensity-based features + queue + Optional queue for progress tracking. If provided, will send update signals. + + Returns + ------- + Dictionary of regionprops features + """ + # Define channel-independent properties (only need mask) + mask_props = { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "centroid", + "centroid_local", + "perimeter", + "perimeter_crofton", + "inertia_tensor", + "inertia_tensor_eigvals", + } + + # Define channel-dependent properties (need intensity image) + intensity_props = { + "intensity_max", + "intensity_mean", + "intensity_min", + "intensity_std", + } + + features = {} + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + # labels only (channel independent) + if intensity_image is None: + for cell_id in cell_ids: + cell_mask_cropped, _, _ = _get_cell_crops( + cell_id=cell_id, + labels=labels, + ) + + if cell_mask_cropped is None: + continue + + region_prop = measure.regionprops(label_image=label(cell_mask_cropped)) + + if not region_prop: + continue + + cell_features = {} + + # Calculate regionprops features while ignoring warnings + for prop in mask_props: + try: + value = getattr(region_prop, prop) + + # Handle array-like properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.array(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float( + value[i, j] + ) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + except Exception: + continue + + if queue is not None: + queue.put(Signal.UPDATE) + + features[cell_id] = cell_features + + # Calculate intensity-dependent properties if intensity image is provided + else: + for cell_id in cell_ids: + cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops( + cell_id=cell_id, + labels=labels, + image1=intensity_image, + ) + + if cell_mask_cropped is None: + continue + + intensity_props_obj = measure.regionprops( + label_image=label(cell_mask_cropped), + intensity_image=intensity_image_cropped, + ) + + if not intensity_props_obj: + continue + + cell_features = {} + + for prop in intensity_props: + try: + value = getattr(intensity_props_obj, prop) + + # Skip callable properties + if callable(value): + continue + + # Handle array properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.array(value) + + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float( + value[i, j] + ) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + + except Exception: + continue + + if queue is not None: + queue.put(Signal.UPDATE) + + features[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.FINISH) + + return pd.DataFrame.from_dict(features, orient="index") + + def _measurement_wrapper( func: Callable, mask: np.ndarray, @@ -48,6 +222,79 @@ def _measurement_wrapper( return func(mask, image1) if image2 is None else func(image1, image2, mask) +def _get_cell_crops( + cell_id: int, + labels: np.ndarray, + image1: np.ndarray | None = None, + image2: np.ndarray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: + """Generator function to get cropped arrays for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + image1 + First image to crop + image2 + Optional second image to crop + pad + Amount of padding to add around the cell + verbose + Whether to print warning messages + + Returns + ------- + Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty + """ + # Get cell mask and find bounding box in one step + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + return None + + # Get bounding box + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + + # Get image dimensions + height, width = labels.shape + + # Calculate desired padding + y_pad_min = min(pad, y_min) # How much we can pad to the top + y_pad_max = min(pad, height - y_max - 1) # How much we can pad to the bottom + x_pad_min = min(pad, x_min) # How much we can pad to the left + x_pad_max = min(pad, width - x_max - 1) # How much we can pad to the right + + # Apply symmetric padding where possible + y_min -= y_pad_min + y_max += y_pad_max + x_min -= x_pad_min + x_max += x_pad_max + + # Warn if cell is at border and padding is asymmetric + if verbose and ( + y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad + ): + logg.warning( + f"Cell {cell_id} is at image border. Padding is asymmetric: " + f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " + f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" + ) + + # Crop all arrays at once + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + + image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + return cell_mask_cropped, image1_cropped, image2_cropped + + def _calculate_features_helper( cell_ids: Sequence[int], labels: np.ndarray, @@ -84,42 +331,40 @@ def _calculate_features_helper( img2_range = img2_max - img2_min + 1e-10 for cell_id in cell_ids: - # Get cell mask and find bounding box in one step - cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells + # Get cropped arrays for this cell + result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) + if result is None: continue - # Get bounding box with padding - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - pad = 5 - y_min = max(0, y_min - pad) - y_max = min(labels.shape[0], y_max + pad) - x_min = max(0, x_min - pad) - x_max = min(labels.shape[1], x_max + pad) - - # Crop all arrays at once - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image1_cropped = image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] - - # Quick shape check - if cell_mask_cropped.shape != image1_cropped.shape or ( - image2_cropped is not None - and cell_mask_cropped.shape != image2_cropped.shape - ): + cell_mask_cropped, image1_cropped, image2_cropped = result + cell_features = {} + + # Calculate regionprops features first + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + regionprops_features = _get_regionprops_features( + cell_mask_cropped, + image1_cropped, + image1_cropped if image2 is None else None, + ) + if image2 is None: + regionprops_features = { + f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items() + } + else: + regionprops_features = { + f"{k}_ch{channel1_name}_ch{channel2_name}": v + for k, v in regionprops_features.items() + } + cell_features.update(regionprops_features) + except Exception as e: if verbose: logg.warning( - f"Shape mismatch for cell {cell_id}: " - f"mask {cell_mask_cropped.shape}, " - f"image1 {image1_cropped.shape}, " - f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}" + f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" ) - continue - cell_features = {} - # Calculate all available features + # Calculate all available cp-measure features for name, func in measurements.items(): try: with warnings.catch_warnings(): @@ -206,6 +451,7 @@ def calculate_image_features( labels_key: str, image_key: str, scale: str | None = None, + measurements: list[str] | str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, @@ -247,10 +493,9 @@ def calculate_image_features( """ if ( - (isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree)) - and scale is None - ): + isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree) + ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): @@ -263,6 +508,29 @@ def calculate_image_features( image = np.asarray(sdata.images[image_key].compute()) labels = np.asarray(sdata.labels[labels_key].compute()) + available_measurements = [ + "skimage:label", + "skimage:label+image", + "cpmeasure:core", + "cpmeasure:correlation", + ] + + if measurements is None: + measurements = available_measurements + + if isinstance(measurements, str): + measurements = [measurements] + + if isinstance(measurements, list): + invalid_measurements = [ + m for m in measurements if m not in available_measurements + ] + if invalid_measurements: + raise ValueError( + f"Invalid measurement(s): {invalid_measurements}, " + f"available measurements: {available_measurements}" + ) + # Check if labels are empty if labels.size == 0: raise ValueError("Labels array is empty") @@ -288,13 +556,11 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: raise ValueError( - f"Image and labels have mismatched dimensions: " - f"image {image.shape[1:]}, labels {labels.shape}" + f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" ) - # Get core measurements from cp_measure - measurements_core = get_core_measurements() - measurements_corr = get_correlation_measurements() + if "cpmeasure:correlation" in measurements: + measurements_corr = get_correlation_measurements() # Get unique cell IDs from labels, excluding background (0) cell_ids = np.unique(labels) @@ -307,58 +573,103 @@ def calculate_image_features( logg.info(f"Using '{n_jobs}' core(s).") - # First process core measurements for each channel - for ch_idx in range(n_channels): - logg.info(f"Calculating core features for channel '{ch_idx}'.") - - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ch_image = image[ch_idx] - + if "skimage:label" in measurements: + logg.info("Calculating 'skimage' label features.") res = parallelize( - _calculate_features_helper, + _get_regionprops_features, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels, ch_image, None, measurements_core, ch_name) - + )(labels=labels, intensity_image=None) all_features.append(res) - # Then process correlation measurements between channels - for ch1_idx in range(n_channels): - for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - ) + # skimage features that need a mask and an image + if "skimage:label+image" in measurements: + for ch_idx in range(n_channels): - logg.info( - f"Calculating correlation features between channels " - f"'{ch1_name}' and '{ch2_name}'." + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ) + ch_image = image[ch_idx] - ch1_image = image[ch1_idx] - ch2_image = image[ch2_idx] - - # Parallelize feature calculation + logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( - _calculate_features_helper, + _get_regionprops_features, collection=cell_ids, extractor=pd.concat, n_jobs=n_jobs, backend=backend, show_progress_bar=show_progress_bar, verbose=verbose, - )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) - + )(labels=labels, intensity_image=ch_image) all_features.append(res) + # cpmeasure features that need a mask and an image + if "cpmeasure:core" in measurements: + measurements_core = get_core_measurements() + + for ch_idx in range(n_channels): + + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ) + ch_image = image[ch_idx] + if "cpmeasure:core" in measurements: + logg.info( + f"Calculating 'cpmeasure' core features for channel '{ch_idx}'." + ) + + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) + all_features.append(res) + + # cpmeasure features that correlate two channels + if "cpmeasure:correlation" in measurements: + for ch1_idx in range(n_channels): + for ch2_idx in range(ch1_idx + 1, n_channels): + ch1_name = ( + channel_names[ch1_idx] + if channel_names is not None + else f"ch{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] + if channel_names is not None + else f"ch{ch2_idx}" + ) + + logg.info( + f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'." + ) + + ch1_image = image[ch1_idx] + ch2_image = image[ch2_idx] + + # Parallelize feature calculation + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) + all_features.append(res) + # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) + if invalid_as_zero: combined_features = combined_features.replace([np.inf, -np.inf], 0) combined_features = combined_features.fillna(0) From 78ed95cf5cfe19187bec595338a374f47109b928 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Mar 2025 22:37:46 +0000 Subject: [PATCH 05/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 80 ++++++++++--------------------------- 1 file changed, 20 insertions(+), 60 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index eb2808e96..18ccf7cfd 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -2,24 +2,23 @@ from __future__ import annotations +import itertools import warnings from collections.abc import Callable, Sequence from typing import Any import anndata as ad -import itertools import numpy as np import pandas as pd import xarray as xr -import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements from scipy import ndimage from skimage import measure +from skimage.measure import label from spatialdata import SpatialData from spatialdata._logging import logger as logg from spatialdata.models import TableModel -from skimage.measure import label from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize @@ -115,12 +114,8 @@ def _get_regionprops_features( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): - cell_features[f"{prop}_{i}x{j}"] = float( - value[i, j] - ) + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value else: @@ -171,12 +166,8 @@ def _get_regionprops_features( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): - cell_features[f"{prop}_{i}x{j}"] = float( - value[i, j] - ) + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value else: @@ -278,9 +269,7 @@ def _get_cell_crops( x_max += x_pad_max # Warn if cell is at border and padding is asymmetric - if verbose and ( - y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad - ): + if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): logg.warning( f"Cell {cell_id} is at image border. Padding is asymmetric: " f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " @@ -350,20 +339,15 @@ def _calculate_features_helper( image1_cropped if image2 is None else None, ) if image2 is None: - regionprops_features = { - f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items() - } + regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()} else: regionprops_features = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v - for k, v in regionprops_features.items() + f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items() } cell_features.update(regionprops_features) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") # Calculate all available cp-measure features for name, func in measurements.items(): @@ -472,8 +456,7 @@ def calculate_image_features( """ if ( - isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") @@ -501,13 +484,10 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [ - m for m in measurements if m not in available_measurements - ] + invalid_measurements = [m for m in measurements if m not in available_measurements] if invalid_measurements: raise ValueError( - f"Invalid measurement(s): {invalid_measurements}, " - f"available measurements: {available_measurements}" + f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" ) # Check if labels are empty @@ -531,9 +511,7 @@ def calculate_image_features( # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() @@ -565,10 +543,7 @@ def calculate_image_features( # skimage features that need a mask and an image if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") @@ -588,15 +563,10 @@ def calculate_image_features( measurements_core = get_core_measurements() for ch_idx in range(n_channels): - - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" ch_image = image[ch_idx] if "cpmeasure:core" in measurements: - logg.info( - f"Calculating 'cpmeasure' core features for channel '{ch_idx}'." - ) + logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( _calculate_features_helper, @@ -613,20 +583,10 @@ def calculate_image_features( if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] - if channel_names is not None - else f"ch{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] - if channel_names is not None - else f"ch{ch2_idx}" - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - logg.info( - f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'." - ) + logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] From c865d57152ed039c8ca50b645118d8f6291ec600 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Mon, 31 Mar 2025 18:40:19 +0200 Subject: [PATCH 06/15] cleaned up a bit --- src/squidpy/exp/_feature.py | 947 ++++++++++++++++++++---------------- 1 file changed, 515 insertions(+), 432 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 18ccf7cfd..cb0e41101 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -15,7 +15,7 @@ from scipy import ndimage from skimage import measure from skimage.measure import label -from spatialdata import SpatialData +from spatialdata import SpatialData, rasterize from spatialdata._logging import logger as logg from spatialdata.models import TableModel @@ -25,402 +25,51 @@ __all__ = ["calculate_image_features"] - -def _get_regionprops_features( - cell_ids: Sequence[int], - labels: np.ndarray, - intensity_image: np.ndarray | None = None, - queue: Any | None = None, -) -> dict[str, float]: - """Calculate regionprops features for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - intensity_image - Optional intensity image for intensity-based features - queue - Optional queue for progress tracking. If provided, will send update signals. - - Returns - ------- - Dictionary of regionprops features - """ - # Define channel-independent properties (only need mask) - mask_props = { - "area", - "area_filled", - "area_convex", - "num_pixels", - "axis_major_length", - "axis_minor_length", - "eccentricity", - "equivalent_diameter", - "extent", - "feret_diameter_max", - "solidity", - "euler_number", - "centroid", - "centroid_local", - "perimeter", - "perimeter_crofton", - "inertia_tensor", - "inertia_tensor_eigvals", - } - - # Define channel-dependent properties (need intensity image) - intensity_props = { - "intensity_max", - "intensity_mean", - "intensity_min", - "intensity_std", - } - - features = {} - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - # labels only (channel independent) - if intensity_image is None: - for cell_id in cell_ids: - cell_mask_cropped, _, _ = _get_cell_crops( - cell_id=cell_id, - labels=labels, - ) - - if cell_mask_cropped is None: - continue - - region_prop = measure.regionprops(label_image=label(cell_mask_cropped)) - - if not region_prop: - continue - - cell_features = {} - - # Calculate regionprops features while ignoring warnings - for prop in mask_props: - try: - value = getattr(region_prop, prop) - - # Handle array-like properties - if isinstance(value, (np.ndarray, list, tuple)): - value = np.array(value) - if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - else: - cell_features[prop] = value - else: - cell_features[prop] = float(value) - except Exception: - continue - - if queue is not None: - queue.put(Signal.UPDATE) - - features[cell_id] = cell_features - - # Calculate intensity-dependent properties if intensity image is provided - else: - for cell_id in cell_ids: - cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops( - cell_id=cell_id, - labels=labels, - image1=intensity_image, - ) - - if cell_mask_cropped is None: - continue - - intensity_props_obj = measure.regionprops( - label_image=label(cell_mask_cropped), - intensity_image=intensity_image_cropped, - ) - - if not intensity_props_obj: - continue - - cell_features = {} - - for prop in intensity_props: - try: - value = getattr(intensity_props_obj, prop) - - # Skip callable properties - if callable(value): - continue - - # Handle array properties - if isinstance(value, (np.ndarray, list, tuple)): - value = np.array(value) - - if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) - elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) - else: - cell_features[prop] = value - else: - cell_features[prop] = float(value) - - except Exception: - continue - - if queue is not None: - queue.put(Signal.UPDATE) - - features[cell_id] = cell_features - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame.from_dict(features, orient="index") - - -def _measurement_wrapper( - func: Callable, - mask: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None = None, -) -> dict[str, Any]: - """Wrapper function to handle both core and correlation measurements. - - Parameters - ---------- - func - The measurement function to call - mask - The cell mask - image1 - First image (or only image for core measurements) - image2 - Second image for correlation measurements. If None, this is a core - measurement. - - Returns - ------- - Dictionary of feature values - """ - return func(mask, image1) if image2 is None else func(image1, image2, mask) - - -def _get_cell_crops( - cell_id: int, - labels: np.ndarray, - image1: np.ndarray | None = None, - image2: np.ndarray | None = None, - pad: int = 1, - verbose: bool = False, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: - """Generator function to get cropped arrays for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - image1 - First image to crop - image2 - Optional second image to crop - pad - Amount of padding to add around the cell - verbose - Whether to print warning messages - - Returns - ------- - Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty - """ - # Get cell mask and find bounding box in one step - cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - return None - - # Get bounding box - y_min, y_max = y_indices.min(), y_indices.max() - x_min, x_max = x_indices.min(), x_indices.max() - - # Get image dimensions - height, width = labels.shape - - # Calculate desired padding - y_pad_min = min(pad, y_min) # How much we can pad to the top - y_pad_max = min(pad, height - y_max - 1) # How much we can pad to the bottom - x_pad_min = min(pad, x_min) # How much we can pad to the left - x_pad_max = min(pad, width - x_max - 1) # How much we can pad to the right - - # Apply symmetric padding where possible - y_min -= y_pad_min - y_max += y_pad_max - x_min -= x_pad_min - x_max += x_pad_max - - # Warn if cell is at border and padding is asymmetric - if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): - logg.warning( - f"Cell {cell_id} is at image border. Padding is asymmetric: " - f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " - f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" - ) - - # Crop all arrays at once - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - - image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] - - return cell_mask_cropped, image1_cropped, image2_cropped - - -def _calculate_features_helper( - cell_ids: Sequence[int], - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None, - measurements: dict[str, Any], - channel1_name: str | None = None, - channel2_name: str | None = None, - queue: Any | None = None, - verbose: bool = False, -) -> pd.DataFrame: - """Helper function to calculate features for a subset of cells.""" - features_dict = {} - - # Pre-allocate arrays for type conversion - uint8_features = [ - "radial_distribution", - "radial_zernikes", - "intensity", - "sizeshape", - "zernike", - "ferret", - ] - float_features = ["manders_fold", "rwc"] - - # Pre-compute image normalization if needed - if "texture" in measurements: - img1_min = image1.min() - img1_max = image1.max() - img1_range = img1_max - img1_min + 1e-10 - if image2 is not None: - img2_min = image2.min() - img2_max = image2.max() - img2_range = img2_max - img2_min + 1e-10 - - for cell_id in cell_ids: - # Get cropped arrays for this cell - result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) - if result is None: - continue - - cell_mask_cropped, image1_cropped, image2_cropped = result - cell_features = {} - - # Calculate regionprops features first - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - regionprops_features = _get_regionprops_features( - cell_mask_cropped, - image1_cropped, - image1_cropped if image2 is None else None, - ) - if image2 is None: - regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()} - else: - regionprops_features = { - f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items() - } - cell_features.update(regionprops_features) - except Exception as e: - if verbose: - logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") - - # Calculate all available cp-measure features - for name, func in measurements.items(): - try: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - # Pre-convert inputs based on feature type - if name in uint8_features: - mask = cell_mask_cropped.astype(np.uint8) - img1 = image1_cropped.astype(np.uint8) - img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8) - elif name == "texture": - mask = cell_mask_cropped.astype(np.uint8) - img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range - img2 = ( - None - if image2_cropped is None - else (image2_cropped.astype(np.float32) - img2_min) / img2_range - ) - elif name in float_features: - mask = cell_mask_cropped.astype(np.float32) - img1 = image1_cropped.astype(np.float32) - img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) - else: - mask = cell_mask_cropped.astype(np.float32) - img1 = image1_cropped.astype(np.float32) - img2 = None if image2_cropped is None else image2_cropped.astype(np.float32) - - feature_dict = _measurement_wrapper(func, mask, img1, img2) - - for k, v in feature_dict.items(): - if len(v) > 1: - raise ValueError(f"Feature {k} has more than one value.") - else: - feature_dict[k] = float(v[0]) - - # Append channel names efficiently - if image2 is None: - feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()} - else: - feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()} - - cell_features.update(feature_dict) - except Exception as e: - if verbose: - logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") - - features_dict[cell_id] = cell_features - - if queue is not None: - queue.put(Signal.UPDATE) - - if queue is not None: - queue.put(Signal.FINISH) - - return pd.DataFrame.from_dict(features_dict, orient="index") +# Define constant property sets +_MASK_PROPS = { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "centroid", + "centroid_local", + "perimeter", + "perimeter_crofton", + "inertia_tensor", + "inertia_tensor_eigvals", +} +_INTENSITY_PROPS = { + "intensity_max", + "intensity_mean", + "intensity_min", + "intensity_std", +} @d.dedent @inject_docs(f=ImageFeature) def calculate_image_features( sdata: SpatialData, - labels_key: str, image_key: str, + labels_key: str | None = None, + shapes_key: str | None = None, scale: str | None = None, measurements: list[str] | str | None = None, adata_key_added: str = "morphology", invalid_as_zero: bool = True, n_jobs: int | None = None, backend: str = "loky", - show_progress_bar: bool = True, + show_progress_bar: bool = False, # slower, needs to be optimised verbose: bool = False, + inplace: bool = True, ) -> pd.DataFrame | None: """ Calculate features from segmentation masks using CellProfiler measurements. @@ -436,6 +85,9 @@ def calculate_image_features( labels_key Key in :attr:`spatialdata.SpatialData.labels` containing the segmentation masks. + shapes_key + Key in :attr:`spatialdata.SpatialData.shapes` containing the + shape features. image_key Key in :attr:`spatialdata.SpatialData.images` containing the intensity image. @@ -455,20 +107,62 @@ def calculate_image_features( to be installed. """ + if image_key not in sdata.images.keys(): + raise ValueError( + f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}" + ) + + if labels_key is not None and shapes_key is not None: + raise ValueError("Use either `labels_key` or `shapes_key`, not both.") + + if labels_key is not None and labels_key not in sdata.labels.keys(): + raise ValueError( + f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}" + ) + + if shapes_key is not None and shapes_key not in sdata.shapes.keys(): + raise ValueError( + f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}" + ) + if ( - isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) + or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") if scale is not None and not isinstance(scale, str): raise ValueError("Scale must be a string.") - if scale is not None: - image = np.asarray(sdata.images[image_key][scale].image.compute()) - labels = np.asarray(sdata.labels[labels_key][scale].image.compute()) + image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale) + labels = ( + _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) + if labels_key is not None + else None + ) + + if labels is not None and image.shape[1:] != labels.shape: + raise ValueError( + f"Image dimensions {image.shape[1:]} do not match labels dimensions {labels.shape} at scale '{scale}'" + ) + + if shapes_key is not None: + scale_str = f" (using scale '{scale}')" if scale is not None else "" + logg.info(f"Converting shapes to labels{scale_str}.") + _, max_y, max_x = image.shape + labels = np.asarray( + rasterize( + sdata.shapes[shapes_key], + ["x", "y"], + min_coordinate=[0, 0], + max_coordinate=[max_x, max_y], + target_coordinate_system="global", + target_unit_to_pixels=1.0, + return_regions_as_labels=True, + ) + ) else: - image = np.asarray(sdata.images[image_key].compute()) - labels = np.asarray(sdata.labels[labels_key].compute()) + labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) available_measurements = [ "skimage:label", @@ -484,13 +178,14 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [m for m in measurements if m not in available_measurements] + invalid_measurements = [ + m for m in measurements if m not in available_measurements + ] if invalid_measurements: raise ValueError( f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" ) - # Check if labels are empty if labels.size == 0: raise ValueError("Labels array is empty") @@ -498,29 +193,31 @@ def calculate_image_features( if max_label == 0: raise ValueError("No cells found in labels (max label is 0)") - # Get channel names if available channel_names = None - if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: + if ( + hasattr(sdata.images[image_key], "coords") + and "c" in sdata.images[image_key].coords + ): channel_names = sdata.images[image_key].coords["c"].values - # Handle image dimensions if image.ndim == 2: - image = image[None, :, :] # Add channel dimension + image = image[None, :, :] elif image.ndim != 3: raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") - # Check if image and labels have matching dimensions if image.shape[1:] != labels.shape: - raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") + raise ValueError( + f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" + ) if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() - # Get unique cell IDs from labels, excluding background (0) cell_ids = np.unique(labels) cell_ids = cell_ids[cell_ids != 0] + # Sort cell_ids to ensure consistent order + cell_ids = np.sort(cell_ids) - # Process each channel all_features = [] n_channels = image.shape[0] n_jobs = _get_n_cores(n_jobs) @@ -540,12 +237,12 @@ def calculate_image_features( )(labels=labels, intensity_image=None) all_features.append(res) - # skimage features that need a mask and an image if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" + ) ch_image = image[ch_idx] - logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( _get_regionprops_features, @@ -556,42 +253,47 @@ def calculate_image_features( show_progress_bar=show_progress_bar, verbose=verbose, )(labels=labels, intensity_image=ch_image) + # Append channel names to each feature column + res = res.rename(columns=lambda col: f"{col}_{ch_name}") all_features.append(res) - # cpmeasure features that need a mask and an image if "cpmeasure:core" in measurements: measurements_core = get_core_measurements() - for ch_idx in range(n_channels): - ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}" + ch_name = ( + channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" + ) ch_image = image[ch_idx] - if "cpmeasure:core" in measurements: - logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") - - res = parallelize( - _calculate_features_helper, - collection=cell_ids, - extractor=pd.concat, - n_jobs=n_jobs, - backend=backend, - show_progress_bar=show_progress_bar, - verbose=verbose, - )(labels, ch_image, None, measurements_core, ch_name) - all_features.append(res) + logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") + res = parallelize( + _calculate_features_helper, + collection=cell_ids, + extractor=pd.concat, + n_jobs=n_jobs, + backend=backend, + show_progress_bar=show_progress_bar, + verbose=verbose, + )(labels, ch_image, None, measurements_core, ch_name) + all_features.append(res) - # cpmeasure features that correlate two channels if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}" - ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}" - - logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.") - + ch1_name = ( + channel_names[ch1_idx] + if channel_names is not None + else f"{ch1_idx}" + ) + ch2_name = ( + channel_names[ch2_idx] + if channel_names is not None + else f"{ch2_idx}" + ) + logg.info( + f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'." + ) ch1_image = image[ch1_idx] ch2_image = image[ch2_idx] - - # Parallelize feature calculation res = parallelize( _calculate_features_helper, collection=cell_ids, @@ -603,7 +305,6 @@ def calculate_image_features( )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name) all_features.append(res) - # Create AnnData object from results combined_features = pd.concat(all_features, axis=1) if invalid_as_zero: @@ -611,7 +312,6 @@ def calculate_image_features( combined_features = combined_features.fillna(0) # Ensure cell IDs are preserved in the correct order - cell_ids = sorted(combined_features.index) combined_features = combined_features.loc[cell_ids] adata = ad.AnnData(X=combined_features) @@ -619,12 +319,395 @@ def calculate_image_features( adata.var_names = combined_features.columns adata.uns["spatialdata_attrs"] = { - "region": labels_key, + "region": labels_key if labels_key is not None else shapes_key, "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical([labels_key] * len(adata)) + adata.obs["region"] = pd.Categorical( + [labels_key if labels_key is not None else shapes_key] * len(adata) + ) adata.obs["label_id"] = cell_ids - # Add the AnnData object to the SpatialData object - sdata.tables[adata_key_added] = TableModel.parse(adata) + if inplace: + sdata.tables[adata_key_added] = TableModel.parse(adata) + else: + return combined_features + + +def _extract_features_from_regionprops( + region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False +) -> dict[str, float]: + """Extract features from a regionprops object given a list of properties.""" + cell_features = {} + for prop in props: + try: + value = getattr(region_obj, prop) + if skip_callable and callable(value): + continue + if isinstance(value, np.ndarray | list | tuple): + value = np.array(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i, j in itertools.product( + range(value.shape[0]), range(value.shape[1]) + ): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + else: + cell_features[prop] = value + else: + cell_features[prop] = float(value) + except Exception as e: + logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") + continue + return cell_features + + +def _calculate_regionprops_from_crop( + cell_mask_cropped: np.ndarray, + intensity_image_cropped: np.ndarray | None, + cell_id: int, +) -> dict[str, float]: + """ + Calculate regionprops features from pre-cropped arrays. + Uses intensity-based properties if an intensity image is provided. + """ + if intensity_image_cropped is None: + region_props = measure.regionprops(label_image=label(cell_mask_cropped)) + if not region_props: + return {} + return _extract_features_from_regionprops(region_props[0], _MASK_PROPS, cell_id) + else: + region_props = measure.regionprops( + label_image=label(cell_mask_cropped), + intensity_image=intensity_image_cropped, + ) + if not region_props: + return {} + return _extract_features_from_regionprops( + region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True + ) + + +def _append_channel_names( + features: dict, channel1: str, channel2: str | None = None +) -> dict: + """Append channel name(s) to all keys in the feature dictionary.""" + if channel2 is None: + return {f"{k}_{channel1}": v for k, v in features.items()} + else: + return {f"{k}_{channel1}_{channel2}": v for k, v in features.items()} + + +def _prepare_images_for_measurement( + name: str, + cell_mask: np.ndarray, + img1: np.ndarray, + img2: np.ndarray | None, + conv_params: dict, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]: + """ + Convert inputs to the appropriate dtype based on the measurement type. + """ + if name in conv_params.get("uint8_features", []): + mask = cell_mask.astype(np.uint8) + image1_prepared = img1.astype(np.uint8) + image2_prepared = None if img2 is None else img2.astype(np.uint8) + elif name == "texture": + mask = cell_mask.astype(np.uint8) + image1_prepared = ( + img1.astype(np.float32) - conv_params["img1_min"] + ) / conv_params["img1_range"] + image2_prepared = ( + None + if img2 is None + else (img2.astype(np.float32) - conv_params["img2_min"]) + / conv_params["img2_range"] + ) + elif name in conv_params.get("float_features", []): + mask = cell_mask.astype(np.float32) + image1_prepared = img1.astype(np.float32) + image2_prepared = None if img2 is None else img2.astype(np.float32) + else: + mask = cell_mask.astype(np.float32) + image1_prepared = img1.astype(np.float32) + image2_prepared = None if img2 is None else img2.astype(np.float32) + return mask, image1_prepared, image2_prepared + + +def _get_cell_crops( + cell_id: int, + labels: np.ndarray, + image1: np.ndarray | None = None, + image2: np.ndarray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: + """Generator function to get cropped arrays for a cell. + + Parameters + ---------- + cell_id + The ID of the cell to process + labels + The labels array containing cell masks + image1 + First image to crop + image2 + Optional second image to crop + pad + Amount of padding to add around the cell + verbose + Whether to print warning messages + + Returns + ------- + Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty + """ + cell_mask = labels == cell_id + y_indices, x_indices = np.where(cell_mask) + if len(y_indices) == 0: # Skip empty cells + return None + + y_min, y_max = y_indices.min(), y_indices.max() + x_min, x_max = x_indices.min(), x_indices.max() + height, width = labels.shape + + y_pad_min = min(pad, y_min) + y_pad_max = min(pad, height - y_max - 1) + x_pad_min = min(pad, x_min) + x_pad_max = min(pad, width - x_max - 1) + + y_min -= y_pad_min + y_max += y_pad_max + x_min -= x_pad_min + x_max += x_pad_max + + if verbose and ( + y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad + ): + logg.warning( + f"Cell {cell_id} is at image border. Padding is asymmetric: " + f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " + f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" + ) + + cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] + image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] + image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] + + return cell_mask_cropped, image1_cropped, image2_cropped + + +def _get_regionprops_features( + cell_ids: Sequence[int], + labels: np.ndarray, + intensity_image: np.ndarray | None = None, + queue: Any | None = None, +) -> pd.DataFrame: + """Calculate regionprops features for each cell from the full label image.""" + # Initialize features dictionary with None values to preserve order + features = {cell_id: None for cell_id in cell_ids} + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Process cells in order to preserve order + for cell_id in cell_ids: + crop = _get_cell_crops(cell_id, labels, image1=intensity_image) + if crop is None: + continue + cell_mask_cropped, intensity_image_cropped, _ = crop + cell_features = _calculate_regionprops_from_crop( + cell_mask_cropped, intensity_image_cropped, cell_id + ) + features[cell_id] = cell_features + if queue is not None: + queue.put(Signal.UPDATE) + if queue is not None: + queue.put(Signal.FINISH) + + # Convert to DataFrame while preserving order + df = pd.DataFrame.from_dict(features, orient="index") + # Ensure the index matches the input cell_ids order + df = df.reindex(cell_ids) + return df + + +def _measurement_wrapper( + func: Callable, + mask: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None = None, +) -> dict[str, Any]: + """Wrapper function to handle both core and correlation measurements. + + Parameters + ---------- + func + The measurement function to call + mask + The cell mask + image1 + First image (or only image for core measurements) + image2 + Second image for correlation measurements. If None, this is a core + measurement. + + Returns + ------- + Dictionary of feature values + """ + return func(mask, image1) if image2 is None else func(image1, image2, mask) + + +def _calculate_features_helper( + cell_ids: Sequence[int], + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray | None, + measurements: dict[str, Any], + channel1_name: str | None = None, + channel2_name: str | None = None, + queue: Any | None = None, + verbose: bool = False, +) -> pd.DataFrame: + """Helper function to calculate features for a subset of cells.""" + # Initialize features dictionary with None values to preserve order + features_dict = {cell_id: None for cell_id in cell_ids} + + # Pre-allocate lists for type conversion + uint8_features = [ + "radial_distribution", + "radial_zernikes", + "intensity", + "sizeshape", + "zernike", + "ferret", + ] + float_features = ["manders_fold", "rwc"] + + # Pre-compute normalization if needed + conv_params: dict[str, Any] = { + "uint8_features": uint8_features, + "float_features": float_features, + } + if "texture" in measurements: + img1_min = image1.min() + img1_max = image1.max() + conv_params["img1_min"] = img1_min + conv_params["img1_range"] = img1_max - img1_min + 1e-10 + if image2 is not None: + img2_min = image2.min() + img2_max = image2.max() + conv_params["img2_min"] = img2_min + conv_params["img2_range"] = img2_max - img2_min + 1e-10 + + # Process cells in order to preserve order + for cell_id in cell_ids: + crop = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose) + if crop is None: + continue + cell_mask_cropped, image1_cropped, image2_cropped = crop + cell_features = {} + + # Calculate regionprops features using cached crop + try: + region_features = _calculate_regionprops_from_crop( + cell_mask_cropped, + image1_cropped if image2 is None else None, + cell_id, + ) + if image2 is None: + region_features = _append_channel_names(region_features, channel1_name) + else: + region_features = _append_channel_names( + region_features, channel1_name, channel2_name + ) + cell_features.update(region_features) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" + ) + + # Calculate cp-measure features for each measurement + for name, func in measurements.items(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement( + name, + cell_mask_cropped, + image1_cropped, + image2_cropped, + conv_params, + ) + feature_dict = _measurement_wrapper( + func, mask_conv, img1_conv, img2_conv + ) + # Ensure each feature returns a single value + for k, v in feature_dict.items(): + if len(v) > 1: + raise ValueError(f"Feature {k} has more than one value.") + else: + feature_dict[k] = float(v[0]) + if image2 is None: + feature_dict = _append_channel_names( + feature_dict, channel1_name + ) + else: + feature_dict = _append_channel_names( + feature_dict, channel1_name, channel2_name + ) + cell_features.update(feature_dict) + except Exception as e: + if verbose: + logg.warning( + f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" + ) + + features_dict[cell_id] = cell_features + + if queue is not None: + queue.put(Signal.UPDATE) + + if queue is not None: + queue.put(Signal.FINISH) + + # Convert to DataFrame while preserving order + df = pd.DataFrame.from_dict(features_dict, orient="index") + # Ensure the index matches the input cell_ids order + df = df.reindex(cell_ids) + return df + + +def _get_array_from_DataTree_or_DataArray( + data: xr.DataTree | xr.DataArray, scale: str | None = None +) -> np.ndarray: + """ + Returns a NumPy array for the given data and scale. + If data is an xr.DataTree, it checks for the scale key and computes the image. + If data is an xr.DataArray, it computes the array (ignoring scale). + + Parameters + ---------- + data + The xarray data to convert to a NumPy array + scale + Optional scale key for DataTree data + + Returns + ------- + np.ndarray + The computed NumPy array + """ + if not isinstance(data, xr.DataTree): + return np.asarray(data.compute()) + if scale is None: + raise ValueError("Scale must be provided for DataTree data") + if scale not in data: + raise ValueError( + f"Scale '{scale}' not found. Available scales: {list(data.keys())}" + ) + return np.asarray(data[scale].image.compute()) From 0bc950764e34f3c85f993acec490840c5a298a0c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 16:40:41 +0000 Subject: [PATCH 07/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 119 +++++++++--------------------------- 1 file changed, 28 insertions(+), 91 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index cb0e41101..c49ddc5c4 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -108,26 +108,19 @@ def calculate_image_features( """ if image_key not in sdata.images.keys(): - raise ValueError( - f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}" - ) + raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}") if labels_key is not None and shapes_key is not None: raise ValueError("Use either `labels_key` or `shapes_key`, not both.") if labels_key is not None and labels_key not in sdata.labels.keys(): - raise ValueError( - f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}" - ) + raise ValueError(f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}") if shapes_key is not None and shapes_key not in sdata.shapes.keys(): - raise ValueError( - f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}" - ) + raise ValueError(f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}") if ( - isinstance(sdata.images[image_key], xr.DataTree) - or isinstance(sdata.labels[labels_key], xr.DataTree) + isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree) ) and scale is None: raise ValueError("When using multi-scale data, please specify the scale.") @@ -135,11 +128,7 @@ def calculate_image_features( raise ValueError("Scale must be a string.") image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale) - labels = ( - _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) - if labels_key is not None - else None - ) + labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) if labels_key is not None else None if labels is not None and image.shape[1:] != labels.shape: raise ValueError( @@ -178,9 +167,7 @@ def calculate_image_features( measurements = [measurements] if isinstance(measurements, list): - invalid_measurements = [ - m for m in measurements if m not in available_measurements - ] + invalid_measurements = [m for m in measurements if m not in available_measurements] if invalid_measurements: raise ValueError( f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}" @@ -194,10 +181,7 @@ def calculate_image_features( raise ValueError("No cells found in labels (max label is 0)") channel_names = None - if ( - hasattr(sdata.images[image_key], "coords") - and "c" in sdata.images[image_key].coords - ): + if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords: channel_names = sdata.images[image_key].coords["c"].values if image.ndim == 2: @@ -206,9 +190,7 @@ def calculate_image_features( raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}") if image.shape[1:] != labels.shape: - raise ValueError( - f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}" - ) + raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}") if "cpmeasure:correlation" in measurements: measurements_corr = get_correlation_measurements() @@ -239,9 +221,7 @@ def calculate_image_features( if "skimage:label+image" in measurements: for ch_idx in range(n_channels): - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( @@ -260,9 +240,7 @@ def calculate_image_features( if "cpmeasure:core" in measurements: measurements_core = get_core_measurements() for ch_idx in range(n_channels): - ch_name = ( - channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" - ) + ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}" ch_image = image[ch_idx] logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( @@ -279,16 +257,8 @@ def calculate_image_features( if "cpmeasure:correlation" in measurements: for ch1_idx in range(n_channels): for ch2_idx in range(ch1_idx + 1, n_channels): - ch1_name = ( - channel_names[ch1_idx] - if channel_names is not None - else f"{ch1_idx}" - ) - ch2_name = ( - channel_names[ch2_idx] - if channel_names is not None - else f"{ch2_idx}" - ) + ch1_name = channel_names[ch1_idx] if channel_names is not None else f"{ch1_idx}" + ch2_name = channel_names[ch2_idx] if channel_names is not None else f"{ch2_idx}" logg.info( f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'." ) @@ -323,9 +293,7 @@ def calculate_image_features( "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical( - [labels_key if labels_key is not None else shapes_key] * len(adata) - ) + adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata)) adata.obs["label_id"] = cell_ids if inplace: @@ -350,9 +318,7 @@ def _extract_features_from_regionprops( for i, v in enumerate(value): cell_features[f"{prop}_{i}"] = float(v) elif value.ndim == 2: - for i, j in itertools.product( - range(value.shape[0]), range(value.shape[1]) - ): + for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: cell_features[prop] = value @@ -385,14 +351,10 @@ def _calculate_regionprops_from_crop( ) if not region_props: return {} - return _extract_features_from_regionprops( - region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True - ) + return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True) -def _append_channel_names( - features: dict, channel1: str, channel2: str | None = None -) -> dict: +def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict: """Append channel name(s) to all keys in the feature dictionary.""" if channel2 is None: return {f"{k}_{channel1}": v for k, v in features.items()} @@ -416,14 +378,9 @@ def _prepare_images_for_measurement( image2_prepared = None if img2 is None else img2.astype(np.uint8) elif name == "texture": mask = cell_mask.astype(np.uint8) - image1_prepared = ( - img1.astype(np.float32) - conv_params["img1_min"] - ) / conv_params["img1_range"] + image1_prepared = (img1.astype(np.float32) - conv_params["img1_min"]) / conv_params["img1_range"] image2_prepared = ( - None - if img2 is None - else (img2.astype(np.float32) - conv_params["img2_min"]) - / conv_params["img2_range"] + None if img2 is None else (img2.astype(np.float32) - conv_params["img2_min"]) / conv_params["img2_range"] ) elif name in conv_params.get("float_features", []): mask = cell_mask.astype(np.float32) @@ -484,9 +441,7 @@ def _get_cell_crops( x_min -= x_pad_min x_max += x_pad_max - if verbose and ( - y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad - ): + if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): logg.warning( f"Cell {cell_id} is at image border. Padding is asymmetric: " f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " @@ -518,9 +473,7 @@ def _get_regionprops_features( if crop is None: continue cell_mask_cropped, intensity_image_cropped, _ = crop - cell_features = _calculate_regionprops_from_crop( - cell_mask_cropped, intensity_image_cropped, cell_id - ) + cell_features = _calculate_regionprops_from_crop(cell_mask_cropped, intensity_image_cropped, cell_id) features[cell_id] = cell_features if queue is not None: queue.put(Signal.UPDATE) @@ -621,15 +574,11 @@ def _calculate_features_helper( if image2 is None: region_features = _append_channel_names(region_features, channel1_name) else: - region_features = _append_channel_names( - region_features, channel1_name, channel2_name - ) + region_features = _append_channel_names(region_features, channel1_name, channel2_name) cell_features.update(region_features) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") # Calculate cp-measure features for each measurement for name, func in measurements.items(): @@ -643,9 +592,7 @@ def _calculate_features_helper( image2_cropped, conv_params, ) - feature_dict = _measurement_wrapper( - func, mask_conv, img1_conv, img2_conv - ) + feature_dict = _measurement_wrapper(func, mask_conv, img1_conv, img2_conv) # Ensure each feature returns a single value for k, v in feature_dict.items(): if len(v) > 1: @@ -653,19 +600,13 @@ def _calculate_features_helper( else: feature_dict[k] = float(v[0]) if image2 is None: - feature_dict = _append_channel_names( - feature_dict, channel1_name - ) + feature_dict = _append_channel_names(feature_dict, channel1_name) else: - feature_dict = _append_channel_names( - feature_dict, channel1_name, channel2_name - ) + feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name) cell_features.update(feature_dict) except Exception as e: if verbose: - logg.warning( - f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}" - ) + logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") features_dict[cell_id] = cell_features @@ -682,9 +623,7 @@ def _calculate_features_helper( return df -def _get_array_from_DataTree_or_DataArray( - data: xr.DataTree | xr.DataArray, scale: str | None = None -) -> np.ndarray: +def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray: """ Returns a NumPy array for the given data and scale. If data is an xr.DataTree, it checks for the scale key and computes the image. @@ -707,7 +646,5 @@ def _get_array_from_DataTree_or_DataArray( if scale is None: raise ValueError("Scale must be provided for DataTree data") if scale not in data: - raise ValueError( - f"Scale '{scale}' not found. Available scales: {list(data.keys())}" - ) + raise ValueError(f"Scale '{scale}' not found. Available scales: {list(data.keys())}") return np.asarray(data[scale].image.compute()) From 60ed2234e93e08043ecac1cc2afb1e0dddae4e1c Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 1 Apr 2025 15:13:21 +0200 Subject: [PATCH 08/15] fixed off-by-one error --- src/squidpy/exp/_feature.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index cb0e41101..7c4c8b5db 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -326,7 +326,11 @@ def calculate_image_features( adata.obs["region"] = pd.Categorical( [labels_key if labels_key is not None else shapes_key] * len(adata) ) - adata.obs["label_id"] = cell_ids + # here we either use the cell_ids or the index of the shapes. Needed + # because when converting the shapes to labels, a potential index 0 + # in the shapes is set to 1 in the labels and therefore we'd otherwise + # be off-by-one in the label_id. + adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids if inplace: sdata.tables[adata_key_added] = TableModel.parse(adata) From 9aade33593deeb90ede9175204f532ceac726ac5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Apr 2025 13:14:11 +0000 Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 357b5c4bb..bee79a1a8 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -293,12 +293,10 @@ def calculate_image_features( "region_key": "region", "instance_key": "label_id", } - adata.obs["region"] = pd.Categorical( - [labels_key if labels_key is not None else shapes_key] * len(adata) - ) - # here we either use the cell_ids or the index of the shapes. Needed + adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata)) + # here we either use the cell_ids or the index of the shapes. Needed # because when converting the shapes to labels, a potential index 0 - # in the shapes is set to 1 in the labels and therefore we'd otherwise + # in the shapes is set to 1 in the labels and therefore we'd otherwise # be off-by-one in the label_id. adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids From 6d056b48f06bcd9460d664b90496dd296319b94a Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 15:55:10 -0400 Subject: [PATCH 10/15] fixed mypy --- src/squidpy/exp/_feature.py | 90 +++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index bee79a1a8..e642fc581 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -9,6 +9,7 @@ import anndata as ad import numpy as np +import numpy.typing as npt import pandas as pd import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements @@ -53,6 +54,12 @@ "intensity_std", } +# Define array types using modern syntax +NDArray = npt.NDArray[Any] # Generic array +FloatArray = npt.NDArray[np.float32] # Float32 array +IntArray = npt.NDArray[np.int_] # Integer array +BoolArray = npt.NDArray[np.bool_] # Boolean array + @d.dedent @inject_docs(f=ImageFeature) @@ -199,6 +206,7 @@ def calculate_image_features( cell_ids = cell_ids[cell_ids != 0] # Sort cell_ids to ensure consistent order cell_ids = np.sort(cell_ids) + cell_ids_list = cell_ids.tolist() # Convert to list for parallelize all_features = [] n_channels = image.shape[0] @@ -210,7 +218,7 @@ def calculate_image_features( logg.info("Calculating 'skimage' label features.") res = parallelize( _get_regionprops_features, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -226,7 +234,7 @@ def calculate_image_features( logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.") res = parallelize( _get_regionprops_features, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -234,7 +242,7 @@ def calculate_image_features( verbose=verbose, )(labels=labels, intensity_image=ch_image) # Append channel names to each feature column - res = res.rename(columns=lambda col: f"{col}_{ch_name}") + res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}") all_features.append(res) if "cpmeasure:core" in measurements: @@ -245,7 +253,7 @@ def calculate_image_features( logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.") res = parallelize( _calculate_features_helper, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -266,7 +274,7 @@ def calculate_image_features( ch2_image = image[ch2_idx] res = parallelize( _calculate_features_helper, - collection=cell_ids, + collection=cell_ids_list, extractor=pd.concat, n_jobs=n_jobs, backend=backend, @@ -307,7 +315,10 @@ def calculate_image_features( def _extract_features_from_regionprops( - region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False + region_obj: Any, + props: set[str], + cell_id: int, + skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" cell_features = {} @@ -325,18 +336,18 @@ def _extract_features_from_regionprops( for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) else: - cell_features[prop] = value + cell_features[prop] = float(value.flatten()[0]) # Convert to float else: cell_features[prop] = float(value) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue return cell_features def _calculate_regionprops_from_crop( - cell_mask_cropped: np.ndarray, - intensity_image_cropped: np.ndarray | None, + cell_mask_cropped: NDArray, + intensity_image_cropped: NDArray | None, cell_id: int, ) -> dict[str, float]: """ @@ -358,7 +369,11 @@ def _calculate_regionprops_from_crop( return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True) -def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict: +def _append_channel_names( + features: dict[str, Any], + channel1: str | None, + channel2: str | None = None, +) -> dict[str, Any]: """Append channel name(s) to all keys in the feature dictionary.""" if channel2 is None: return {f"{k}_{channel1}": v for k, v in features.items()} @@ -368,11 +383,11 @@ def _append_channel_names(features: dict, channel1: str, channel2: str | None = def _prepare_images_for_measurement( name: str, - cell_mask: np.ndarray, - img1: np.ndarray, - img2: np.ndarray | None, - conv_params: dict, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]: + cell_mask: NDArray, + img1: NDArray, + img2: NDArray | None, + conv_params: dict[str, Any], +) -> tuple[NDArray, NDArray | None, NDArray | None]: """ Convert inputs to the appropriate dtype based on the measurement type. """ @@ -399,12 +414,12 @@ def _prepare_images_for_measurement( def _get_cell_crops( cell_id: int, - labels: np.ndarray, - image1: np.ndarray | None = None, - image2: np.ndarray | None = None, + labels: NDArray, + image1: NDArray | None = None, + image2: NDArray | None = None, pad: int = 1, verbose: bool = False, -) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None: +) -> tuple[NDArray, NDArray | None, NDArray | None] | None: """Generator function to get cropped arrays for a cell. Parameters @@ -461,13 +476,13 @@ def _get_cell_crops( def _get_regionprops_features( cell_ids: Sequence[int], - labels: np.ndarray, - intensity_image: np.ndarray | None = None, + labels: NDArray, + intensity_image: NDArray | None = None, queue: Any | None = None, ) -> pd.DataFrame: """Calculate regionprops features for each cell from the full label image.""" # Initialize features dictionary with None values to preserve order - features = {cell_id: None for cell_id in cell_ids} + features = dict.fromkeys(cell_ids, None) with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -492,10 +507,10 @@ def _get_regionprops_features( def _measurement_wrapper( - func: Callable, - mask: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None = None, + func: Callable[..., dict[str, Any]], + mask: NDArray, + image1: NDArray | None, + image2: NDArray | None = None, ) -> dict[str, Any]: """Wrapper function to handle both core and correlation measurements. @@ -515,14 +530,16 @@ def _measurement_wrapper( ------- Dictionary of feature values """ + if image1 is None: + return {} # Return empty dict if no image data return func(mask, image1) if image2 is None else func(image1, image2, mask) def _calculate_features_helper( cell_ids: Sequence[int], - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray | None, + labels: NDArray, + image1: NDArray, + image2: NDArray | None, measurements: dict[str, Any], channel1_name: str | None = None, channel2_name: str | None = None, @@ -531,7 +548,7 @@ def _calculate_features_helper( ) -> pd.DataFrame: """Helper function to calculate features for a subset of cells.""" # Initialize features dictionary with None values to preserve order - features_dict = {cell_id: None for cell_id in cell_ids} + features_dict = dict.fromkeys(cell_ids, None) # Pre-allocate lists for type conversion uint8_features = [ @@ -580,7 +597,7 @@ def _calculate_features_helper( else: region_features = _append_channel_names(region_features, channel1_name, channel2_name) cell_features.update(region_features) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: if verbose: logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}") @@ -589,6 +606,8 @@ def _calculate_features_helper( try: with warnings.catch_warnings(): warnings.simplefilter("ignore") + if image1_cropped is None: + continue mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement( name, cell_mask_cropped, @@ -608,7 +627,7 @@ def _calculate_features_helper( else: feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name) cell_features.update(feature_dict) - except Exception as e: + except (ValueError, TypeError, AttributeError) as e: if verbose: logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}") @@ -627,7 +646,10 @@ def _calculate_features_helper( return df -def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray: +def _get_array_from_DataTree_or_DataArray( + data: xr.DataTree | xr.DataArray, + scale: str | None = None, +) -> NDArray: """ Returns a NumPy array for the given data and scale. If data is an xr.DataTree, it checks for the scale key and computes the image. From 5e11b8dcb60002d03c718865f24a46f703f8168e Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 16:33:20 -0400 Subject: [PATCH 11/15] numba --- src/squidpy/exp/_feature.py | 150 +++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 44 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index e642fc581..8f734ba00 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -23,6 +23,8 @@ from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize +from numba import njit, prange +import os __all__ = ["calculate_image_features"] @@ -321,27 +323,63 @@ def _extract_features_from_regionprops( skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" - cell_features = {} + # Pre-allocate arrays for common cases + max_1d_features = 10 # Adjust based on typical usage + max_2d_features = 10 # Adjust based on typical usage + + # Initialize arrays for batch processing + names_1d = np.empty(max_1d_features, dtype=object) + values_1d = np.empty(max_1d_features, dtype=float) + names_2d = np.empty(max_2d_features, dtype=object) + values_2d = np.empty(max_2d_features, dtype=float) + + # Initialize counters + count_1d = 0 + count_2d = 0 + scalar_features = {} + for prop in props: try: value = getattr(region_obj, prop) if skip_callable and callable(value): continue + if isinstance(value, np.ndarray | list | tuple): value = np.array(value) if value.ndim == 1: - for i, v in enumerate(value): - cell_features[f"{prop}_{i}"] = float(v) + # Vectorized operation for 1D arrays + n = len(value) + if count_1d + n <= max_1d_features: + indices = np.arange(n) + names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices] + values_1d[count_1d:count_1d+n] = value.astype(float) + count_1d += n elif value.ndim == 2: - for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])): - cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + # Vectorized operation for 2D arrays + rows, cols = value.shape + n = rows * cols + if count_2d + n <= max_2d_features: + i, j = np.meshgrid(range(rows), range(cols), indexing='ij') + names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())] + values_2d[count_2d:count_2d+n] = value.ravel().astype(float) + count_2d += n else: - cell_features[prop] = float(value.flatten()[0]) # Convert to float + scalar_features[prop] = float(value.flatten()[0]) else: - cell_features[prop] = float(value) + scalar_features[prop] = float(value) + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue + + # Combine all features + cell_features = {} + if count_1d > 0: + cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d]))) + if count_2d > 0: + cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d]))) + cell_features.update(scalar_features) + return cell_features @@ -412,65 +450,89 @@ def _prepare_images_for_measurement( return mask, image1_prepared, image2_prepared -def _get_cell_crops( +@njit(fastmath=True) +def _get_cell_crops_numba( cell_id: int, - labels: NDArray, - image1: NDArray | None = None, - image2: NDArray | None = None, + labels: np.ndarray, + image1: np.ndarray, + image2: np.ndarray, pad: int = 1, - verbose: bool = False, -) -> tuple[NDArray, NDArray | None, NDArray | None] | None: - """Generator function to get cropped arrays for a cell. - - Parameters - ---------- - cell_id - The ID of the cell to process - labels - The labels array containing cell masks - image1 - First image to crop - image2 - Optional second image to crop - pad - Amount of padding to add around the cell - verbose - Whether to print warning messages - - Returns - ------- - Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """Numba-accelerated version of _get_cell_crops. + + Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ + # Find cell boundaries cell_mask = labels == cell_id y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: # Skip empty cells - return None + if len(y_indices) == 0: + # Return empty arrays if no cell found + return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() height, width = labels.shape + # Calculate padding y_pad_min = min(pad, y_min) y_pad_max = min(pad, height - y_max - 1) x_pad_min = min(pad, x_min) x_pad_max = min(pad, width - x_max - 1) + # Apply padding y_min -= y_pad_min y_max += y_pad_max x_min -= x_pad_min x_max += x_pad_max - if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad): - logg.warning( - f"Cell {cell_id} is at image border. Padding is asymmetric: " - f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, " - f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right" - ) + # Calculate crop dimensions + y_size = y_max - y_min + x_size = x_max - x_min + + # Create output arrays + cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) + image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) + image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) + + # Copy data + for i in range(y_size): + for j in range(x_size): + cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j] + if image1.size > 0: # Only copy if image1 is not empty + image1_cropped[i, j] = image1[y_min + i, x_min + j] + if image2.size > 0: # Only copy if image2 is not empty + image2_cropped[i, j] = image2[y_min + i, x_min + j] + + return cell_mask_cropped, image1_cropped, image2_cropped - cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max] - image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max] - image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max] +def _get_cell_crops( + cell_id: int, + labels: NDArray, + image1: NDArray | None = None, + image2: NDArray | None = None, + pad: int = 1, + verbose: bool = False, +) -> tuple[NDArray, NDArray | None, NDArray | None] | None: + """Generator function to get cropped arrays for a cell.""" + # Create empty arrays for unused images + empty_image = np.zeros((0, 0), dtype=np.float32) + image1_np = image1 if image1 is not None else empty_image + image2_np = image2 if image2 is not None else empty_image + + # Use Numba-accelerated version + cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba( + cell_id, labels, image1_np, image2_np, pad + ) + + # Return None if no cell found + if cell_mask_cropped.size == 0: + return None + + # Convert back to None for unused images + image1_cropped = image1_cropped if image1 is not None else None + image2_cropped = image2_cropped if image2 is not None else None + return cell_mask_cropped, image1_cropped, image2_cropped From 7963e845f73972f6231ffb01df164eb3f35402ca Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 16:39:24 -0400 Subject: [PATCH 12/15] pre-alloc types --- src/squidpy/exp/_feature.py | 89 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 8f734ba00..74a635d55 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -62,6 +62,18 @@ IntArray = npt.NDArray[np.int_] # Integer array BoolArray = npt.NDArray[np.bool_] # Boolean array +# Define property sets at module level for better performance +_SCALAR_PROPS = frozenset({ + "area", "area_filled", "area_convex", "num_pixels", + "axis_major_length", "axis_minor_length", "eccentricity", + "equivalent_diameter", "extent", "feret_diameter_max", + "solidity", "euler_number", "perimeter", "perimeter_crofton" +}) + +_ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"}) +_ARRAY_2D_PROPS = frozenset({"inertia_tensor"}) +_SPECIAL_PROPS = frozenset({"inertia_tensor_eigvals"}) + @d.dedent @inject_docs(f=ImageFeature) @@ -323,20 +335,7 @@ def _extract_features_from_regionprops( skip_callable: bool = False, ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" - # Pre-allocate arrays for common cases - max_1d_features = 10 # Adjust based on typical usage - max_2d_features = 10 # Adjust based on typical usage - - # Initialize arrays for batch processing - names_1d = np.empty(max_1d_features, dtype=object) - values_1d = np.empty(max_1d_features, dtype=float) - names_2d = np.empty(max_2d_features, dtype=object) - values_2d = np.empty(max_2d_features, dtype=float) - - # Initialize counters - count_1d = 0 - count_2d = 0 - scalar_features = {} + cell_features = {} for prop in props: try: @@ -344,42 +343,44 @@ def _extract_features_from_regionprops( if skip_callable and callable(value): continue - if isinstance(value, np.ndarray | list | tuple): - value = np.array(value) - if value.ndim == 1: - # Vectorized operation for 1D arrays - n = len(value) - if count_1d + n <= max_1d_features: - indices = np.arange(n) - names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices] - values_1d[count_1d:count_1d+n] = value.astype(float) - count_1d += n - elif value.ndim == 2: - # Vectorized operation for 2D arrays - rows, cols = value.shape - n = rows * cols - if count_2d + n <= max_2d_features: - i, j = np.meshgrid(range(rows), range(cols), indexing='ij') - names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())] - values_2d[count_2d:count_2d+n] = value.ravel().astype(float) - count_2d += n - else: - scalar_features[prop] = float(value.flatten()[0]) + if prop in _SCALAR_PROPS: + cell_features[prop] = float(value) + elif prop in _ARRAY_1D_PROPS: + # Convert to array only once + value = np.asarray(value) + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif prop in _ARRAY_2D_PROPS: + # Convert to array only once + value = np.asarray(value) + for i in range(value.shape[0]): + for j in range(value.shape[1]): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + elif prop in _SPECIAL_PROPS: + # Convert to array only once + value = np.asarray(value) + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) else: - scalar_features[prop] = float(value) + # Fallback for any other properties + if isinstance(value, (np.ndarray, list, tuple)): + value = np.asarray(value) + if value.ndim == 1: + for i, v in enumerate(value): + cell_features[f"{prop}_{i}"] = float(v) + elif value.ndim == 2: + for i in range(value.shape[0]): + for j in range(value.shape[1]): + cell_features[f"{prop}_{i}x{j}"] = float(value[i, j]) + else: + cell_features[prop] = float(value.flatten()[0]) + else: + cell_features[prop] = float(value) except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue - # Combine all features - cell_features = {} - if count_1d > 0: - cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d]))) - if count_2d > 0: - cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d]))) - cell_features.update(scalar_features) - return cell_features From 6998a7d46ab9f7a30eee1f0343f3477c764e9ae2 Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 17:31:11 -0400 Subject: [PATCH 13/15] pyproject + numba --- pyproject.toml | 2 +- src/squidpy/exp/_feature.py | 84 ++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cd4e7a665..007118929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ dependencies = [ "zarr>=2.6.1,<3.0.0", "spatialdata>=0.2.5", "centrosome>=1.2.3", - "cp-measure>=0.1.4" + "cp_measure>=0.1.4" ] [project.optional-dependencies] diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 74a635d55..562286e99 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -463,46 +463,57 @@ def _get_cell_crops_numba( Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ - # Find cell boundaries + # Find cell boundaries using vectorized operations cell_mask = labels == cell_id - y_indices, x_indices = np.where(cell_mask) - if len(y_indices) == 0: - # Return empty arrays if no cell found + if not np.any(cell_mask): return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) + # Get non-zero indices efficiently + y_indices, x_indices = np.nonzero(cell_mask) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() + + # Get image dimensions height, width = labels.shape - # Calculate padding + # Calculate padding with boundary checks in one step y_pad_min = min(pad, y_min) y_pad_max = min(pad, height - y_max - 1) x_pad_min = min(pad, x_min) x_pad_max = min(pad, width - x_max - 1) - # Apply padding - y_min -= y_pad_min - y_max += y_pad_max - x_min -= x_pad_min - x_max += x_pad_max - - # Calculate crop dimensions - y_size = y_max - y_min - x_size = x_max - x_min + # Calculate crop dimensions with padding + y_start = y_min - y_pad_min + y_end = y_max + y_pad_max + 1 + x_start = x_min - x_pad_min + x_end = x_max + x_pad_max + 1 - # Create output arrays + # Create output arrays with exact size + y_size = y_end - y_start + x_size = x_end - x_start + + # Create cell mask crop cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) - image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) - image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) - - # Copy data for i in range(y_size): for j in range(x_size): - cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j] - if image1.size > 0: # Only copy if image1 is not empty - image1_cropped[i, j] = image1[y_min + i, x_min + j] - if image2.size > 0: # Only copy if image2 is not empty - image2_cropped[i, j] = image2[y_min + i, x_min + j] + cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j] + + # Handle image crops efficiently + if image1.size > 0: + image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) + for i in range(y_size): + for j in range(x_size): + image1_cropped[i, j] = image1[y_start + i, x_start + j] + else: + image1_cropped = np.zeros((0, 0), dtype=image1.dtype) + + if image2.size > 0: + image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) + for i in range(y_size): + for j in range(x_size): + image2_cropped[i, j] = image2[y_start + i, x_start + j] + else: + image2_cropped = np.zeros((0, 0), dtype=image2.dtype) return cell_mask_cropped, image1_cropped, image2_cropped @@ -595,7 +606,30 @@ def _measurement_wrapper( """ if image1 is None: return {} # Return empty dict if no image data - return func(mask, image1) if image2 is None else func(image1, image2, mask) + + try: + if image2 is None: + return func(mask, image1) + else: + # Check if we have valid data for correlation + if not np.any(mask) or not np.any(image1) or not np.any(image2): + # Get feature names from a successful call to maintain structure + dummy_mask = np.ones((2, 2), dtype=bool) + dummy_img = np.ones((2, 2), dtype=image1.dtype) + feature_names = func(dummy_img, dummy_img, dummy_mask).keys() + # Return dictionary with NaN values for all features + return {name: np.nan for name in feature_names} + return func(image1, image2, mask) + except (IndexError, ValueError) as e: + # Handle cases where correlation calculation fails + if "index 0 is out of bounds" in str(e) or "size 0" in str(e): + # Get feature names from a successful call to maintain structure + dummy_mask = np.ones((2, 2), dtype=bool) + dummy_img = np.ones((2, 2), dtype=image1.dtype) + feature_names = func(dummy_img, dummy_img, dummy_mask).keys() + # Return dictionary with NaN values for all features + return {name: np.nan for name in feature_names} + raise # Re-raise other errors def _calculate_features_helper( From f7342c8320820739d00ccf272f0df1d311ef3024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 21:31:30 +0000 Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/squidpy/exp/_feature.py | 62 +++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 562286e99..6cc87a007 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -3,6 +3,7 @@ from __future__ import annotations import itertools +import os import warnings from collections.abc import Callable, Sequence from typing import Any @@ -13,6 +14,7 @@ import pandas as pd import xarray as xr from cp_measure.bulk import get_core_measurements, get_correlation_measurements +from numba import njit, prange from scipy import ndimage from skimage import measure from skimage.measure import label @@ -23,8 +25,6 @@ from squidpy._constants._constants import ImageFeature from squidpy._docs import d, inject_docs from squidpy._utils import Signal, _get_n_cores, parallelize -from numba import njit, prange -import os __all__ = ["calculate_image_features"] @@ -63,12 +63,24 @@ BoolArray = npt.NDArray[np.bool_] # Boolean array # Define property sets at module level for better performance -_SCALAR_PROPS = frozenset({ - "area", "area_filled", "area_convex", "num_pixels", - "axis_major_length", "axis_minor_length", "eccentricity", - "equivalent_diameter", "extent", "feret_diameter_max", - "solidity", "euler_number", "perimeter", "perimeter_crofton" -}) +_SCALAR_PROPS = frozenset( + { + "area", + "area_filled", + "area_convex", + "num_pixels", + "axis_major_length", + "axis_minor_length", + "eccentricity", + "equivalent_diameter", + "extent", + "feret_diameter_max", + "solidity", + "euler_number", + "perimeter", + "perimeter_crofton", + } +) _ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"}) _ARRAY_2D_PROPS = frozenset({"inertia_tensor"}) @@ -336,13 +348,13 @@ def _extract_features_from_regionprops( ) -> dict[str, float]: """Extract features from a regionprops object given a list of properties.""" cell_features = {} - + for prop in props: try: value = getattr(region_obj, prop) if skip_callable and callable(value): continue - + if prop in _SCALAR_PROPS: cell_features[prop] = float(value) elif prop in _ARRAY_1D_PROPS: @@ -376,11 +388,11 @@ def _extract_features_from_regionprops( cell_features[prop] = float(value.flatten()[0]) else: cell_features[prop] = float(value) - + except (ValueError, TypeError, AttributeError) as e: logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}") continue - + return cell_features @@ -460,19 +472,23 @@ def _get_cell_crops_numba( pad: int = 1, ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Numba-accelerated version of _get_cell_crops. - + Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. """ # Find cell boundaries using vectorized operations cell_mask = labels == cell_id if not np.any(cell_mask): - return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype) + return ( + np.zeros((0, 0), dtype=np.bool_), + np.zeros((0, 0), dtype=image1.dtype), + np.zeros((0, 0), dtype=image2.dtype), + ) # Get non-zero indices efficiently y_indices, x_indices = np.nonzero(cell_mask) y_min, y_max = y_indices.min(), y_indices.max() x_min, x_max = x_indices.min(), x_indices.max() - + # Get image dimensions height, width = labels.shape @@ -491,13 +507,13 @@ def _get_cell_crops_numba( # Create output arrays with exact size y_size = y_end - y_start x_size = x_end - x_start - + # Create cell mask crop cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_) for i in range(y_size): for j in range(x_size): cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j] - + # Handle image crops efficiently if image1.size > 0: image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype) @@ -506,7 +522,7 @@ def _get_cell_crops_numba( image1_cropped[i, j] = image1[y_start + i, x_start + j] else: image1_cropped = np.zeros((0, 0), dtype=image1.dtype) - + if image2.size > 0: image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype) for i in range(y_size): @@ -531,20 +547,20 @@ def _get_cell_crops( empty_image = np.zeros((0, 0), dtype=np.float32) image1_np = image1 if image1 is not None else empty_image image2_np = image2 if image2 is not None else empty_image - + # Use Numba-accelerated version cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba( cell_id, labels, image1_np, image2_np, pad ) - + # Return None if no cell found if cell_mask_cropped.size == 0: return None - + # Convert back to None for unused images image1_cropped = image1_cropped if image1 is not None else None image2_cropped = image2_cropped if image2 is not None else None - + return cell_mask_cropped, image1_cropped, image2_cropped @@ -606,7 +622,7 @@ def _measurement_wrapper( """ if image1 is None: return {} # Return empty dict if no image data - + try: if image2 is None: return func(mask, image1) From b2eb6aa5426a8992dc38ad252d6090a96cd4c00a Mon Sep 17 00:00:00 2001 From: Tim Treis Date: Tue, 29 Apr 2025 17:35:07 -0400 Subject: [PATCH 15/15] mypy --- src/squidpy/exp/_feature.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py index 6cc87a007..41e0a9e86 100644 --- a/src/squidpy/exp/_feature.py +++ b/src/squidpy/exp/_feature.py @@ -375,7 +375,7 @@ def _extract_features_from_regionprops( cell_features[f"{prop}_{i}"] = float(v) else: # Fallback for any other properties - if isinstance(value, (np.ndarray, list, tuple)): + if isinstance(value, np.ndarray | list | tuple): value = np.asarray(value) if value.ndim == 1: for i, v in enumerate(value): @@ -466,11 +466,11 @@ def _prepare_images_for_measurement( @njit(fastmath=True) def _get_cell_crops_numba( cell_id: int, - labels: np.ndarray, - image1: np.ndarray, - image2: np.ndarray, + labels: npt.NDArray[np.int_], + image1: npt.NDArray[np.float32], + image2: npt.NDArray[np.float32], pad: int = 1, -) -> tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> tuple[npt.NDArray[np.bool_], npt.NDArray[np.float32], npt.NDArray[np.float32]]: """Numba-accelerated version of _get_cell_crops. Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used. @@ -553,7 +553,6 @@ def _get_cell_crops( cell_id, labels, image1_np, image2_np, pad ) - # Return None if no cell found if cell_mask_cropped.size == 0: return None @@ -634,7 +633,7 @@ def _measurement_wrapper( dummy_img = np.ones((2, 2), dtype=image1.dtype) feature_names = func(dummy_img, dummy_img, dummy_mask).keys() # Return dictionary with NaN values for all features - return {name: np.nan for name in feature_names} + return dict.fromkeys(feature_names, np.nan) return func(image1, image2, mask) except (IndexError, ValueError) as e: # Handle cases where correlation calculation fails @@ -644,7 +643,7 @@ def _measurement_wrapper( dummy_img = np.ones((2, 2), dtype=image1.dtype) feature_names = func(dummy_img, dummy_img, dummy_mask).keys() # Return dictionary with NaN values for all features - return {name: np.nan for name in feature_names} + return dict.fromkeys(feature_names, np.nan) raise # Re-raise other errors