From 6ce91510d2749906cdc9fed4174e078871c1ea46 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Fri, 28 Mar 2025 15:39:37 +0100
Subject: [PATCH 01/15] mvp

---
 pyproject.toml              |   2 +
 src/squidpy/__init__.py     |   2 +-
 src/squidpy/exp/__init__.py |  11 ++
 src/squidpy/exp/_feature.py | 224 ++++++++++++++++++++++++++++++++++++
 4 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 src/squidpy/exp/__init__.py
 create mode 100644 src/squidpy/exp/_feature.py

diff --git a/pyproject.toml b/pyproject.toml
index 1143cab94..cd4e7a665 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,8 @@ dependencies = [
    "xarray>=2024.10.0",
    "zarr>=2.6.1,<3.0.0",
    "spatialdata>=0.2.5",
+   "centrosome>=1.2.3",
+   "cp-measure>=0.1.4"
 ]
 
 [project.optional-dependencies]
diff --git a/src/squidpy/__init__.py b/src/squidpy/__init__.py
index 5fb2b848a..cfba804fd 100644
--- a/src/squidpy/__init__.py
+++ b/src/squidpy/__init__.py
@@ -3,7 +3,7 @@
 from importlib import metadata
 from importlib.metadata import PackageMetadata
 
-from squidpy import datasets, gr, im, pl, read, tl
+from squidpy import datasets, exp, gr, im, pl, read, tl
 
 try:
     md: PackageMetadata = metadata.metadata(__name__)
diff --git a/src/squidpy/exp/__init__.py b/src/squidpy/exp/__init__.py
new file mode 100644
index 000000000..2dc08d915
--- /dev/null
+++ b/src/squidpy/exp/__init__.py
@@ -0,0 +1,11 @@
+"""Experimental module for Squidpy.
+
+This module contains experimental features that are still under development.
+These features may change or be removed in future releases.
+"""
+
+from __future__ import annotations
+
+from squidpy.exp._feature import calculate_image_features
+
+__all__ = ["calculate_image_features"]
diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
new file mode 100644
index 000000000..42fca34a6
--- /dev/null
+++ b/src/squidpy/exp/_feature.py
@@ -0,0 +1,224 @@
+"""Experimental feature extraction module."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+import warnings
+import anndata as ad
+import numpy as np
+import pandas as pd
+from cp_measure.bulk import get_core_measurements
+from spatialdata._logging import logger as logg
+from spatialdata import SpatialData
+
+from squidpy._constants._constants import ImageFeature
+from squidpy._docs import d, inject_docs
+from squidpy._utils import Signal, _get_n_cores, parallelize
+from spatialdata.models import TableModel
+__all__ = ["calculate_image_features"]
+
+
+@d.dedent
+@inject_docs(f=ImageFeature)
+def calculate_image_features(
+    sdata: SpatialData,
+    labels_key: str,
+    image_key: str,
+    adata_key_added: str = "morphology",
+    invalid_as_zero: bool = True,
+    n_jobs: int | None = None,
+    backend: str = "loky",
+    show_progress_bar: bool = True,
+) -> pd.DataFrame | None:
+    """
+    Calculate features from segmentation masks using CellProfiler measurements.
+
+    This function uses the `cp_measure` package to extract features from
+    segmentation masks. It supports both basic shape features and
+    intensity-based features if an intensity image is provided.
+
+    Parameters
+    ----------
+    sdata
+        The spatial data object containing the segmentation masks.
+    labels_key
+        Key in :attr:`spatialdata.SpatialData.labels` containing the
+        segmentation masks.
+    image_key
+        Key in :attr:`spatialdata.SpatialData.images` containing the
+        intensity image.
+    adata_key_added
+        Key to store the AnnData object in the SpatialData object.
+    %(parallelize)s
+
+    Returns
+    -------
+    A :class:`pandas.DataFrame` with the calculated features. If the image has
+    multiple channels, features are calculated for each channel separately and
+    channel names are appended to the feature names.
+
+    Notes
+    -----
+    This is an experimental feature that requires the `cp_measure` package
+    to be installed.
+    """
+    # Get the image and labels
+    image = np.asarray(sdata.images[image_key].compute())
+    labels = np.asarray(sdata.labels[labels_key].compute())
+
+    # Check if labels are empty
+    if labels.size == 0:
+        raise ValueError("Labels array is empty")
+
+    max_label = int(labels.max())
+    if max_label == 0:
+        raise ValueError("No cells found in labels (max label is 0)")
+
+    # Get channel names if available
+    channel_names = None
+    if (
+        hasattr(sdata.images[image_key], "coords")
+        and "c" in sdata.images[image_key].coords
+    ):
+        channel_names = sdata.images[image_key].coords["c"].values
+
+    # Handle image dimensions
+    if image.ndim == 2:
+        image = image[None, :, :]  # Add channel dimension
+    elif image.ndim != 3:
+        raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}")
+
+    # Check if image and labels have matching dimensions
+    if image.shape[1:] != labels.shape:
+        raise ValueError(
+            f"Image and labels have mismatched dimensions: "
+            f"image {image.shape[1:]}, labels {labels.shape}"
+        )
+
+    # Get core measurements from cp_measure
+    measurements = get_core_measurements()
+
+    # Process each channel
+    all_features = []
+    n_channels = image.shape[0]
+    n_jobs = _get_n_cores(n_jobs)
+
+    for ch_idx in range(n_channels):
+        ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
+
+        logg.info(
+            f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)"
+        )
+
+        ch_image = image[ch_idx]
+
+        # Get cell IDs
+        cell_ids = range(1, max_label + 1)
+
+        # Parallelize feature calculation
+        res = parallelize(
+            _calculate_image_features_helper,
+            collection=cell_ids,
+            extractor=pd.concat,
+            n_jobs=n_jobs,
+            backend=backend,
+            show_progress_bar=show_progress_bar,
+        )(labels, ch_image, measurements, ch_name)
+
+        all_features.append(res)
+
+    # Create AnnData object from results
+    combined_features = pd.concat(all_features, axis=1)
+    if invalid_as_zero:
+        combined_features = combined_features.replace([np.inf, -np.inf], 0)
+        combined_features = combined_features.fillna(0)
+    adata = ad.AnnData(X=combined_features)
+    adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)]
+    adata.var_names = combined_features.columns
+
+    adata.uns["spatialdata_attrs"] = {
+        "region": labels_key,
+        "region_key": "region", 
+        "instance_key": "label_id", 
+    }
+    adata.obs["region"] = pd.Categorical([labels_key] * len(adata))
+    adata.obs["label_id"] = range(1, max_label + 1)
+    # adata.obs[["region", "spot_id"]]
+
+    # Add the AnnData object to the SpatialData object
+    sdata.tables[adata_key_added] = TableModel.parse(adata)
+
+    # Combine features from all channels
+    # return pd.concat(all_features, axis=1)
+
+
+def _calculate_image_features_helper(
+    cell_ids: Sequence[int],
+    labels: np.ndarray,
+    image: np.ndarray,
+    measurements: dict[str, Any],
+    channel_name: str | None = None,
+    queue: Any | None = None,
+) -> pd.DataFrame:
+    """Helper function to calculate features for a subset of cells."""
+    features_list = []
+    for cell_id in cell_ids:
+        # Get cell mask
+        cell_mask = (labels == cell_id).astype(np.uint8)
+
+        # Find bounding box of the cell
+        y_indices, x_indices = np.where(cell_mask)
+        if len(y_indices) == 0:  # Skip empty cells
+            continue
+
+        y_min, y_max = y_indices.min(), y_indices.max()
+        x_min, x_max = x_indices.min(), x_indices.max()
+
+        # Add padding to ensure we capture the full cell
+        pad = 5
+        y_min = max(0, y_min - pad)
+        y_max = min(labels.shape[0], y_max + pad)
+        x_min = max(0, x_min - pad)
+        x_max = min(labels.shape[1], x_max + pad)
+
+        # Crop both mask and image to the bounding box
+        cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
+        image_cropped = image[y_min:y_max, x_min:x_max]
+
+        cell_features = {}
+        # Calculate all available features
+        for name, func in measurements.items():
+            try:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    feature_dict = func(cell_mask_cropped, image_cropped)
+                    # Convert numpy arrays to scalars
+                    feature_dict = {
+                        k: (
+                            float(v[0])
+                            if isinstance(v, np.ndarray) and v.size == 1
+                            else v
+                        )
+                        for k, v in feature_dict.items()
+                    }
+                    # Append channel name to feature names
+                    feature_dict = {
+                        f"{k}_ch{channel_name}": v for k, v in feature_dict.items()
+                    }
+                    cell_features.update(feature_dict)
+            except Exception as e:
+                logg.warning(
+                    f"Failed to calculate {name} features for cell {cell_id}: "
+                    f"{str(e)}"
+                )
+
+        features_list.append(cell_features)
+
+        if queue is not None:
+            queue.put(Signal.UPDATE)
+
+    if queue is not None:
+        queue.put(Signal.FINISH)
+
+    return pd.DataFrame(features_list, index=cell_ids)

From 3a82ac8e207c37a4225fca51b6555dac78fd6fe5 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Fri, 28 Mar 2025 17:46:33 +0100
Subject: [PATCH 02/15] performance improvement

---
 src/squidpy/exp/_feature.py | 351 ++++++++++++++++++++++++++----------
 1 file changed, 255 insertions(+), 96 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 42fca34a6..5f7d7b0b3 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -3,33 +3,215 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
-from typing import Any
+from typing import Any, Callable
 import warnings
 import anndata as ad
 import numpy as np
 import pandas as pd
-from cp_measure.bulk import get_core_measurements
+from cp_measure.bulk import get_core_measurements, get_correlation_measurements
 from spatialdata._logging import logger as logg
 from spatialdata import SpatialData
+import xarray as xr
 
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
 from spatialdata.models import TableModel
+
 __all__ = ["calculate_image_features"]
 
 
+def _measurement_wrapper(
+    func: Callable,
+    mask: np.ndarray,
+    image1: np.ndarray,
+    image2: np.ndarray | None = None,
+) -> dict[str, Any]:
+    """Wrapper function to handle both core and correlation measurements.
+
+    Parameters
+    ----------
+    func
+        The measurement function to call
+    mask
+        The cell mask
+    image1
+        First image (or only image for core measurements)
+    image2
+        Second image for correlation measurements. If None, this is a core
+        measurement.
+
+    Returns
+    -------
+    Dictionary of feature values
+    """
+    return func(mask, image1) if image2 is None else func(image1, image2, mask)
+
+
+def _calculate_features_helper(
+    cell_ids: Sequence[int],
+    labels: np.ndarray,
+    image1: np.ndarray,
+    image2: np.ndarray | None,
+    measurements: dict[str, Any],
+    channel1_name: str | None = None,
+    channel2_name: str | None = None,
+    queue: Any | None = None,
+    verbose: bool = False,
+) -> pd.DataFrame:
+    """Helper function to calculate features for a subset of cells."""
+    features_dict = {}
+
+    # Pre-allocate arrays for type conversion
+    uint8_features = [
+        "radial_distribution",
+        "radial_zernikes",
+        "intensity",
+        "sizeshape",
+        "zernike",
+        "ferret",
+    ]
+    float_features = ["manders_fold", "rwc"]
+
+    # Pre-compute image normalization if needed
+    if "texture" in measurements:
+        img1_min = image1.min()
+        img1_max = image1.max()
+        img1_range = img1_max - img1_min + 1e-10
+        if image2 is not None:
+            img2_min = image2.min()
+            img2_max = image2.max()
+            img2_range = img2_max - img2_min + 1e-10
+
+    for cell_id in cell_ids:
+        # Get cell mask and find bounding box in one step
+        cell_mask = labels == cell_id
+        y_indices, x_indices = np.where(cell_mask)
+        if len(y_indices) == 0:  # Skip empty cells
+            continue
+
+        # Get bounding box with padding
+        y_min, y_max = y_indices.min(), y_indices.max()
+        x_min, x_max = x_indices.min(), x_indices.max()
+        pad = 5
+        y_min = max(0, y_min - pad)
+        y_max = min(labels.shape[0], y_max + pad)
+        x_min = max(0, x_min - pad)
+        x_max = min(labels.shape[1], x_max + pad)
+
+        # Crop all arrays at once
+        cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
+        image1_cropped = image1[y_min:y_max, x_min:x_max]
+        image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
+
+        # Quick shape check
+        if cell_mask_cropped.shape != image1_cropped.shape or (
+            image2_cropped is not None
+            and cell_mask_cropped.shape != image2_cropped.shape
+        ):
+            if verbose:
+                logg.warning(
+                    f"Shape mismatch for cell {cell_id}: "
+                    f"mask {cell_mask_cropped.shape}, "
+                    f"image1 {image1_cropped.shape}, "
+                    f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}"
+                )
+            continue
+
+        cell_features = {}
+        # Calculate all available features
+        for name, func in measurements.items():
+            try:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+
+                    # Pre-convert inputs based on feature type
+                    if name in uint8_features:
+                        mask = cell_mask_cropped.astype(np.uint8)
+                        img1 = image1_cropped.astype(np.uint8)
+                        img2 = (
+                            None
+                            if image2_cropped is None
+                            else image2_cropped.astype(np.uint8)
+                        )
+                    elif name == "texture":
+                        mask = cell_mask_cropped.astype(np.uint8)
+                        img1 = (
+                            image1_cropped.astype(np.float32) - img1_min
+                        ) / img1_range
+                        img2 = (
+                            None
+                            if image2_cropped is None
+                            else (image2_cropped.astype(np.float32) - img2_min)
+                            / img2_range
+                        )
+                    elif name in float_features:
+                        mask = cell_mask_cropped.astype(np.float32)
+                        img1 = image1_cropped.astype(np.float32)
+                        img2 = (
+                            None
+                            if image2_cropped is None
+                            else image2_cropped.astype(np.float32)
+                        )
+                    else:
+                        mask = cell_mask_cropped.astype(np.float32)
+                        img1 = image1_cropped.astype(np.float32)
+                        img2 = (
+                            None
+                            if image2_cropped is None
+                            else image2_cropped.astype(np.float32)
+                        )
+
+                    feature_dict = _measurement_wrapper(func, mask, img1, img2)
+
+                    for k, v in feature_dict.items():
+                        if len(v) > 1:
+                            raise ValueError(f"Feature {k} has more than one value.")
+                        else:
+                            feature_dict[k] = float(v[0])
+
+                    # Append channel names efficiently
+                    if image2 is None:
+                        feature_dict = {
+                            f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()
+                        }
+                    else:
+                        feature_dict = {
+                            f"{k}_ch{channel1_name}_ch{channel2_name}": v
+                            for k, v in feature_dict.items()
+                        }
+
+                    cell_features.update(feature_dict)
+            except Exception as e:
+                if verbose:
+                    logg.warning(
+                        f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}"
+                    )
+
+        features_dict[cell_id] = cell_features
+
+        if queue is not None:
+            queue.put(Signal.UPDATE)
+
+    if queue is not None:
+        queue.put(Signal.FINISH)
+
+    return pd.DataFrame.from_dict(features_dict, orient="index")
+
+
 @d.dedent
 @inject_docs(f=ImageFeature)
 def calculate_image_features(
     sdata: SpatialData,
     labels_key: str,
     image_key: str,
+    scale: str | None = None,
     adata_key_added: str = "morphology",
     invalid_as_zero: bool = True,
     n_jobs: int | None = None,
     backend: str = "loky",
     show_progress_bar: bool = True,
+    verbose: bool = False,
 ) -> pd.DataFrame | None:
     """
     Calculate features from segmentation masks using CellProfiler measurements.
@@ -63,9 +245,23 @@ def calculate_image_features(
     This is an experimental feature that requires the `cp_measure` package
     to be installed.
     """
-    # Get the image and labels
-    image = np.asarray(sdata.images[image_key].compute())
-    labels = np.asarray(sdata.labels[labels_key].compute())
+
+    if (
+        (isinstance(sdata.images[image_key], xr.DataTree)
+        or isinstance(sdata.labels[labels_key], xr.DataTree))
+        and scale is None
+    ):
+        raise ValueError("When using multi-scale data, please specify the scale.")
+
+    if scale is not None and not isinstance(scale, str):
+        raise ValueError("Scale must be a string.")
+
+    if scale is not None:
+        image = np.asarray(sdata.images[image_key][scale].image.compute())
+        labels = np.asarray(sdata.labels[labels_key][scale].image.compute())
+    else:
+        image = np.asarray(sdata.images[image_key].compute())
+        labels = np.asarray(sdata.labels[labels_key].compute())
 
     # Check if labels are empty
     if labels.size == 0:
@@ -97,128 +293,91 @@ def calculate_image_features(
         )
 
     # Get core measurements from cp_measure
-    measurements = get_core_measurements()
+    measurements_core = get_core_measurements()
+    measurements_corr = get_correlation_measurements()
+
+    # Get unique cell IDs from labels, excluding background (0)
+    cell_ids = np.unique(labels)
+    cell_ids = cell_ids[cell_ids != 0]
 
     # Process each channel
     all_features = []
     n_channels = image.shape[0]
     n_jobs = _get_n_cores(n_jobs)
 
-    for ch_idx in range(n_channels):
-        ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
+    logg.info(f"Using '{n_jobs}' core(s).")
 
-        logg.info(
-            f"Calculating features for channel '{ch_idx}' " f"using '{n_jobs}' core(s)"
-        )
+    # First process core measurements for each channel
+    for ch_idx in range(n_channels):
+        logg.info(f"Calculating core features for channel '{ch_idx}'.")
 
+        ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
         ch_image = image[ch_idx]
 
-        # Get cell IDs
-        cell_ids = range(1, max_label + 1)
-
-        # Parallelize feature calculation
         res = parallelize(
-            _calculate_image_features_helper,
+            _calculate_features_helper,
             collection=cell_ids,
             extractor=pd.concat,
             n_jobs=n_jobs,
             backend=backend,
             show_progress_bar=show_progress_bar,
-        )(labels, ch_image, measurements, ch_name)
+            verbose=verbose,
+        )(labels, ch_image, None, measurements_core, ch_name)
 
         all_features.append(res)
 
+    # Then process correlation measurements between channels
+    for ch1_idx in range(n_channels):
+        for ch2_idx in range(ch1_idx + 1, n_channels):
+            ch1_name = (
+                channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
+            )
+            ch2_name = (
+                channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
+            )
+
+            logg.info(
+                f"Calculating correlation features between channels "
+                f"'{ch1_name}' and '{ch2_name}'."
+            )
+
+            ch1_image = image[ch1_idx]
+            ch2_image = image[ch2_idx]
+
+            # Parallelize feature calculation
+            res = parallelize(
+                _calculate_features_helper,
+                collection=cell_ids,
+                extractor=pd.concat,
+                n_jobs=n_jobs,
+                backend=backend,
+                show_progress_bar=show_progress_bar,
+                verbose=verbose,
+            )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name)
+
+            all_features.append(res)
+
     # Create AnnData object from results
     combined_features = pd.concat(all_features, axis=1)
     if invalid_as_zero:
         combined_features = combined_features.replace([np.inf, -np.inf], 0)
         combined_features = combined_features.fillna(0)
+
+    # Ensure cell IDs are preserved in the correct order
+    cell_ids = sorted(combined_features.index)
+    combined_features = combined_features.loc[cell_ids]
+
     adata = ad.AnnData(X=combined_features)
-    adata.obs_names = [f"cell_{i}" for i in range(1, max_label + 1)]
+    adata.obs_names = [f"cell_{i}" for i in cell_ids]
     adata.var_names = combined_features.columns
 
     adata.uns["spatialdata_attrs"] = {
         "region": labels_key,
-        "region_key": "region", 
-        "instance_key": "label_id", 
+        "region_key": "region",
+        "instance_key": "label_id",
     }
     adata.obs["region"] = pd.Categorical([labels_key] * len(adata))
-    adata.obs["label_id"] = range(1, max_label + 1)
-    # adata.obs[["region", "spot_id"]]
+    adata.obs["label_id"] = cell_ids
 
     # Add the AnnData object to the SpatialData object
     sdata.tables[adata_key_added] = TableModel.parse(adata)
-
-    # Combine features from all channels
-    # return pd.concat(all_features, axis=1)
-
-
-def _calculate_image_features_helper(
-    cell_ids: Sequence[int],
-    labels: np.ndarray,
-    image: np.ndarray,
-    measurements: dict[str, Any],
-    channel_name: str | None = None,
-    queue: Any | None = None,
-) -> pd.DataFrame:
-    """Helper function to calculate features for a subset of cells."""
-    features_list = []
-    for cell_id in cell_ids:
-        # Get cell mask
-        cell_mask = (labels == cell_id).astype(np.uint8)
-
-        # Find bounding box of the cell
-        y_indices, x_indices = np.where(cell_mask)
-        if len(y_indices) == 0:  # Skip empty cells
-            continue
-
-        y_min, y_max = y_indices.min(), y_indices.max()
-        x_min, x_max = x_indices.min(), x_indices.max()
-
-        # Add padding to ensure we capture the full cell
-        pad = 5
-        y_min = max(0, y_min - pad)
-        y_max = min(labels.shape[0], y_max + pad)
-        x_min = max(0, x_min - pad)
-        x_max = min(labels.shape[1], x_max + pad)
-
-        # Crop both mask and image to the bounding box
-        cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
-        image_cropped = image[y_min:y_max, x_min:x_max]
-
-        cell_features = {}
-        # Calculate all available features
-        for name, func in measurements.items():
-            try:
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore")
-                    feature_dict = func(cell_mask_cropped, image_cropped)
-                    # Convert numpy arrays to scalars
-                    feature_dict = {
-                        k: (
-                            float(v[0])
-                            if isinstance(v, np.ndarray) and v.size == 1
-                            else v
-                        )
-                        for k, v in feature_dict.items()
-                    }
-                    # Append channel name to feature names
-                    feature_dict = {
-                        f"{k}_ch{channel_name}": v for k, v in feature_dict.items()
-                    }
-                    cell_features.update(feature_dict)
-            except Exception as e:
-                logg.warning(
-                    f"Failed to calculate {name} features for cell {cell_id}: "
-                    f"{str(e)}"
-                )
-
-        features_list.append(cell_features)
-
-        if queue is not None:
-            queue.put(Signal.UPDATE)
-
-    if queue is not None:
-        queue.put(Signal.FINISH)
-
-    return pd.DataFrame(features_list, index=cell_ids)

From f734634ea86167fdeb093190ca18c28f38e1cf35 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 28 Mar 2025 16:47:29 +0000
Subject: [PATCH 03/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/squidpy/exp/_feature.py | 83 ++++++++++---------------------------
 1 file changed, 23 insertions(+), 60 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 5f7d7b0b3..cf7adbea3 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -2,21 +2,22 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
-from typing import Any, Callable
 import warnings
+from collections.abc import Callable, Sequence
+from typing import Any
+
 import anndata as ad
 import numpy as np
 import pandas as pd
+import xarray as xr
 from cp_measure.bulk import get_core_measurements, get_correlation_measurements
-from spatialdata._logging import logger as logg
 from spatialdata import SpatialData
-import xarray as xr
+from spatialdata._logging import logger as logg
+from spatialdata.models import TableModel
 
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
-from spatialdata.models import TableModel
 
 __all__ = ["calculate_image_features"]
 
@@ -106,8 +107,7 @@ def _calculate_features_helper(
 
         # Quick shape check
         if cell_mask_cropped.shape != image1_cropped.shape or (
-            image2_cropped is not None
-            and cell_mask_cropped.shape != image2_cropped.shape
+            image2_cropped is not None and cell_mask_cropped.shape != image2_cropped.shape
         ):
             if verbose:
                 logg.warning(
@@ -129,38 +129,23 @@ def _calculate_features_helper(
                     if name in uint8_features:
                         mask = cell_mask_cropped.astype(np.uint8)
                         img1 = image1_cropped.astype(np.uint8)
-                        img2 = (
-                            None
-                            if image2_cropped is None
-                            else image2_cropped.astype(np.uint8)
-                        )
+                        img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8)
                     elif name == "texture":
                         mask = cell_mask_cropped.astype(np.uint8)
-                        img1 = (
-                            image1_cropped.astype(np.float32) - img1_min
-                        ) / img1_range
+                        img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range
                         img2 = (
                             None
                             if image2_cropped is None
-                            else (image2_cropped.astype(np.float32) - img2_min)
-                            / img2_range
+                            else (image2_cropped.astype(np.float32) - img2_min) / img2_range
                         )
                     elif name in float_features:
                         mask = cell_mask_cropped.astype(np.float32)
                         img1 = image1_cropped.astype(np.float32)
-                        img2 = (
-                            None
-                            if image2_cropped is None
-                            else image2_cropped.astype(np.float32)
-                        )
+                        img2 = None if image2_cropped is None else image2_cropped.astype(np.float32)
                     else:
                         mask = cell_mask_cropped.astype(np.float32)
                         img1 = image1_cropped.astype(np.float32)
-                        img2 = (
-                            None
-                            if image2_cropped is None
-                            else image2_cropped.astype(np.float32)
-                        )
+                        img2 = None if image2_cropped is None else image2_cropped.astype(np.float32)
 
                     feature_dict = _measurement_wrapper(func, mask, img1, img2)
 
@@ -172,21 +157,14 @@ def _calculate_features_helper(
 
                     # Append channel names efficiently
                     if image2 is None:
-                        feature_dict = {
-                            f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()
-                        }
+                        feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()}
                     else:
-                        feature_dict = {
-                            f"{k}_ch{channel1_name}_ch{channel2_name}": v
-                            for k, v in feature_dict.items()
-                        }
+                        feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()}
 
                     cell_features.update(feature_dict)
             except Exception as e:
                 if verbose:
-                    logg.warning(
-                        f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}"
-                    )
+                    logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}")
 
         features_dict[cell_id] = cell_features
 
@@ -247,10 +225,8 @@ def calculate_image_features(
     """
 
     if (
-        (isinstance(sdata.images[image_key], xr.DataTree)
-        or isinstance(sdata.labels[labels_key], xr.DataTree))
-        and scale is None
-    ):
+        isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree)
+    ) and scale is None:
         raise ValueError("When using multi-scale data, please specify the scale.")
 
     if scale is not None and not isinstance(scale, str):
@@ -273,10 +249,7 @@ def calculate_image_features(
 
     # Get channel names if available
     channel_names = None
-    if (
-        hasattr(sdata.images[image_key], "coords")
-        and "c" in sdata.images[image_key].coords
-    ):
+    if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords:
         channel_names = sdata.images[image_key].coords["c"].values
 
     # Handle image dimensions
@@ -287,10 +260,7 @@ def calculate_image_features(
 
     # Check if image and labels have matching dimensions
     if image.shape[1:] != labels.shape:
-        raise ValueError(
-            f"Image and labels have mismatched dimensions: "
-            f"image {image.shape[1:]}, labels {labels.shape}"
-        )
+        raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}")
 
     # Get core measurements from cp_measure
     measurements_core = get_core_measurements()
@@ -329,17 +299,10 @@ def calculate_image_features(
     # Then process correlation measurements between channels
     for ch1_idx in range(n_channels):
         for ch2_idx in range(ch1_idx + 1, n_channels):
-            ch1_name = (
-                channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
-            )
-            ch2_name = (
-                channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
-            )
-
-            logg.info(
-                f"Calculating correlation features between channels "
-                f"'{ch1_name}' and '{ch2_name}'."
-            )
+            ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
+            ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
+
+            logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.")
 
             ch1_image = image[ch1_idx]
             ch2_image = image[ch2_idx]

From 9fb24b407f8680cb743535c7467f0410ccde55ec Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Fri, 28 Mar 2025 22:38:35 +0100
Subject: [PATCH 04/15] added skimage features

---
 src/squidpy/exp/_feature.py | 457 ++++++++++++++++++++++++++++++------
 1 file changed, 384 insertions(+), 73 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 5f7d7b0b3..5ef09b20a 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -2,25 +2,199 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
-from typing import Any, Callable
 import warnings
+from collections.abc import Callable, Sequence
+from typing import Any
+
 import anndata as ad
+import itertools
 import numpy as np
 import pandas as pd
+import xarray as xr
 from cp_measure.bulk import get_core_measurements, get_correlation_measurements
-from spatialdata._logging import logger as logg
+from scipy import ndimage
+from skimage import measure
 from spatialdata import SpatialData
-import xarray as xr
+from spatialdata._logging import logger as logg
+from spatialdata.models import TableModel
 
+from skimage.measure import label
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
-from spatialdata.models import TableModel
 
 __all__ = ["calculate_image_features"]
 
 
+def _get_regionprops_features(
+    cell_ids: Sequence[int],
+    labels: np.ndarray,
+    intensity_image: np.ndarray | None = None,
+    queue: Any | None = None,
+) -> dict[str, float]:
+    """Calculate regionprops features for a cell.
+
+    Parameters
+    ----------
+    cell_id
+        The ID of the cell to process
+    labels
+        The labels array containing cell masks
+    intensity_image
+        Optional intensity image for intensity-based features
+    queue
+        Optional queue for progress tracking. If provided, will send update signals.
+
+    Returns
+    -------
+    Dictionary of regionprops features
+    """
+    # Define channel-independent properties (only need mask)
+    mask_props = {
+        "area",
+        "area_filled",
+        "area_convex",
+        "num_pixels",
+        "axis_major_length",
+        "axis_minor_length",
+        "eccentricity",
+        "equivalent_diameter",
+        "extent",
+        "feret_diameter_max",
+        "solidity",
+        "euler_number",
+        "centroid",
+        "centroid_local",
+        "perimeter",
+        "perimeter_crofton",
+        "inertia_tensor",
+        "inertia_tensor_eigvals",
+    }
+
+    # Define channel-dependent properties (need intensity image)
+    intensity_props = {
+        "intensity_max",
+        "intensity_mean",
+        "intensity_min",
+        "intensity_std",
+    }
+
+    features = {}
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+
+        # labels only (channel independent)
+        if intensity_image is None:
+            for cell_id in cell_ids:
+                cell_mask_cropped, _, _ = _get_cell_crops(
+                    cell_id=cell_id,
+                    labels=labels,
+                )
+
+                if cell_mask_cropped is None:
+                    continue
+
+                region_prop = measure.regionprops(label_image=label(cell_mask_cropped))
+
+                if not region_prop:
+                    continue
+
+                cell_features = {}
+
+                # Calculate regionprops features while ignoring warnings
+                for prop in mask_props:
+                    try:
+                        value = getattr(region_prop, prop)
+
+                        # Handle array-like properties
+                        if isinstance(value, (np.ndarray, list, tuple)):
+                            value = np.array(value)
+                            if value.ndim == 1:
+                                for i, v in enumerate(value):
+                                    cell_features[f"{prop}_{i}"] = float(v)
+                            elif value.ndim == 2:
+                                for i, j in itertools.product(
+                                    range(value.shape[0]), range(value.shape[1])
+                                ):
+                                    cell_features[f"{prop}_{i}x{j}"] = float(
+                                        value[i, j]
+                                    )
+                            else:
+                                cell_features[prop] = value
+                        else:
+                            cell_features[prop] = float(value)
+                    except Exception:
+                        continue
+
+                if queue is not None:
+                    queue.put(Signal.UPDATE)
+
+                features[cell_id] = cell_features
+
+        # Calculate intensity-dependent properties if intensity image is provided
+        else:
+            for cell_id in cell_ids:
+                cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops(
+                    cell_id=cell_id,
+                    labels=labels,
+                    image1=intensity_image,
+                )
+
+                if cell_mask_cropped is None:
+                    continue
+
+                intensity_props_obj = measure.regionprops(
+                    label_image=label(cell_mask_cropped),
+                    intensity_image=intensity_image_cropped,
+                )
+
+                if not intensity_props_obj:
+                    continue
+
+                cell_features = {}
+
+                for prop in intensity_props:
+                    try:
+                        value = getattr(intensity_props_obj, prop)
+
+                        # Skip callable properties
+                        if callable(value):
+                            continue
+
+                        # Handle array properties
+                        if isinstance(value, (np.ndarray, list, tuple)):
+                            value = np.array(value)
+
+                            if value.ndim == 1:
+                                for i, v in enumerate(value):
+                                    cell_features[f"{prop}_{i}"] = float(v)
+                            elif value.ndim == 2:
+                                for i, j in itertools.product(
+                                    range(value.shape[0]), range(value.shape[1])
+                                ):
+                                    cell_features[f"{prop}_{i}x{j}"] = float(
+                                        value[i, j]
+                                    )
+                            else:
+                                cell_features[prop] = value
+                        else:
+                            cell_features[prop] = float(value)
+
+                    except Exception:
+                        continue
+
+                if queue is not None:
+                    queue.put(Signal.UPDATE)
+
+                features[cell_id] = cell_features
+
+    if queue is not None:
+        queue.put(Signal.FINISH)
+
+    return pd.DataFrame.from_dict(features, orient="index")
+
+
 def _measurement_wrapper(
     func: Callable,
     mask: np.ndarray,
@@ -48,6 +222,79 @@ def _measurement_wrapper(
     return func(mask, image1) if image2 is None else func(image1, image2, mask)
 
 
+def _get_cell_crops(
+    cell_id: int,
+    labels: np.ndarray,
+    image1: np.ndarray | None = None,
+    image2: np.ndarray | None = None,
+    pad: int = 1,
+    verbose: bool = False,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None:
+    """Generator function to get cropped arrays for a cell.
+
+    Parameters
+    ----------
+    cell_id
+        The ID of the cell to process
+    labels
+        The labels array containing cell masks
+    image1
+        First image to crop
+    image2
+        Optional second image to crop
+    pad
+        Amount of padding to add around the cell
+    verbose
+        Whether to print warning messages
+
+    Returns
+    -------
+    Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty
+    """
+    # Get cell mask and find bounding box in one step
+    cell_mask = labels == cell_id
+    y_indices, x_indices = np.where(cell_mask)
+    if len(y_indices) == 0:  # Skip empty cells
+        return None
+
+    # Get bounding box
+    y_min, y_max = y_indices.min(), y_indices.max()
+    x_min, x_max = x_indices.min(), x_indices.max()
+
+    # Get image dimensions
+    height, width = labels.shape
+
+    # Calculate desired padding
+    y_pad_min = min(pad, y_min)  # How much we can pad to the top
+    y_pad_max = min(pad, height - y_max - 1)  # How much we can pad to the bottom
+    x_pad_min = min(pad, x_min)  # How much we can pad to the left
+    x_pad_max = min(pad, width - x_max - 1)  # How much we can pad to the right
+
+    # Apply symmetric padding where possible
+    y_min -= y_pad_min
+    y_max += y_pad_max
+    x_min -= x_pad_min
+    x_max += x_pad_max
+
+    # Warn if cell is at border and padding is asymmetric
+    if verbose and (
+        y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad
+    ):
+        logg.warning(
+            f"Cell {cell_id} is at image border. Padding is asymmetric: "
+            f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
+            f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right"
+        )
+
+    # Crop all arrays at once
+    cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
+
+    image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max]
+    image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
+
+    return cell_mask_cropped, image1_cropped, image2_cropped
+
+
 def _calculate_features_helper(
     cell_ids: Sequence[int],
     labels: np.ndarray,
@@ -84,42 +331,40 @@ def _calculate_features_helper(
             img2_range = img2_max - img2_min + 1e-10
 
     for cell_id in cell_ids:
-        # Get cell mask and find bounding box in one step
-        cell_mask = labels == cell_id
-        y_indices, x_indices = np.where(cell_mask)
-        if len(y_indices) == 0:  # Skip empty cells
+        # Get cropped arrays for this cell
+        result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose)
+        if result is None:
             continue
 
-        # Get bounding box with padding
-        y_min, y_max = y_indices.min(), y_indices.max()
-        x_min, x_max = x_indices.min(), x_indices.max()
-        pad = 5
-        y_min = max(0, y_min - pad)
-        y_max = min(labels.shape[0], y_max + pad)
-        x_min = max(0, x_min - pad)
-        x_max = min(labels.shape[1], x_max + pad)
-
-        # Crop all arrays at once
-        cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
-        image1_cropped = image1[y_min:y_max, x_min:x_max]
-        image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
-
-        # Quick shape check
-        if cell_mask_cropped.shape != image1_cropped.shape or (
-            image2_cropped is not None
-            and cell_mask_cropped.shape != image2_cropped.shape
-        ):
+        cell_mask_cropped, image1_cropped, image2_cropped = result
+        cell_features = {}
+
+        # Calculate regionprops features first
+        try:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                regionprops_features = _get_regionprops_features(
+                    cell_mask_cropped,
+                    image1_cropped,
+                    image1_cropped if image2 is None else None,
+                )
+            if image2 is None:
+                regionprops_features = {
+                    f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()
+                }
+            else:
+                regionprops_features = {
+                    f"{k}_ch{channel1_name}_ch{channel2_name}": v
+                    for k, v in regionprops_features.items()
+                }
+            cell_features.update(regionprops_features)
+        except Exception as e:
             if verbose:
                 logg.warning(
-                    f"Shape mismatch for cell {cell_id}: "
-                    f"mask {cell_mask_cropped.shape}, "
-                    f"image1 {image1_cropped.shape}, "
-                    f"image2 {image2_cropped.shape if image2_cropped is not None else 'None'}"
+                    f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}"
                 )
-            continue
 
-        cell_features = {}
-        # Calculate all available features
+        # Calculate all available cp-measure features
         for name, func in measurements.items():
             try:
                 with warnings.catch_warnings():
@@ -206,6 +451,7 @@ def calculate_image_features(
     labels_key: str,
     image_key: str,
     scale: str | None = None,
+    measurements: list[str] | str | None = None,
     adata_key_added: str = "morphology",
     invalid_as_zero: bool = True,
     n_jobs: int | None = None,
@@ -247,10 +493,9 @@ def calculate_image_features(
     """
 
     if (
-        (isinstance(sdata.images[image_key], xr.DataTree)
-        or isinstance(sdata.labels[labels_key], xr.DataTree))
-        and scale is None
-    ):
+        isinstance(sdata.images[image_key], xr.DataTree)
+        or isinstance(sdata.labels[labels_key], xr.DataTree)
+    ) and scale is None:
         raise ValueError("When using multi-scale data, please specify the scale.")
 
     if scale is not None and not isinstance(scale, str):
@@ -263,6 +508,29 @@ def calculate_image_features(
         image = np.asarray(sdata.images[image_key].compute())
         labels = np.asarray(sdata.labels[labels_key].compute())
 
+    available_measurements = [
+        "skimage:label",
+        "skimage:label+image",
+        "cpmeasure:core",
+        "cpmeasure:correlation",
+    ]
+
+    if measurements is None:
+        measurements = available_measurements
+
+    if isinstance(measurements, str):
+        measurements = [measurements]
+
+    if isinstance(measurements, list):
+        invalid_measurements = [
+            m for m in measurements if m not in available_measurements
+        ]
+        if invalid_measurements:
+            raise ValueError(
+                f"Invalid measurement(s): {invalid_measurements}, "
+                f"available measurements: {available_measurements}"
+            )
+
     # Check if labels are empty
     if labels.size == 0:
         raise ValueError("Labels array is empty")
@@ -288,13 +556,11 @@ def calculate_image_features(
     # Check if image and labels have matching dimensions
     if image.shape[1:] != labels.shape:
         raise ValueError(
-            f"Image and labels have mismatched dimensions: "
-            f"image {image.shape[1:]}, labels {labels.shape}"
+            f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}"
         )
 
-    # Get core measurements from cp_measure
-    measurements_core = get_core_measurements()
-    measurements_corr = get_correlation_measurements()
+    if "cpmeasure:correlation" in measurements:
+        measurements_corr = get_correlation_measurements()
 
     # Get unique cell IDs from labels, excluding background (0)
     cell_ids = np.unique(labels)
@@ -307,58 +573,103 @@ def calculate_image_features(
 
     logg.info(f"Using '{n_jobs}' core(s).")
 
-    # First process core measurements for each channel
-    for ch_idx in range(n_channels):
-        logg.info(f"Calculating core features for channel '{ch_idx}'.")
-
-        ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
-        ch_image = image[ch_idx]
-
+    if "skimage:label" in measurements:
+        logg.info("Calculating 'skimage' label features.")
         res = parallelize(
-            _calculate_features_helper,
+            _get_regionprops_features,
             collection=cell_ids,
             extractor=pd.concat,
             n_jobs=n_jobs,
             backend=backend,
             show_progress_bar=show_progress_bar,
             verbose=verbose,
-        )(labels, ch_image, None, measurements_core, ch_name)
-
+        )(labels=labels, intensity_image=None)
         all_features.append(res)
 
-    # Then process correlation measurements between channels
-    for ch1_idx in range(n_channels):
-        for ch2_idx in range(ch1_idx + 1, n_channels):
-            ch1_name = (
-                channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
-            )
-            ch2_name = (
-                channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
-            )
+    # skimage features that need a mask and an image
+    if "skimage:label+image" in measurements:
+        for ch_idx in range(n_channels):
 
-            logg.info(
-                f"Calculating correlation features between channels "
-                f"'{ch1_name}' and '{ch2_name}'."
+            ch_name = (
+                channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
             )
+            ch_image = image[ch_idx]
 
-            ch1_image = image[ch1_idx]
-            ch2_image = image[ch2_idx]
-
-            # Parallelize feature calculation
+            logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.")
             res = parallelize(
-                _calculate_features_helper,
+                _get_regionprops_features,
                 collection=cell_ids,
                 extractor=pd.concat,
                 n_jobs=n_jobs,
                 backend=backend,
                 show_progress_bar=show_progress_bar,
                 verbose=verbose,
-            )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name)
-
+            )(labels=labels, intensity_image=ch_image)
             all_features.append(res)
 
+    # cpmeasure features that need a mask and an image
+    if "cpmeasure:core" in measurements:
+        measurements_core = get_core_measurements()
+
+        for ch_idx in range(n_channels):
+
+            ch_name = (
+                channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
+            )
+            ch_image = image[ch_idx]
+            if "cpmeasure:core" in measurements:
+                logg.info(
+                    f"Calculating 'cpmeasure' core features for channel '{ch_idx}'."
+                )
+
+                res = parallelize(
+                    _calculate_features_helper,
+                    collection=cell_ids,
+                    extractor=pd.concat,
+                    n_jobs=n_jobs,
+                    backend=backend,
+                    show_progress_bar=show_progress_bar,
+                    verbose=verbose,
+                )(labels, ch_image, None, measurements_core, ch_name)
+                all_features.append(res)
+
+    # cpmeasure features that correlate two channels
+    if "cpmeasure:correlation" in measurements:
+        for ch1_idx in range(n_channels):
+            for ch2_idx in range(ch1_idx + 1, n_channels):
+                ch1_name = (
+                    channel_names[ch1_idx]
+                    if channel_names is not None
+                    else f"ch{ch1_idx}"
+                )
+                ch2_name = (
+                    channel_names[ch2_idx]
+                    if channel_names is not None
+                    else f"ch{ch2_idx}"
+                )
+
+                logg.info(
+                    f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'."
+                )
+
+                ch1_image = image[ch1_idx]
+                ch2_image = image[ch2_idx]
+
+                # Parallelize feature calculation
+                res = parallelize(
+                    _calculate_features_helper,
+                    collection=cell_ids,
+                    extractor=pd.concat,
+                    n_jobs=n_jobs,
+                    backend=backend,
+                    show_progress_bar=show_progress_bar,
+                    verbose=verbose,
+                )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name)
+                all_features.append(res)
+
     # Create AnnData object from results
     combined_features = pd.concat(all_features, axis=1)
+
     if invalid_as_zero:
         combined_features = combined_features.replace([np.inf, -np.inf], 0)
         combined_features = combined_features.fillna(0)

From 78ed95cf5cfe19187bec595338a374f47109b928 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 28 Mar 2025 22:37:46 +0000
Subject: [PATCH 05/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/squidpy/exp/_feature.py | 80 ++++++++++---------------------------
 1 file changed, 20 insertions(+), 60 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index eb2808e96..18ccf7cfd 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -2,24 +2,23 @@
 
 from __future__ import annotations
 
+import itertools
 import warnings
 from collections.abc import Callable, Sequence
 from typing import Any
 
 import anndata as ad
-import itertools
 import numpy as np
 import pandas as pd
 import xarray as xr
-import xarray as xr
 from cp_measure.bulk import get_core_measurements, get_correlation_measurements
 from scipy import ndimage
 from skimage import measure
+from skimage.measure import label
 from spatialdata import SpatialData
 from spatialdata._logging import logger as logg
 from spatialdata.models import TableModel
 
-from skimage.measure import label
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
@@ -115,12 +114,8 @@ def _get_regionprops_features(
                                 for i, v in enumerate(value):
                                     cell_features[f"{prop}_{i}"] = float(v)
                             elif value.ndim == 2:
-                                for i, j in itertools.product(
-                                    range(value.shape[0]), range(value.shape[1])
-                                ):
-                                    cell_features[f"{prop}_{i}x{j}"] = float(
-                                        value[i, j]
-                                    )
+                                for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
+                                    cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
                             else:
                                 cell_features[prop] = value
                         else:
@@ -171,12 +166,8 @@ def _get_regionprops_features(
                                 for i, v in enumerate(value):
                                     cell_features[f"{prop}_{i}"] = float(v)
                             elif value.ndim == 2:
-                                for i, j in itertools.product(
-                                    range(value.shape[0]), range(value.shape[1])
-                                ):
-                                    cell_features[f"{prop}_{i}x{j}"] = float(
-                                        value[i, j]
-                                    )
+                                for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
+                                    cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
                             else:
                                 cell_features[prop] = value
                         else:
@@ -278,9 +269,7 @@ def _get_cell_crops(
     x_max += x_pad_max
 
     # Warn if cell is at border and padding is asymmetric
-    if verbose and (
-        y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad
-    ):
+    if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad):
         logg.warning(
             f"Cell {cell_id} is at image border. Padding is asymmetric: "
             f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
@@ -350,20 +339,15 @@ def _calculate_features_helper(
                     image1_cropped if image2 is None else None,
                 )
             if image2 is None:
-                regionprops_features = {
-                    f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()
-                }
+                regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()}
             else:
                 regionprops_features = {
-                    f"{k}_ch{channel1_name}_ch{channel2_name}": v
-                    for k, v in regionprops_features.items()
+                    f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items()
                 }
             cell_features.update(regionprops_features)
         except Exception as e:
             if verbose:
-                logg.warning(
-                    f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}"
-                )
+                logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}")
 
         # Calculate all available cp-measure features
         for name, func in measurements.items():
@@ -472,8 +456,7 @@ def calculate_image_features(
     """
 
     if (
-        isinstance(sdata.images[image_key], xr.DataTree)
-        or isinstance(sdata.labels[labels_key], xr.DataTree)
+        isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree)
     ) and scale is None:
         raise ValueError("When using multi-scale data, please specify the scale.")
 
@@ -501,13 +484,10 @@ def calculate_image_features(
         measurements = [measurements]
 
     if isinstance(measurements, list):
-        invalid_measurements = [
-            m for m in measurements if m not in available_measurements
-        ]
+        invalid_measurements = [m for m in measurements if m not in available_measurements]
         if invalid_measurements:
             raise ValueError(
-                f"Invalid measurement(s): {invalid_measurements}, "
-                f"available measurements: {available_measurements}"
+                f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}"
             )
 
     # Check if labels are empty
@@ -531,9 +511,7 @@ def calculate_image_features(
 
     # Check if image and labels have matching dimensions
     if image.shape[1:] != labels.shape:
-        raise ValueError(
-            f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}"
-        )
+        raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}")
 
     if "cpmeasure:correlation" in measurements:
         measurements_corr = get_correlation_measurements()
@@ -565,10 +543,7 @@ def calculate_image_features(
     # skimage features that need a mask and an image
     if "skimage:label+image" in measurements:
         for ch_idx in range(n_channels):
-
-            ch_name = (
-                channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
-            )
+            ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
             ch_image = image[ch_idx]
 
             logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.")
@@ -588,15 +563,10 @@ def calculate_image_features(
         measurements_core = get_core_measurements()
 
         for ch_idx in range(n_channels):
-
-            ch_name = (
-                channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
-            )
+            ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
             ch_image = image[ch_idx]
             if "cpmeasure:core" in measurements:
-                logg.info(
-                    f"Calculating 'cpmeasure' core features for channel '{ch_idx}'."
-                )
+                logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.")
 
                 res = parallelize(
                     _calculate_features_helper,
@@ -613,20 +583,10 @@ def calculate_image_features(
     if "cpmeasure:correlation" in measurements:
         for ch1_idx in range(n_channels):
             for ch2_idx in range(ch1_idx + 1, n_channels):
-                ch1_name = (
-                    channel_names[ch1_idx]
-                    if channel_names is not None
-                    else f"ch{ch1_idx}"
-                )
-                ch2_name = (
-                    channel_names[ch2_idx]
-                    if channel_names is not None
-                    else f"ch{ch2_idx}"
-                )
+                ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
+                ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
 
-                logg.info(
-                    f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'."
-                )
+                logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.")
 
                 ch1_image = image[ch1_idx]
                 ch2_image = image[ch2_idx]

From c865d57152ed039c8ca50b645118d8f6291ec600 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Mon, 31 Mar 2025 18:40:19 +0200
Subject: [PATCH 06/15] cleaned up a bit

---
 src/squidpy/exp/_feature.py | 947 ++++++++++++++++++++----------------
 1 file changed, 515 insertions(+), 432 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 18ccf7cfd..cb0e41101 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -15,7 +15,7 @@
 from scipy import ndimage
 from skimage import measure
 from skimage.measure import label
-from spatialdata import SpatialData
+from spatialdata import SpatialData, rasterize
 from spatialdata._logging import logger as logg
 from spatialdata.models import TableModel
 
@@ -25,402 +25,51 @@
 
 __all__ = ["calculate_image_features"]
 
-
-def _get_regionprops_features(
-    cell_ids: Sequence[int],
-    labels: np.ndarray,
-    intensity_image: np.ndarray | None = None,
-    queue: Any | None = None,
-) -> dict[str, float]:
-    """Calculate regionprops features for a cell.
-
-    Parameters
-    ----------
-    cell_id
-        The ID of the cell to process
-    labels
-        The labels array containing cell masks
-    intensity_image
-        Optional intensity image for intensity-based features
-    queue
-        Optional queue for progress tracking. If provided, will send update signals.
-
-    Returns
-    -------
-    Dictionary of regionprops features
-    """
-    # Define channel-independent properties (only need mask)
-    mask_props = {
-        "area",
-        "area_filled",
-        "area_convex",
-        "num_pixels",
-        "axis_major_length",
-        "axis_minor_length",
-        "eccentricity",
-        "equivalent_diameter",
-        "extent",
-        "feret_diameter_max",
-        "solidity",
-        "euler_number",
-        "centroid",
-        "centroid_local",
-        "perimeter",
-        "perimeter_crofton",
-        "inertia_tensor",
-        "inertia_tensor_eigvals",
-    }
-
-    # Define channel-dependent properties (need intensity image)
-    intensity_props = {
-        "intensity_max",
-        "intensity_mean",
-        "intensity_min",
-        "intensity_std",
-    }
-
-    features = {}
-
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-
-        # labels only (channel independent)
-        if intensity_image is None:
-            for cell_id in cell_ids:
-                cell_mask_cropped, _, _ = _get_cell_crops(
-                    cell_id=cell_id,
-                    labels=labels,
-                )
-
-                if cell_mask_cropped is None:
-                    continue
-
-                region_prop = measure.regionprops(label_image=label(cell_mask_cropped))
-
-                if not region_prop:
-                    continue
-
-                cell_features = {}
-
-                # Calculate regionprops features while ignoring warnings
-                for prop in mask_props:
-                    try:
-                        value = getattr(region_prop, prop)
-
-                        # Handle array-like properties
-                        if isinstance(value, (np.ndarray, list, tuple)):
-                            value = np.array(value)
-                            if value.ndim == 1:
-                                for i, v in enumerate(value):
-                                    cell_features[f"{prop}_{i}"] = float(v)
-                            elif value.ndim == 2:
-                                for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
-                                    cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
-                            else:
-                                cell_features[prop] = value
-                        else:
-                            cell_features[prop] = float(value)
-                    except Exception:
-                        continue
-
-                if queue is not None:
-                    queue.put(Signal.UPDATE)
-
-                features[cell_id] = cell_features
-
-        # Calculate intensity-dependent properties if intensity image is provided
-        else:
-            for cell_id in cell_ids:
-                cell_mask_cropped, intensity_image_cropped, _ = _get_cell_crops(
-                    cell_id=cell_id,
-                    labels=labels,
-                    image1=intensity_image,
-                )
-
-                if cell_mask_cropped is None:
-                    continue
-
-                intensity_props_obj = measure.regionprops(
-                    label_image=label(cell_mask_cropped),
-                    intensity_image=intensity_image_cropped,
-                )
-
-                if not intensity_props_obj:
-                    continue
-
-                cell_features = {}
-
-                for prop in intensity_props:
-                    try:
-                        value = getattr(intensity_props_obj, prop)
-
-                        # Skip callable properties
-                        if callable(value):
-                            continue
-
-                        # Handle array properties
-                        if isinstance(value, (np.ndarray, list, tuple)):
-                            value = np.array(value)
-
-                            if value.ndim == 1:
-                                for i, v in enumerate(value):
-                                    cell_features[f"{prop}_{i}"] = float(v)
-                            elif value.ndim == 2:
-                                for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
-                                    cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
-                            else:
-                                cell_features[prop] = value
-                        else:
-                            cell_features[prop] = float(value)
-
-                    except Exception:
-                        continue
-
-                if queue is not None:
-                    queue.put(Signal.UPDATE)
-
-                features[cell_id] = cell_features
-
-    if queue is not None:
-        queue.put(Signal.FINISH)
-
-    return pd.DataFrame.from_dict(features, orient="index")
-
-
-def _measurement_wrapper(
-    func: Callable,
-    mask: np.ndarray,
-    image1: np.ndarray,
-    image2: np.ndarray | None = None,
-) -> dict[str, Any]:
-    """Wrapper function to handle both core and correlation measurements.
-
-    Parameters
-    ----------
-    func
-        The measurement function to call
-    mask
-        The cell mask
-    image1
-        First image (or only image for core measurements)
-    image2
-        Second image for correlation measurements. If None, this is a core
-        measurement.
-
-    Returns
-    -------
-    Dictionary of feature values
-    """
-    return func(mask, image1) if image2 is None else func(image1, image2, mask)
-
-
-def _get_cell_crops(
-    cell_id: int,
-    labels: np.ndarray,
-    image1: np.ndarray | None = None,
-    image2: np.ndarray | None = None,
-    pad: int = 1,
-    verbose: bool = False,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None:
-    """Generator function to get cropped arrays for a cell.
-
-    Parameters
-    ----------
-    cell_id
-        The ID of the cell to process
-    labels
-        The labels array containing cell masks
-    image1
-        First image to crop
-    image2
-        Optional second image to crop
-    pad
-        Amount of padding to add around the cell
-    verbose
-        Whether to print warning messages
-
-    Returns
-    -------
-    Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty
-    """
-    # Get cell mask and find bounding box in one step
-    cell_mask = labels == cell_id
-    y_indices, x_indices = np.where(cell_mask)
-    if len(y_indices) == 0:  # Skip empty cells
-        return None
-
-    # Get bounding box
-    y_min, y_max = y_indices.min(), y_indices.max()
-    x_min, x_max = x_indices.min(), x_indices.max()
-
-    # Get image dimensions
-    height, width = labels.shape
-
-    # Calculate desired padding
-    y_pad_min = min(pad, y_min)  # How much we can pad to the top
-    y_pad_max = min(pad, height - y_max - 1)  # How much we can pad to the bottom
-    x_pad_min = min(pad, x_min)  # How much we can pad to the left
-    x_pad_max = min(pad, width - x_max - 1)  # How much we can pad to the right
-
-    # Apply symmetric padding where possible
-    y_min -= y_pad_min
-    y_max += y_pad_max
-    x_min -= x_pad_min
-    x_max += x_pad_max
-
-    # Warn if cell is at border and padding is asymmetric
-    if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad):
-        logg.warning(
-            f"Cell {cell_id} is at image border. Padding is asymmetric: "
-            f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
-            f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right"
-        )
-
-    # Crop all arrays at once
-    cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
-
-    image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max]
-    image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
-
-    return cell_mask_cropped, image1_cropped, image2_cropped
-
-
-def _calculate_features_helper(
-    cell_ids: Sequence[int],
-    labels: np.ndarray,
-    image1: np.ndarray,
-    image2: np.ndarray | None,
-    measurements: dict[str, Any],
-    channel1_name: str | None = None,
-    channel2_name: str | None = None,
-    queue: Any | None = None,
-    verbose: bool = False,
-) -> pd.DataFrame:
-    """Helper function to calculate features for a subset of cells."""
-    features_dict = {}
-
-    # Pre-allocate arrays for type conversion
-    uint8_features = [
-        "radial_distribution",
-        "radial_zernikes",
-        "intensity",
-        "sizeshape",
-        "zernike",
-        "ferret",
-    ]
-    float_features = ["manders_fold", "rwc"]
-
-    # Pre-compute image normalization if needed
-    if "texture" in measurements:
-        img1_min = image1.min()
-        img1_max = image1.max()
-        img1_range = img1_max - img1_min + 1e-10
-        if image2 is not None:
-            img2_min = image2.min()
-            img2_max = image2.max()
-            img2_range = img2_max - img2_min + 1e-10
-
-    for cell_id in cell_ids:
-        # Get cropped arrays for this cell
-        result = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose)
-        if result is None:
-            continue
-
-        cell_mask_cropped, image1_cropped, image2_cropped = result
-        cell_features = {}
-
-        # Calculate regionprops features first
-        try:
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                regionprops_features = _get_regionprops_features(
-                    cell_mask_cropped,
-                    image1_cropped,
-                    image1_cropped if image2 is None else None,
-                )
-            if image2 is None:
-                regionprops_features = {f"{k}_ch{channel1_name}": v for k, v in regionprops_features.items()}
-            else:
-                regionprops_features = {
-                    f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in regionprops_features.items()
-                }
-            cell_features.update(regionprops_features)
-        except Exception as e:
-            if verbose:
-                logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}")
-
-        # Calculate all available cp-measure features
-        for name, func in measurements.items():
-            try:
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore")
-
-                    # Pre-convert inputs based on feature type
-                    if name in uint8_features:
-                        mask = cell_mask_cropped.astype(np.uint8)
-                        img1 = image1_cropped.astype(np.uint8)
-                        img2 = None if image2_cropped is None else image2_cropped.astype(np.uint8)
-                    elif name == "texture":
-                        mask = cell_mask_cropped.astype(np.uint8)
-                        img1 = (image1_cropped.astype(np.float32) - img1_min) / img1_range
-                        img2 = (
-                            None
-                            if image2_cropped is None
-                            else (image2_cropped.astype(np.float32) - img2_min) / img2_range
-                        )
-                    elif name in float_features:
-                        mask = cell_mask_cropped.astype(np.float32)
-                        img1 = image1_cropped.astype(np.float32)
-                        img2 = None if image2_cropped is None else image2_cropped.astype(np.float32)
-                    else:
-                        mask = cell_mask_cropped.astype(np.float32)
-                        img1 = image1_cropped.astype(np.float32)
-                        img2 = None if image2_cropped is None else image2_cropped.astype(np.float32)
-
-                    feature_dict = _measurement_wrapper(func, mask, img1, img2)
-
-                    for k, v in feature_dict.items():
-                        if len(v) > 1:
-                            raise ValueError(f"Feature {k} has more than one value.")
-                        else:
-                            feature_dict[k] = float(v[0])
-
-                    # Append channel names efficiently
-                    if image2 is None:
-                        feature_dict = {f"{k}_ch{channel1_name}": v for k, v in feature_dict.items()}
-                    else:
-                        feature_dict = {f"{k}_ch{channel1_name}_ch{channel2_name}": v for k, v in feature_dict.items()}
-
-                    cell_features.update(feature_dict)
-            except Exception as e:
-                if verbose:
-                    logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}")
-
-        features_dict[cell_id] = cell_features
-
-        if queue is not None:
-            queue.put(Signal.UPDATE)
-
-    if queue is not None:
-        queue.put(Signal.FINISH)
-
-    return pd.DataFrame.from_dict(features_dict, orient="index")
+# Define constant property sets
+_MASK_PROPS = {
+    "area",
+    "area_filled",
+    "area_convex",
+    "num_pixels",
+    "axis_major_length",
+    "axis_minor_length",
+    "eccentricity",
+    "equivalent_diameter",
+    "extent",
+    "feret_diameter_max",
+    "solidity",
+    "euler_number",
+    "centroid",
+    "centroid_local",
+    "perimeter",
+    "perimeter_crofton",
+    "inertia_tensor",
+    "inertia_tensor_eigvals",
+}
+_INTENSITY_PROPS = {
+    "intensity_max",
+    "intensity_mean",
+    "intensity_min",
+    "intensity_std",
+}
 
 
 @d.dedent
 @inject_docs(f=ImageFeature)
 def calculate_image_features(
     sdata: SpatialData,
-    labels_key: str,
     image_key: str,
+    labels_key: str | None = None,
+    shapes_key: str | None = None,
     scale: str | None = None,
     measurements: list[str] | str | None = None,
     adata_key_added: str = "morphology",
     invalid_as_zero: bool = True,
     n_jobs: int | None = None,
     backend: str = "loky",
-    show_progress_bar: bool = True,
+    show_progress_bar: bool = False,  # slower, needs to be optimised
     verbose: bool = False,
+    inplace: bool = True,
 ) -> pd.DataFrame | None:
     """
     Calculate features from segmentation masks using CellProfiler measurements.
@@ -436,6 +85,9 @@ def calculate_image_features(
     labels_key
         Key in :attr:`spatialdata.SpatialData.labels` containing the
         segmentation masks.
+    shapes_key
+        Key in :attr:`spatialdata.SpatialData.shapes` containing the
+        shape features.
     image_key
         Key in :attr:`spatialdata.SpatialData.images` containing the
         intensity image.
@@ -455,20 +107,62 @@ def calculate_image_features(
     to be installed.
     """
 
+    if image_key not in sdata.images.keys():
+        raise ValueError(
+            f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}"
+        )
+
+    if labels_key is not None and shapes_key is not None:
+        raise ValueError("Use either `labels_key` or `shapes_key`, not both.")
+
+    if labels_key is not None and labels_key not in sdata.labels.keys():
+        raise ValueError(
+            f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}"
+        )
+
+    if shapes_key is not None and shapes_key not in sdata.shapes.keys():
+        raise ValueError(
+            f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}"
+        )
+
     if (
-        isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree)
+        isinstance(sdata.images[image_key], xr.DataTree)
+        or isinstance(sdata.labels[labels_key], xr.DataTree)
     ) and scale is None:
         raise ValueError("When using multi-scale data, please specify the scale.")
 
     if scale is not None and not isinstance(scale, str):
         raise ValueError("Scale must be a string.")
 
-    if scale is not None:
-        image = np.asarray(sdata.images[image_key][scale].image.compute())
-        labels = np.asarray(sdata.labels[labels_key][scale].image.compute())
+    image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale)
+    labels = (
+        _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale)
+        if labels_key is not None
+        else None
+    )
+
+    if labels is not None and image.shape[1:] != labels.shape:
+        raise ValueError(
+            f"Image dimensions {image.shape[1:]} do not match labels dimensions {labels.shape} at scale '{scale}'"
+        )
+
+    if shapes_key is not None:
+        scale_str = f" (using scale '{scale}')" if scale is not None else ""
+        logg.info(f"Converting shapes to labels{scale_str}.")
+        _, max_y, max_x = image.shape
+        labels = np.asarray(
+            rasterize(
+                sdata.shapes[shapes_key],
+                ["x", "y"],
+                min_coordinate=[0, 0],
+                max_coordinate=[max_x, max_y],
+                target_coordinate_system="global",
+                target_unit_to_pixels=1.0,
+                return_regions_as_labels=True,
+            )
+        )
     else:
-        image = np.asarray(sdata.images[image_key].compute())
-        labels = np.asarray(sdata.labels[labels_key].compute())
+        labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale)
 
     available_measurements = [
         "skimage:label",
@@ -484,13 +178,14 @@ def calculate_image_features(
         measurements = [measurements]
 
     if isinstance(measurements, list):
-        invalid_measurements = [m for m in measurements if m not in available_measurements]
+        invalid_measurements = [
+            m for m in measurements if m not in available_measurements
+        ]
         if invalid_measurements:
             raise ValueError(
                 f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}"
             )
 
-    # Check if labels are empty
     if labels.size == 0:
         raise ValueError("Labels array is empty")
 
@@ -498,29 +193,31 @@ def calculate_image_features(
     if max_label == 0:
         raise ValueError("No cells found in labels (max label is 0)")
 
-    # Get channel names if available
     channel_names = None
-    if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords:
+    if (
+        hasattr(sdata.images[image_key], "coords")
+        and "c" in sdata.images[image_key].coords
+    ):
         channel_names = sdata.images[image_key].coords["c"].values
 
-    # Handle image dimensions
     if image.ndim == 2:
-        image = image[None, :, :]  # Add channel dimension
+        image = image[None, :, :]
     elif image.ndim != 3:
         raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}")
 
-    # Check if image and labels have matching dimensions
     if image.shape[1:] != labels.shape:
-        raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}")
+        raise ValueError(
+            f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}"
+        )
 
     if "cpmeasure:correlation" in measurements:
         measurements_corr = get_correlation_measurements()
 
-    # Get unique cell IDs from labels, excluding background (0)
     cell_ids = np.unique(labels)
     cell_ids = cell_ids[cell_ids != 0]
+    # Sort cell_ids to ensure consistent order
+    cell_ids = np.sort(cell_ids)
 
-    # Process each channel
     all_features = []
     n_channels = image.shape[0]
     n_jobs = _get_n_cores(n_jobs)
@@ -540,12 +237,12 @@ def calculate_image_features(
         )(labels=labels, intensity_image=None)
         all_features.append(res)
 
-    # skimage features that need a mask and an image
     if "skimage:label+image" in measurements:
         for ch_idx in range(n_channels):
-            ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
+            ch_name = (
+                channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
+            )
             ch_image = image[ch_idx]
-
             logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.")
             res = parallelize(
                 _get_regionprops_features,
@@ -556,42 +253,47 @@ def calculate_image_features(
                 show_progress_bar=show_progress_bar,
                 verbose=verbose,
             )(labels=labels, intensity_image=ch_image)
+            # Append channel names to each feature column
+            res = res.rename(columns=lambda col: f"{col}_{ch_name}")
             all_features.append(res)
 
-    # cpmeasure features that need a mask and an image
     if "cpmeasure:core" in measurements:
         measurements_core = get_core_measurements()
-
         for ch_idx in range(n_channels):
-            ch_name = channel_names[ch_idx] if channel_names is not None else f"ch{ch_idx}"
+            ch_name = (
+                channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
+            )
             ch_image = image[ch_idx]
-            if "cpmeasure:core" in measurements:
-                logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.")
-
-                res = parallelize(
-                    _calculate_features_helper,
-                    collection=cell_ids,
-                    extractor=pd.concat,
-                    n_jobs=n_jobs,
-                    backend=backend,
-                    show_progress_bar=show_progress_bar,
-                    verbose=verbose,
-                )(labels, ch_image, None, measurements_core, ch_name)
-                all_features.append(res)
+            logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.")
+            res = parallelize(
+                _calculate_features_helper,
+                collection=cell_ids,
+                extractor=pd.concat,
+                n_jobs=n_jobs,
+                backend=backend,
+                show_progress_bar=show_progress_bar,
+                verbose=verbose,
+            )(labels, ch_image, None, measurements_core, ch_name)
+            all_features.append(res)
 
-    # cpmeasure features that correlate two channels
     if "cpmeasure:correlation" in measurements:
         for ch1_idx in range(n_channels):
             for ch2_idx in range(ch1_idx + 1, n_channels):
-                ch1_name = channel_names[ch1_idx] if channel_names is not None else f"ch{ch1_idx}"
-                ch2_name = channel_names[ch2_idx] if channel_names is not None else f"ch{ch2_idx}"
-
-                logg.info(f"Calculating correlation features between channels '{ch1_name}' and '{ch2_name}'.")
-
+                ch1_name = (
+                    channel_names[ch1_idx]
+                    if channel_names is not None
+                    else f"{ch1_idx}"
+                )
+                ch2_name = (
+                    channel_names[ch2_idx]
+                    if channel_names is not None
+                    else f"{ch2_idx}"
+                )
+                logg.info(
+                    f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'."
+                )
                 ch1_image = image[ch1_idx]
                 ch2_image = image[ch2_idx]
-
-                # Parallelize feature calculation
                 res = parallelize(
                     _calculate_features_helper,
                     collection=cell_ids,
@@ -603,7 +305,6 @@ def calculate_image_features(
                 )(labels, ch1_image, ch2_image, measurements_corr, ch1_name, ch2_name)
                 all_features.append(res)
 
-    # Create AnnData object from results
     combined_features = pd.concat(all_features, axis=1)
 
     if invalid_as_zero:
@@ -611,7 +312,6 @@ def calculate_image_features(
         combined_features = combined_features.fillna(0)
 
     # Ensure cell IDs are preserved in the correct order
-    cell_ids = sorted(combined_features.index)
     combined_features = combined_features.loc[cell_ids]
 
     adata = ad.AnnData(X=combined_features)
@@ -619,12 +319,395 @@ def calculate_image_features(
     adata.var_names = combined_features.columns
 
     adata.uns["spatialdata_attrs"] = {
-        "region": labels_key,
+        "region": labels_key if labels_key is not None else shapes_key,
         "region_key": "region",
         "instance_key": "label_id",
     }
-    adata.obs["region"] = pd.Categorical([labels_key] * len(adata))
+    adata.obs["region"] = pd.Categorical(
+        [labels_key if labels_key is not None else shapes_key] * len(adata)
+    )
     adata.obs["label_id"] = cell_ids
 
-    # Add the AnnData object to the SpatialData object
-    sdata.tables[adata_key_added] = TableModel.parse(adata)
+    if inplace:
+        sdata.tables[adata_key_added] = TableModel.parse(adata)
+    else:
+        return combined_features
+
+
+def _extract_features_from_regionprops(
+    region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False
+) -> dict[str, float]:
+    """Extract features from a regionprops object given a list of properties."""
+    cell_features = {}
+    for prop in props:
+        try:
+            value = getattr(region_obj, prop)
+            if skip_callable and callable(value):
+                continue
+            if isinstance(value, np.ndarray | list | tuple):
+                value = np.array(value)
+                if value.ndim == 1:
+                    for i, v in enumerate(value):
+                        cell_features[f"{prop}_{i}"] = float(v)
+                elif value.ndim == 2:
+                    for i, j in itertools.product(
+                        range(value.shape[0]), range(value.shape[1])
+                    ):
+                        cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
+                else:
+                    cell_features[prop] = value
+            else:
+                cell_features[prop] = float(value)
+        except Exception as e:
+            logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}")
+            continue
+    return cell_features
+
+
+def _calculate_regionprops_from_crop(
+    cell_mask_cropped: np.ndarray,
+    intensity_image_cropped: np.ndarray | None,
+    cell_id: int,
+) -> dict[str, float]:
+    """
+    Calculate regionprops features from pre-cropped arrays.
+    Uses intensity-based properties if an intensity image is provided.
+    """
+    if intensity_image_cropped is None:
+        region_props = measure.regionprops(label_image=label(cell_mask_cropped))
+        if not region_props:
+            return {}
+        return _extract_features_from_regionprops(region_props[0], _MASK_PROPS, cell_id)
+    else:
+        region_props = measure.regionprops(
+            label_image=label(cell_mask_cropped),
+            intensity_image=intensity_image_cropped,
+        )
+        if not region_props:
+            return {}
+        return _extract_features_from_regionprops(
+            region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True
+        )
+
+
+def _append_channel_names(
+    features: dict, channel1: str, channel2: str | None = None
+) -> dict:
+    """Append channel name(s) to all keys in the feature dictionary."""
+    if channel2 is None:
+        return {f"{k}_{channel1}": v for k, v in features.items()}
+    else:
+        return {f"{k}_{channel1}_{channel2}": v for k, v in features.items()}
+
+
+def _prepare_images_for_measurement(
+    name: str,
+    cell_mask: np.ndarray,
+    img1: np.ndarray,
+    img2: np.ndarray | None,
+    conv_params: dict,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
+    """
+    Convert inputs to the appropriate dtype based on the measurement type.
+    """
+    if name in conv_params.get("uint8_features", []):
+        mask = cell_mask.astype(np.uint8)
+        image1_prepared = img1.astype(np.uint8)
+        image2_prepared = None if img2 is None else img2.astype(np.uint8)
+    elif name == "texture":
+        mask = cell_mask.astype(np.uint8)
+        image1_prepared = (
+            img1.astype(np.float32) - conv_params["img1_min"]
+        ) / conv_params["img1_range"]
+        image2_prepared = (
+            None
+            if img2 is None
+            else (img2.astype(np.float32) - conv_params["img2_min"])
+            / conv_params["img2_range"]
+        )
+    elif name in conv_params.get("float_features", []):
+        mask = cell_mask.astype(np.float32)
+        image1_prepared = img1.astype(np.float32)
+        image2_prepared = None if img2 is None else img2.astype(np.float32)
+    else:
+        mask = cell_mask.astype(np.float32)
+        image1_prepared = img1.astype(np.float32)
+        image2_prepared = None if img2 is None else img2.astype(np.float32)
+    return mask, image1_prepared, image2_prepared
+
+
+def _get_cell_crops(
+    cell_id: int,
+    labels: np.ndarray,
+    image1: np.ndarray | None = None,
+    image2: np.ndarray | None = None,
+    pad: int = 1,
+    verbose: bool = False,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None:
+    """Generator function to get cropped arrays for a cell.
+
+    Parameters
+    ----------
+    cell_id
+        The ID of the cell to process
+    labels
+        The labels array containing cell masks
+    image1
+        First image to crop
+    image2
+        Optional second image to crop
+    pad
+        Amount of padding to add around the cell
+    verbose
+        Whether to print warning messages
+
+    Returns
+    -------
+    Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty
+    """
+    cell_mask = labels == cell_id
+    y_indices, x_indices = np.where(cell_mask)
+    if len(y_indices) == 0:  # Skip empty cells
+        return None
+
+    y_min, y_max = y_indices.min(), y_indices.max()
+    x_min, x_max = x_indices.min(), x_indices.max()
+    height, width = labels.shape
+
+    y_pad_min = min(pad, y_min)
+    y_pad_max = min(pad, height - y_max - 1)
+    x_pad_min = min(pad, x_min)
+    x_pad_max = min(pad, width - x_max - 1)
+
+    y_min -= y_pad_min
+    y_max += y_pad_max
+    x_min -= x_pad_min
+    x_max += x_pad_max
+
+    if verbose and (
+        y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad
+    ):
+        logg.warning(
+            f"Cell {cell_id} is at image border. Padding is asymmetric: "
+            f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
+            f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right"
+        )
+
+    cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
+    image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max]
+    image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
+
+    return cell_mask_cropped, image1_cropped, image2_cropped
+
+
+def _get_regionprops_features(
+    cell_ids: Sequence[int],
+    labels: np.ndarray,
+    intensity_image: np.ndarray | None = None,
+    queue: Any | None = None,
+) -> pd.DataFrame:
+    """Calculate regionprops features for each cell from the full label image."""
+    # Initialize features dictionary with None values to preserve order
+    features = {cell_id: None for cell_id in cell_ids}
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        # Process cells in order to preserve order
+        for cell_id in cell_ids:
+            crop = _get_cell_crops(cell_id, labels, image1=intensity_image)
+            if crop is None:
+                continue
+            cell_mask_cropped, intensity_image_cropped, _ = crop
+            cell_features = _calculate_regionprops_from_crop(
+                cell_mask_cropped, intensity_image_cropped, cell_id
+            )
+            features[cell_id] = cell_features
+            if queue is not None:
+                queue.put(Signal.UPDATE)
+        if queue is not None:
+            queue.put(Signal.FINISH)
+
+    # Convert to DataFrame while preserving order
+    df = pd.DataFrame.from_dict(features, orient="index")
+    # Ensure the index matches the input cell_ids order
+    df = df.reindex(cell_ids)
+    return df
+
+
+def _measurement_wrapper(
+    func: Callable,
+    mask: np.ndarray,
+    image1: np.ndarray,
+    image2: np.ndarray | None = None,
+) -> dict[str, Any]:
+    """Wrapper function to handle both core and correlation measurements.
+
+    Parameters
+    ----------
+    func
+        The measurement function to call
+    mask
+        The cell mask
+    image1
+        First image (or only image for core measurements)
+    image2
+        Second image for correlation measurements. If None, this is a core
+        measurement.
+
+    Returns
+    -------
+    Dictionary of feature values
+    """
+    return func(mask, image1) if image2 is None else func(image1, image2, mask)
+
+
+def _calculate_features_helper(
+    cell_ids: Sequence[int],
+    labels: np.ndarray,
+    image1: np.ndarray,
+    image2: np.ndarray | None,
+    measurements: dict[str, Any],
+    channel1_name: str | None = None,
+    channel2_name: str | None = None,
+    queue: Any | None = None,
+    verbose: bool = False,
+) -> pd.DataFrame:
+    """Helper function to calculate features for a subset of cells."""
+    # Initialize features dictionary with None values to preserve order
+    features_dict = {cell_id: None for cell_id in cell_ids}
+
+    # Pre-allocate lists for type conversion
+    uint8_features = [
+        "radial_distribution",
+        "radial_zernikes",
+        "intensity",
+        "sizeshape",
+        "zernike",
+        "ferret",
+    ]
+    float_features = ["manders_fold", "rwc"]
+
+    # Pre-compute normalization if needed
+    conv_params: dict[str, Any] = {
+        "uint8_features": uint8_features,
+        "float_features": float_features,
+    }
+    if "texture" in measurements:
+        img1_min = image1.min()
+        img1_max = image1.max()
+        conv_params["img1_min"] = img1_min
+        conv_params["img1_range"] = img1_max - img1_min + 1e-10
+        if image2 is not None:
+            img2_min = image2.min()
+            img2_max = image2.max()
+            conv_params["img2_min"] = img2_min
+            conv_params["img2_range"] = img2_max - img2_min + 1e-10
+
+    # Process cells in order to preserve order
+    for cell_id in cell_ids:
+        crop = _get_cell_crops(cell_id, labels, image1, image2, verbose=verbose)
+        if crop is None:
+            continue
+        cell_mask_cropped, image1_cropped, image2_cropped = crop
+        cell_features = {}
+
+        # Calculate regionprops features using cached crop
+        try:
+            region_features = _calculate_regionprops_from_crop(
+                cell_mask_cropped,
+                image1_cropped if image2 is None else None,
+                cell_id,
+            )
+            if image2 is None:
+                region_features = _append_channel_names(region_features, channel1_name)
+            else:
+                region_features = _append_channel_names(
+                    region_features, channel1_name, channel2_name
+                )
+            cell_features.update(region_features)
+        except Exception as e:
+            if verbose:
+                logg.warning(
+                    f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}"
+                )
+
+        # Calculate cp-measure features for each measurement
+        for name, func in measurements.items():
+            try:
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement(
+                        name,
+                        cell_mask_cropped,
+                        image1_cropped,
+                        image2_cropped,
+                        conv_params,
+                    )
+                    feature_dict = _measurement_wrapper(
+                        func, mask_conv, img1_conv, img2_conv
+                    )
+                    # Ensure each feature returns a single value
+                    for k, v in feature_dict.items():
+                        if len(v) > 1:
+                            raise ValueError(f"Feature {k} has more than one value.")
+                        else:
+                            feature_dict[k] = float(v[0])
+                    if image2 is None:
+                        feature_dict = _append_channel_names(
+                            feature_dict, channel1_name
+                        )
+                    else:
+                        feature_dict = _append_channel_names(
+                            feature_dict, channel1_name, channel2_name
+                        )
+                    cell_features.update(feature_dict)
+            except Exception as e:
+                if verbose:
+                    logg.warning(
+                        f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}"
+                    )
+
+        features_dict[cell_id] = cell_features
+
+        if queue is not None:
+            queue.put(Signal.UPDATE)
+
+    if queue is not None:
+        queue.put(Signal.FINISH)
+
+    # Convert to DataFrame while preserving order
+    df = pd.DataFrame.from_dict(features_dict, orient="index")
+    # Ensure the index matches the input cell_ids order
+    df = df.reindex(cell_ids)
+    return df
+
+
+def _get_array_from_DataTree_or_DataArray(
+    data: xr.DataTree | xr.DataArray, scale: str | None = None
+) -> np.ndarray:
+    """
+    Returns a NumPy array for the given data and scale.
+    If data is an xr.DataTree, it checks for the scale key and computes the image.
+    If data is an xr.DataArray, it computes the array (ignoring scale).
+
+    Parameters
+    ----------
+    data
+        The xarray data to convert to a NumPy array
+    scale
+        Optional scale key for DataTree data
+
+    Returns
+    -------
+    np.ndarray
+        The computed NumPy array
+    """
+    if not isinstance(data, xr.DataTree):
+        return np.asarray(data.compute())
+    if scale is None:
+        raise ValueError("Scale must be provided for DataTree data")
+    if scale not in data:
+        raise ValueError(
+            f"Scale '{scale}' not found. Available scales: {list(data.keys())}"
+        )
+    return np.asarray(data[scale].image.compute())

From 0bc950764e34f3c85f993acec490840c5a298a0c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 31 Mar 2025 16:40:41 +0000
Subject: [PATCH 07/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/squidpy/exp/_feature.py | 119 +++++++++---------------------------
 1 file changed, 28 insertions(+), 91 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index cb0e41101..c49ddc5c4 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -108,26 +108,19 @@ def calculate_image_features(
     """
 
     if image_key not in sdata.images.keys():
-        raise ValueError(
-            f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}"
-        )
+        raise ValueError(f"Image key '{image_key}' not found, valid keys: {list(sdata.images.keys())}")
 
     if labels_key is not None and shapes_key is not None:
         raise ValueError("Use either `labels_key` or `shapes_key`, not both.")
 
     if labels_key is not None and labels_key not in sdata.labels.keys():
-        raise ValueError(
-            f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}"
-        )
+        raise ValueError(f"Labels key '{labels_key}' not found, valid keys: {list(sdata.labels.keys())}")
 
     if shapes_key is not None and shapes_key not in sdata.shapes.keys():
-        raise ValueError(
-            f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}"
-        )
+        raise ValueError(f"Shapes key '{shapes_key}' not found, valid keys: {list(sdata.shapes.keys())}")
 
     if (
-        isinstance(sdata.images[image_key], xr.DataTree)
-        or isinstance(sdata.labels[labels_key], xr.DataTree)
+        isinstance(sdata.images[image_key], xr.DataTree) or isinstance(sdata.labels[labels_key], xr.DataTree)
     ) and scale is None:
         raise ValueError("When using multi-scale data, please specify the scale.")
 
@@ -135,11 +128,7 @@ def calculate_image_features(
         raise ValueError("Scale must be a string.")
 
     image = _get_array_from_DataTree_or_DataArray(sdata.images[image_key], scale)
-    labels = (
-        _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale)
-        if labels_key is not None
-        else None
-    )
+    labels = _get_array_from_DataTree_or_DataArray(sdata.labels[labels_key], scale) if labels_key is not None else None
 
     if labels is not None and image.shape[1:] != labels.shape:
         raise ValueError(
@@ -178,9 +167,7 @@ def calculate_image_features(
         measurements = [measurements]
 
     if isinstance(measurements, list):
-        invalid_measurements = [
-            m for m in measurements if m not in available_measurements
-        ]
+        invalid_measurements = [m for m in measurements if m not in available_measurements]
         if invalid_measurements:
             raise ValueError(
                 f"Invalid measurement(s): {invalid_measurements}, available measurements: {available_measurements}"
@@ -194,10 +181,7 @@ def calculate_image_features(
         raise ValueError("No cells found in labels (max label is 0)")
 
     channel_names = None
-    if (
-        hasattr(sdata.images[image_key], "coords")
-        and "c" in sdata.images[image_key].coords
-    ):
+    if hasattr(sdata.images[image_key], "coords") and "c" in sdata.images[image_key].coords:
         channel_names = sdata.images[image_key].coords["c"].values
 
     if image.ndim == 2:
@@ -206,9 +190,7 @@ def calculate_image_features(
         raise ValueError(f"Expected 2D or 3D image, got shape {image.shape}")
 
     if image.shape[1:] != labels.shape:
-        raise ValueError(
-            f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}"
-        )
+        raise ValueError(f"Image and labels have mismatched dimensions: image {image.shape[1:]}, labels {labels.shape}")
 
     if "cpmeasure:correlation" in measurements:
         measurements_corr = get_correlation_measurements()
@@ -239,9 +221,7 @@ def calculate_image_features(
 
     if "skimage:label+image" in measurements:
         for ch_idx in range(n_channels):
-            ch_name = (
-                channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
-            )
+            ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
             ch_image = image[ch_idx]
             logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.")
             res = parallelize(
@@ -260,9 +240,7 @@ def calculate_image_features(
     if "cpmeasure:core" in measurements:
         measurements_core = get_core_measurements()
         for ch_idx in range(n_channels):
-            ch_name = (
-                channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
-            )
+            ch_name = channel_names[ch_idx] if channel_names is not None else f"{ch_idx}"
             ch_image = image[ch_idx]
             logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.")
             res = parallelize(
@@ -279,16 +257,8 @@ def calculate_image_features(
     if "cpmeasure:correlation" in measurements:
         for ch1_idx in range(n_channels):
             for ch2_idx in range(ch1_idx + 1, n_channels):
-                ch1_name = (
-                    channel_names[ch1_idx]
-                    if channel_names is not None
-                    else f"{ch1_idx}"
-                )
-                ch2_name = (
-                    channel_names[ch2_idx]
-                    if channel_names is not None
-                    else f"{ch2_idx}"
-                )
+                ch1_name = channel_names[ch1_idx] if channel_names is not None else f"{ch1_idx}"
+                ch2_name = channel_names[ch2_idx] if channel_names is not None else f"{ch2_idx}"
                 logg.info(
                     f"Calculating 'cpmeasure' correlation features between channels '{ch1_name}' and '{ch2_name}'."
                 )
@@ -323,9 +293,7 @@ def calculate_image_features(
         "region_key": "region",
         "instance_key": "label_id",
     }
-    adata.obs["region"] = pd.Categorical(
-        [labels_key if labels_key is not None else shapes_key] * len(adata)
-    )
+    adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata))
     adata.obs["label_id"] = cell_ids
 
     if inplace:
@@ -350,9 +318,7 @@ def _extract_features_from_regionprops(
                     for i, v in enumerate(value):
                         cell_features[f"{prop}_{i}"] = float(v)
                 elif value.ndim == 2:
-                    for i, j in itertools.product(
-                        range(value.shape[0]), range(value.shape[1])
-                    ):
+                    for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
                         cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
                 else:
                     cell_features[prop] = value
@@ -385,14 +351,10 @@ def _calculate_regionprops_from_crop(
         )
         if not region_props:
             return {}
-        return _extract_features_from_regionprops(
-            region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True
-        )
+        return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True)
 
 
-def _append_channel_names(
-    features: dict, channel1: str, channel2: str | None = None
-) -> dict:
+def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict:
     """Append channel name(s) to all keys in the feature dictionary."""
     if channel2 is None:
         return {f"{k}_{channel1}": v for k, v in features.items()}
@@ -416,14 +378,9 @@ def _prepare_images_for_measurement(
         image2_prepared = None if img2 is None else img2.astype(np.uint8)
     elif name == "texture":
         mask = cell_mask.astype(np.uint8)
-        image1_prepared = (
-            img1.astype(np.float32) - conv_params["img1_min"]
-        ) / conv_params["img1_range"]
+        image1_prepared = (img1.astype(np.float32) - conv_params["img1_min"]) / conv_params["img1_range"]
         image2_prepared = (
-            None
-            if img2 is None
-            else (img2.astype(np.float32) - conv_params["img2_min"])
-            / conv_params["img2_range"]
+            None if img2 is None else (img2.astype(np.float32) - conv_params["img2_min"]) / conv_params["img2_range"]
         )
     elif name in conv_params.get("float_features", []):
         mask = cell_mask.astype(np.float32)
@@ -484,9 +441,7 @@ def _get_cell_crops(
     x_min -= x_pad_min
     x_max += x_pad_max
 
-    if verbose and (
-        y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad
-    ):
+    if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad):
         logg.warning(
             f"Cell {cell_id} is at image border. Padding is asymmetric: "
             f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
@@ -518,9 +473,7 @@ def _get_regionprops_features(
             if crop is None:
                 continue
             cell_mask_cropped, intensity_image_cropped, _ = crop
-            cell_features = _calculate_regionprops_from_crop(
-                cell_mask_cropped, intensity_image_cropped, cell_id
-            )
+            cell_features = _calculate_regionprops_from_crop(cell_mask_cropped, intensity_image_cropped, cell_id)
             features[cell_id] = cell_features
             if queue is not None:
                 queue.put(Signal.UPDATE)
@@ -621,15 +574,11 @@ def _calculate_features_helper(
             if image2 is None:
                 region_features = _append_channel_names(region_features, channel1_name)
             else:
-                region_features = _append_channel_names(
-                    region_features, channel1_name, channel2_name
-                )
+                region_features = _append_channel_names(region_features, channel1_name, channel2_name)
             cell_features.update(region_features)
         except Exception as e:
             if verbose:
-                logg.warning(
-                    f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}"
-                )
+                logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}")
 
         # Calculate cp-measure features for each measurement
         for name, func in measurements.items():
@@ -643,9 +592,7 @@ def _calculate_features_helper(
                         image2_cropped,
                         conv_params,
                     )
-                    feature_dict = _measurement_wrapper(
-                        func, mask_conv, img1_conv, img2_conv
-                    )
+                    feature_dict = _measurement_wrapper(func, mask_conv, img1_conv, img2_conv)
                     # Ensure each feature returns a single value
                     for k, v in feature_dict.items():
                         if len(v) > 1:
@@ -653,19 +600,13 @@ def _calculate_features_helper(
                         else:
                             feature_dict[k] = float(v[0])
                     if image2 is None:
-                        feature_dict = _append_channel_names(
-                            feature_dict, channel1_name
-                        )
+                        feature_dict = _append_channel_names(feature_dict, channel1_name)
                     else:
-                        feature_dict = _append_channel_names(
-                            feature_dict, channel1_name, channel2_name
-                        )
+                        feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name)
                     cell_features.update(feature_dict)
             except Exception as e:
                 if verbose:
-                    logg.warning(
-                        f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}"
-                    )
+                    logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}")
 
         features_dict[cell_id] = cell_features
 
@@ -682,9 +623,7 @@ def _calculate_features_helper(
     return df
 
 
-def _get_array_from_DataTree_or_DataArray(
-    data: xr.DataTree | xr.DataArray, scale: str | None = None
-) -> np.ndarray:
+def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray:
     """
     Returns a NumPy array for the given data and scale.
     If data is an xr.DataTree, it checks for the scale key and computes the image.
@@ -707,7 +646,5 @@ def _get_array_from_DataTree_or_DataArray(
     if scale is None:
         raise ValueError("Scale must be provided for DataTree data")
     if scale not in data:
-        raise ValueError(
-            f"Scale '{scale}' not found. Available scales: {list(data.keys())}"
-        )
+        raise ValueError(f"Scale '{scale}' not found. Available scales: {list(data.keys())}")
     return np.asarray(data[scale].image.compute())

From 60ed2234e93e08043ecac1cc2afb1e0dddae4e1c Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 1 Apr 2025 15:13:21 +0200
Subject: [PATCH 08/15] fixed off-by-one error

---
 src/squidpy/exp/_feature.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index cb0e41101..7c4c8b5db 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -326,7 +326,11 @@ def calculate_image_features(
     adata.obs["region"] = pd.Categorical(
         [labels_key if labels_key is not None else shapes_key] * len(adata)
     )
-    adata.obs["label_id"] = cell_ids
+    # here we either use the cell_ids or the index of the shapes. Needed 
+    # because when converting the shapes to labels, a potential index 0
+    # in the shapes is set to 1 in the labels and therefore we'd otherwise 
+    # be off-by-one in the label_id.
+    adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids
 
     if inplace:
         sdata.tables[adata_key_added] = TableModel.parse(adata)

From 9aade33593deeb90ede9175204f532ceac726ac5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 1 Apr 2025 13:14:11 +0000
Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/squidpy/exp/_feature.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 357b5c4bb..bee79a1a8 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -293,12 +293,10 @@ def calculate_image_features(
         "region_key": "region",
         "instance_key": "label_id",
     }
-    adata.obs["region"] = pd.Categorical(
-        [labels_key if labels_key is not None else shapes_key] * len(adata)
-    )
-    # here we either use the cell_ids or the index of the shapes. Needed 
+    adata.obs["region"] = pd.Categorical([labels_key if labels_key is not None else shapes_key] * len(adata))
+    # here we either use the cell_ids or the index of the shapes. Needed
     # because when converting the shapes to labels, a potential index 0
-    # in the shapes is set to 1 in the labels and therefore we'd otherwise 
+    # in the shapes is set to 1 in the labels and therefore we'd otherwise
     # be off-by-one in the label_id.
     adata.obs["label_id"] = sdata.shapes[shapes_key].index.values if shapes_key is not None else cell_ids
 

From 6d056b48f06bcd9460d664b90496dd296319b94a Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 29 Apr 2025 15:55:10 -0400
Subject: [PATCH 10/15] fixed mypy

---
 src/squidpy/exp/_feature.py | 90 +++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 34 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index bee79a1a8..e642fc581 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -9,6 +9,7 @@
 
 import anndata as ad
 import numpy as np
+import numpy.typing as npt
 import pandas as pd
 import xarray as xr
 from cp_measure.bulk import get_core_measurements, get_correlation_measurements
@@ -53,6 +54,12 @@
     "intensity_std",
 }
 
+# Define array types using modern syntax
+NDArray = npt.NDArray[Any]  # Generic array
+FloatArray = npt.NDArray[np.float32]  # Float32 array
+IntArray = npt.NDArray[np.int_]  # Integer array
+BoolArray = npt.NDArray[np.bool_]  # Boolean array
+
 
 @d.dedent
 @inject_docs(f=ImageFeature)
@@ -199,6 +206,7 @@ def calculate_image_features(
     cell_ids = cell_ids[cell_ids != 0]
     # Sort cell_ids to ensure consistent order
     cell_ids = np.sort(cell_ids)
+    cell_ids_list = cell_ids.tolist()  # Convert to list for parallelize
 
     all_features = []
     n_channels = image.shape[0]
@@ -210,7 +218,7 @@ def calculate_image_features(
         logg.info("Calculating 'skimage' label features.")
         res = parallelize(
             _get_regionprops_features,
-            collection=cell_ids,
+            collection=cell_ids_list,
             extractor=pd.concat,
             n_jobs=n_jobs,
             backend=backend,
@@ -226,7 +234,7 @@ def calculate_image_features(
             logg.info(f"Calculating 'skimage' image features for channel '{ch_idx}'.")
             res = parallelize(
                 _get_regionprops_features,
-                collection=cell_ids,
+                collection=cell_ids_list,
                 extractor=pd.concat,
                 n_jobs=n_jobs,
                 backend=backend,
@@ -234,7 +242,7 @@ def calculate_image_features(
                 verbose=verbose,
             )(labels=labels, intensity_image=ch_image)
             # Append channel names to each feature column
-            res = res.rename(columns=lambda col: f"{col}_{ch_name}")
+            res = res.rename(columns=lambda col, ch_name=ch_name: f"{col}_{ch_name}")
             all_features.append(res)
 
     if "cpmeasure:core" in measurements:
@@ -245,7 +253,7 @@ def calculate_image_features(
             logg.info(f"Calculating 'cpmeasure' core features for channel '{ch_idx}'.")
             res = parallelize(
                 _calculate_features_helper,
-                collection=cell_ids,
+                collection=cell_ids_list,
                 extractor=pd.concat,
                 n_jobs=n_jobs,
                 backend=backend,
@@ -266,7 +274,7 @@ def calculate_image_features(
                 ch2_image = image[ch2_idx]
                 res = parallelize(
                     _calculate_features_helper,
-                    collection=cell_ids,
+                    collection=cell_ids_list,
                     extractor=pd.concat,
                     n_jobs=n_jobs,
                     backend=backend,
@@ -307,7 +315,10 @@ def calculate_image_features(
 
 
 def _extract_features_from_regionprops(
-    region_obj: Any, props: set[str], cell_id: int, skip_callable: bool = False
+    region_obj: Any,
+    props: set[str],
+    cell_id: int,
+    skip_callable: bool = False,
 ) -> dict[str, float]:
     """Extract features from a regionprops object given a list of properties."""
     cell_features = {}
@@ -325,18 +336,18 @@ def _extract_features_from_regionprops(
                     for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
                         cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
                 else:
-                    cell_features[prop] = value
+                    cell_features[prop] = float(value.flatten()[0])  # Convert to float
             else:
                 cell_features[prop] = float(value)
-        except Exception as e:
+        except (ValueError, TypeError, AttributeError) as e:
             logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}")
             continue
     return cell_features
 
 
 def _calculate_regionprops_from_crop(
-    cell_mask_cropped: np.ndarray,
-    intensity_image_cropped: np.ndarray | None,
+    cell_mask_cropped: NDArray,
+    intensity_image_cropped: NDArray | None,
     cell_id: int,
 ) -> dict[str, float]:
     """
@@ -358,7 +369,11 @@ def _calculate_regionprops_from_crop(
         return _extract_features_from_regionprops(region_props[0], _INTENSITY_PROPS, cell_id, skip_callable=True)
 
 
-def _append_channel_names(features: dict, channel1: str, channel2: str | None = None) -> dict:
+def _append_channel_names(
+    features: dict[str, Any],
+    channel1: str | None,
+    channel2: str | None = None,
+) -> dict[str, Any]:
     """Append channel name(s) to all keys in the feature dictionary."""
     if channel2 is None:
         return {f"{k}_{channel1}": v for k, v in features.items()}
@@ -368,11 +383,11 @@ def _append_channel_names(features: dict, channel1: str, channel2: str | None =
 
 def _prepare_images_for_measurement(
     name: str,
-    cell_mask: np.ndarray,
-    img1: np.ndarray,
-    img2: np.ndarray | None,
-    conv_params: dict,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray | None]:
+    cell_mask: NDArray,
+    img1: NDArray,
+    img2: NDArray | None,
+    conv_params: dict[str, Any],
+) -> tuple[NDArray, NDArray | None, NDArray | None]:
     """
     Convert inputs to the appropriate dtype based on the measurement type.
     """
@@ -399,12 +414,12 @@ def _prepare_images_for_measurement(
 
 def _get_cell_crops(
     cell_id: int,
-    labels: np.ndarray,
-    image1: np.ndarray | None = None,
-    image2: np.ndarray | None = None,
+    labels: NDArray,
+    image1: NDArray | None = None,
+    image2: NDArray | None = None,
     pad: int = 1,
     verbose: bool = False,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray | None] | None:
+) -> tuple[NDArray, NDArray | None, NDArray | None] | None:
     """Generator function to get cropped arrays for a cell.
 
     Parameters
@@ -461,13 +476,13 @@ def _get_cell_crops(
 
 def _get_regionprops_features(
     cell_ids: Sequence[int],
-    labels: np.ndarray,
-    intensity_image: np.ndarray | None = None,
+    labels: NDArray,
+    intensity_image: NDArray | None = None,
     queue: Any | None = None,
 ) -> pd.DataFrame:
     """Calculate regionprops features for each cell from the full label image."""
     # Initialize features dictionary with None values to preserve order
-    features = {cell_id: None for cell_id in cell_ids}
+    features = dict.fromkeys(cell_ids, None)
 
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
@@ -492,10 +507,10 @@ def _get_regionprops_features(
 
 
 def _measurement_wrapper(
-    func: Callable,
-    mask: np.ndarray,
-    image1: np.ndarray,
-    image2: np.ndarray | None = None,
+    func: Callable[..., dict[str, Any]],
+    mask: NDArray,
+    image1: NDArray | None,
+    image2: NDArray | None = None,
 ) -> dict[str, Any]:
     """Wrapper function to handle both core and correlation measurements.
 
@@ -515,14 +530,16 @@ def _measurement_wrapper(
     -------
     Dictionary of feature values
     """
+    if image1 is None:
+        return {}  # Return empty dict if no image data
     return func(mask, image1) if image2 is None else func(image1, image2, mask)
 
 
 def _calculate_features_helper(
     cell_ids: Sequence[int],
-    labels: np.ndarray,
-    image1: np.ndarray,
-    image2: np.ndarray | None,
+    labels: NDArray,
+    image1: NDArray,
+    image2: NDArray | None,
     measurements: dict[str, Any],
     channel1_name: str | None = None,
     channel2_name: str | None = None,
@@ -531,7 +548,7 @@ def _calculate_features_helper(
 ) -> pd.DataFrame:
     """Helper function to calculate features for a subset of cells."""
     # Initialize features dictionary with None values to preserve order
-    features_dict = {cell_id: None for cell_id in cell_ids}
+    features_dict = dict.fromkeys(cell_ids, None)
 
     # Pre-allocate lists for type conversion
     uint8_features = [
@@ -580,7 +597,7 @@ def _calculate_features_helper(
             else:
                 region_features = _append_channel_names(region_features, channel1_name, channel2_name)
             cell_features.update(region_features)
-        except Exception as e:
+        except (ValueError, TypeError, AttributeError) as e:
             if verbose:
                 logg.warning(f"Failed to calculate regionprops features for cell {cell_id}: {str(e)}")
 
@@ -589,6 +606,8 @@ def _calculate_features_helper(
             try:
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore")
+                    if image1_cropped is None:
+                        continue
                     mask_conv, img1_conv, img2_conv = _prepare_images_for_measurement(
                         name,
                         cell_mask_cropped,
@@ -608,7 +627,7 @@ def _calculate_features_helper(
                     else:
                         feature_dict = _append_channel_names(feature_dict, channel1_name, channel2_name)
                     cell_features.update(feature_dict)
-            except Exception as e:
+            except (ValueError, TypeError, AttributeError) as e:
                 if verbose:
                     logg.warning(f"Failed to calculate '{name}' features for cell {cell_id}: {str(e)}")
 
@@ -627,7 +646,10 @@ def _calculate_features_helper(
     return df
 
 
-def _get_array_from_DataTree_or_DataArray(data: xr.DataTree | xr.DataArray, scale: str | None = None) -> np.ndarray:
+def _get_array_from_DataTree_or_DataArray(
+    data: xr.DataTree | xr.DataArray,
+    scale: str | None = None,
+) -> NDArray:
     """
     Returns a NumPy array for the given data and scale.
     If data is an xr.DataTree, it checks for the scale key and computes the image.

From 5e11b8dcb60002d03c718865f24a46f703f8168e Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 29 Apr 2025 16:33:20 -0400
Subject: [PATCH 11/15] numba

---
 src/squidpy/exp/_feature.py | 150 +++++++++++++++++++++++++-----------
 1 file changed, 106 insertions(+), 44 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index e642fc581..8f734ba00 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -23,6 +23,8 @@
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
+from numba import njit, prange
+import os
 
 __all__ = ["calculate_image_features"]
 
@@ -321,27 +323,63 @@ def _extract_features_from_regionprops(
     skip_callable: bool = False,
 ) -> dict[str, float]:
     """Extract features from a regionprops object given a list of properties."""
-    cell_features = {}
+    # Pre-allocate arrays for common cases
+    max_1d_features = 10  # Adjust based on typical usage
+    max_2d_features = 10  # Adjust based on typical usage
+    
+    # Initialize arrays for batch processing
+    names_1d = np.empty(max_1d_features, dtype=object)
+    values_1d = np.empty(max_1d_features, dtype=float)
+    names_2d = np.empty(max_2d_features, dtype=object)
+    values_2d = np.empty(max_2d_features, dtype=float)
+    
+    # Initialize counters
+    count_1d = 0
+    count_2d = 0
+    scalar_features = {}
+    
     for prop in props:
         try:
             value = getattr(region_obj, prop)
             if skip_callable and callable(value):
                 continue
+                
             if isinstance(value, np.ndarray | list | tuple):
                 value = np.array(value)
                 if value.ndim == 1:
-                    for i, v in enumerate(value):
-                        cell_features[f"{prop}_{i}"] = float(v)
+                    # Vectorized operation for 1D arrays
+                    n = len(value)
+                    if count_1d + n <= max_1d_features:
+                        indices = np.arange(n)
+                        names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices]
+                        values_1d[count_1d:count_1d+n] = value.astype(float)
+                        count_1d += n
                 elif value.ndim == 2:
-                    for i, j in itertools.product(range(value.shape[0]), range(value.shape[1])):
-                        cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
+                    # Vectorized operation for 2D arrays
+                    rows, cols = value.shape
+                    n = rows * cols
+                    if count_2d + n <= max_2d_features:
+                        i, j = np.meshgrid(range(rows), range(cols), indexing='ij')
+                        names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())]
+                        values_2d[count_2d:count_2d+n] = value.ravel().astype(float)
+                        count_2d += n
                 else:
-                    cell_features[prop] = float(value.flatten()[0])  # Convert to float
+                    scalar_features[prop] = float(value.flatten()[0])
             else:
-                cell_features[prop] = float(value)
+                scalar_features[prop] = float(value)
+                
         except (ValueError, TypeError, AttributeError) as e:
             logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}")
             continue
+    
+    # Combine all features
+    cell_features = {}
+    if count_1d > 0:
+        cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d])))
+    if count_2d > 0:
+        cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d])))
+    cell_features.update(scalar_features)
+    
     return cell_features
 
 
@@ -412,65 +450,89 @@ def _prepare_images_for_measurement(
     return mask, image1_prepared, image2_prepared
 
 
-def _get_cell_crops(
+@njit(fastmath=True)
+def _get_cell_crops_numba(
     cell_id: int,
-    labels: NDArray,
-    image1: NDArray | None = None,
-    image2: NDArray | None = None,
+    labels: np.ndarray,
+    image1: np.ndarray,
+    image2: np.ndarray,
     pad: int = 1,
-    verbose: bool = False,
-) -> tuple[NDArray, NDArray | None, NDArray | None] | None:
-    """Generator function to get cropped arrays for a cell.
-
-    Parameters
-    ----------
-    cell_id
-        The ID of the cell to process
-    labels
-        The labels array containing cell masks
-    image1
-        First image to crop
-    image2
-        Optional second image to crop
-    pad
-        Amount of padding to add around the cell
-    verbose
-        Whether to print warning messages
-
-    Returns
-    -------
-    Tuple of (cell_mask_cropped, image1_cropped, image2_cropped) or None if cell is empty
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Numba-accelerated version of _get_cell_crops.
+    
+    Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used.
     """
+    # Find cell boundaries
     cell_mask = labels == cell_id
     y_indices, x_indices = np.where(cell_mask)
-    if len(y_indices) == 0:  # Skip empty cells
-        return None
+    if len(y_indices) == 0:
+        # Return empty arrays if no cell found
+        return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype)
 
     y_min, y_max = y_indices.min(), y_indices.max()
     x_min, x_max = x_indices.min(), x_indices.max()
     height, width = labels.shape
 
+    # Calculate padding
     y_pad_min = min(pad, y_min)
     y_pad_max = min(pad, height - y_max - 1)
     x_pad_min = min(pad, x_min)
     x_pad_max = min(pad, width - x_max - 1)
 
+    # Apply padding
     y_min -= y_pad_min
     y_max += y_pad_max
     x_min -= x_pad_min
     x_max += x_pad_max
 
-    if verbose and (y_pad_min != pad or y_pad_max != pad or x_pad_min != pad or x_pad_max != pad):
-        logg.warning(
-            f"Cell {cell_id} is at image border. Padding is asymmetric: "
-            f"y: {y_pad_min}/{pad} top, {y_pad_max}/{pad} bottom, "
-            f"x: {x_pad_min}/{pad} left, {x_pad_max}/{pad} right"
-        )
+    # Calculate crop dimensions
+    y_size = y_max - y_min
+    x_size = x_max - x_min
+
+    # Create output arrays
+    cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_)
+    image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype)
+    image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype)
+
+    # Copy data
+    for i in range(y_size):
+        for j in range(x_size):
+            cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j]
+            if image1.size > 0:  # Only copy if image1 is not empty
+                image1_cropped[i, j] = image1[y_min + i, x_min + j]
+            if image2.size > 0:  # Only copy if image2 is not empty
+                image2_cropped[i, j] = image2[y_min + i, x_min + j]
+
+    return cell_mask_cropped, image1_cropped, image2_cropped
 
-    cell_mask_cropped = cell_mask[y_min:y_max, x_min:x_max]
-    image1_cropped = None if image1 is None else image1[y_min:y_max, x_min:x_max]
-    image2_cropped = None if image2 is None else image2[y_min:y_max, x_min:x_max]
 
+def _get_cell_crops(
+    cell_id: int,
+    labels: NDArray,
+    image1: NDArray | None = None,
+    image2: NDArray | None = None,
+    pad: int = 1,
+    verbose: bool = False,
+) -> tuple[NDArray, NDArray | None, NDArray | None] | None:
+    """Generator function to get cropped arrays for a cell."""
+    # Create empty arrays for unused images
+    empty_image = np.zeros((0, 0), dtype=np.float32)
+    image1_np = image1 if image1 is not None else empty_image
+    image2_np = image2 if image2 is not None else empty_image
+    
+    # Use Numba-accelerated version
+    cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba(
+        cell_id, labels, image1_np, image2_np, pad
+    )
+    
+    # Return None if no cell found
+    if cell_mask_cropped.size == 0:
+        return None
+        
+    # Convert back to None for unused images
+    image1_cropped = image1_cropped if image1 is not None else None
+    image2_cropped = image2_cropped if image2 is not None else None
+    
     return cell_mask_cropped, image1_cropped, image2_cropped
 
 

From 7963e845f73972f6231ffb01df164eb3f35402ca Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 29 Apr 2025 16:39:24 -0400
Subject: [PATCH 12/15] pre-alloc types

---
 src/squidpy/exp/_feature.py | 89 +++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 8f734ba00..74a635d55 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -62,6 +62,18 @@
 IntArray = npt.NDArray[np.int_]  # Integer array
 BoolArray = npt.NDArray[np.bool_]  # Boolean array
 
+# Define property sets at module level for better performance
+_SCALAR_PROPS = frozenset({
+    "area", "area_filled", "area_convex", "num_pixels",
+    "axis_major_length", "axis_minor_length", "eccentricity",
+    "equivalent_diameter", "extent", "feret_diameter_max",
+    "solidity", "euler_number", "perimeter", "perimeter_crofton"
+})
+
+_ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"})
+_ARRAY_2D_PROPS = frozenset({"inertia_tensor"})
+_SPECIAL_PROPS = frozenset({"inertia_tensor_eigvals"})
+
 
 @d.dedent
 @inject_docs(f=ImageFeature)
@@ -323,20 +335,7 @@ def _extract_features_from_regionprops(
     skip_callable: bool = False,
 ) -> dict[str, float]:
     """Extract features from a regionprops object given a list of properties."""
-    # Pre-allocate arrays for common cases
-    max_1d_features = 10  # Adjust based on typical usage
-    max_2d_features = 10  # Adjust based on typical usage
-    
-    # Initialize arrays for batch processing
-    names_1d = np.empty(max_1d_features, dtype=object)
-    values_1d = np.empty(max_1d_features, dtype=float)
-    names_2d = np.empty(max_2d_features, dtype=object)
-    values_2d = np.empty(max_2d_features, dtype=float)
-    
-    # Initialize counters
-    count_1d = 0
-    count_2d = 0
-    scalar_features = {}
+    cell_features = {}
     
     for prop in props:
         try:
@@ -344,42 +343,44 @@ def _extract_features_from_regionprops(
             if skip_callable and callable(value):
                 continue
                 
-            if isinstance(value, np.ndarray | list | tuple):
-                value = np.array(value)
-                if value.ndim == 1:
-                    # Vectorized operation for 1D arrays
-                    n = len(value)
-                    if count_1d + n <= max_1d_features:
-                        indices = np.arange(n)
-                        names_1d[count_1d:count_1d+n] = [f"{prop}_{i}" for i in indices]
-                        values_1d[count_1d:count_1d+n] = value.astype(float)
-                        count_1d += n
-                elif value.ndim == 2:
-                    # Vectorized operation for 2D arrays
-                    rows, cols = value.shape
-                    n = rows * cols
-                    if count_2d + n <= max_2d_features:
-                        i, j = np.meshgrid(range(rows), range(cols), indexing='ij')
-                        names_2d[count_2d:count_2d+n] = [f"{prop}_{i_}_{j_}" for i_, j_ in zip(i.ravel(), j.ravel())]
-                        values_2d[count_2d:count_2d+n] = value.ravel().astype(float)
-                        count_2d += n
-                else:
-                    scalar_features[prop] = float(value.flatten()[0])
+            if prop in _SCALAR_PROPS:
+                cell_features[prop] = float(value)
+            elif prop in _ARRAY_1D_PROPS:
+                # Convert to array only once
+                value = np.asarray(value)
+                for i, v in enumerate(value):
+                    cell_features[f"{prop}_{i}"] = float(v)
+            elif prop in _ARRAY_2D_PROPS:
+                # Convert to array only once
+                value = np.asarray(value)
+                for i in range(value.shape[0]):
+                    for j in range(value.shape[1]):
+                        cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
+            elif prop in _SPECIAL_PROPS:
+                # Convert to array only once
+                value = np.asarray(value)
+                for i, v in enumerate(value):
+                    cell_features[f"{prop}_{i}"] = float(v)
             else:
-                scalar_features[prop] = float(value)
+                # Fallback for any other properties
+                if isinstance(value, (np.ndarray, list, tuple)):
+                    value = np.asarray(value)
+                    if value.ndim == 1:
+                        for i, v in enumerate(value):
+                            cell_features[f"{prop}_{i}"] = float(v)
+                    elif value.ndim == 2:
+                        for i in range(value.shape[0]):
+                            for j in range(value.shape[1]):
+                                cell_features[f"{prop}_{i}x{j}"] = float(value[i, j])
+                    else:
+                        cell_features[prop] = float(value.flatten()[0])
+                else:
+                    cell_features[prop] = float(value)
                 
         except (ValueError, TypeError, AttributeError) as e:
             logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}")
             continue
     
-    # Combine all features
-    cell_features = {}
-    if count_1d > 0:
-        cell_features.update(dict(zip(names_1d[:count_1d], values_1d[:count_1d])))
-    if count_2d > 0:
-        cell_features.update(dict(zip(names_2d[:count_2d], values_2d[:count_2d])))
-    cell_features.update(scalar_features)
-    
     return cell_features
 
 

From 6998a7d46ab9f7a30eee1f0343f3477c764e9ae2 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 29 Apr 2025 17:31:11 -0400
Subject: [PATCH 13/15] pyproject + numba

---
 pyproject.toml              |  2 +-
 src/squidpy/exp/_feature.py | 84 ++++++++++++++++++++++++++-----------
 2 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cd4e7a665..007118929 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,7 +75,7 @@ dependencies = [
    "zarr>=2.6.1,<3.0.0",
    "spatialdata>=0.2.5",
    "centrosome>=1.2.3",
-   "cp-measure>=0.1.4"
+   "cp_measure>=0.1.4"
 ]
 
 [project.optional-dependencies]
diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 74a635d55..562286e99 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -463,46 +463,57 @@ def _get_cell_crops_numba(
     
     Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used.
     """
-    # Find cell boundaries
+    # Find cell boundaries using vectorized operations
     cell_mask = labels == cell_id
-    y_indices, x_indices = np.where(cell_mask)
-    if len(y_indices) == 0:
-        # Return empty arrays if no cell found
+    if not np.any(cell_mask):
         return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype)
 
+    # Get non-zero indices efficiently
+    y_indices, x_indices = np.nonzero(cell_mask)
     y_min, y_max = y_indices.min(), y_indices.max()
     x_min, x_max = x_indices.min(), x_indices.max()
+    
+    # Get image dimensions
     height, width = labels.shape
 
-    # Calculate padding
+    # Calculate padding with boundary checks in one step
     y_pad_min = min(pad, y_min)
     y_pad_max = min(pad, height - y_max - 1)
     x_pad_min = min(pad, x_min)
     x_pad_max = min(pad, width - x_max - 1)
 
-    # Apply padding
-    y_min -= y_pad_min
-    y_max += y_pad_max
-    x_min -= x_pad_min
-    x_max += x_pad_max
-
-    # Calculate crop dimensions
-    y_size = y_max - y_min
-    x_size = x_max - x_min
+    # Calculate crop dimensions with padding
+    y_start = y_min - y_pad_min
+    y_end = y_max + y_pad_max + 1
+    x_start = x_min - x_pad_min
+    x_end = x_max + x_pad_max + 1
 
-    # Create output arrays
+    # Create output arrays with exact size
+    y_size = y_end - y_start
+    x_size = x_end - x_start
+    
+    # Create cell mask crop
     cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_)
-    image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype)
-    image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype)
-
-    # Copy data
     for i in range(y_size):
         for j in range(x_size):
-            cell_mask_cropped[i, j] = cell_mask[y_min + i, x_min + j]
-            if image1.size > 0:  # Only copy if image1 is not empty
-                image1_cropped[i, j] = image1[y_min + i, x_min + j]
-            if image2.size > 0:  # Only copy if image2 is not empty
-                image2_cropped[i, j] = image2[y_min + i, x_min + j]
+            cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j]
+    
+    # Handle image crops efficiently
+    if image1.size > 0:
+        image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype)
+        for i in range(y_size):
+            for j in range(x_size):
+                image1_cropped[i, j] = image1[y_start + i, x_start + j]
+    else:
+        image1_cropped = np.zeros((0, 0), dtype=image1.dtype)
+        
+    if image2.size > 0:
+        image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype)
+        for i in range(y_size):
+            for j in range(x_size):
+                image2_cropped[i, j] = image2[y_start + i, x_start + j]
+    else:
+        image2_cropped = np.zeros((0, 0), dtype=image2.dtype)
 
     return cell_mask_cropped, image1_cropped, image2_cropped
 
@@ -595,7 +606,30 @@ def _measurement_wrapper(
     """
     if image1 is None:
         return {}  # Return empty dict if no image data
-    return func(mask, image1) if image2 is None else func(image1, image2, mask)
+        
+    try:
+        if image2 is None:
+            return func(mask, image1)
+        else:
+            # Check if we have valid data for correlation
+            if not np.any(mask) or not np.any(image1) or not np.any(image2):
+                # Get feature names from a successful call to maintain structure
+                dummy_mask = np.ones((2, 2), dtype=bool)
+                dummy_img = np.ones((2, 2), dtype=image1.dtype)
+                feature_names = func(dummy_img, dummy_img, dummy_mask).keys()
+                # Return dictionary with NaN values for all features
+                return {name: np.nan for name in feature_names}
+            return func(image1, image2, mask)
+    except (IndexError, ValueError) as e:
+        # Handle cases where correlation calculation fails
+        if "index 0 is out of bounds" in str(e) or "size 0" in str(e):
+            # Get feature names from a successful call to maintain structure
+            dummy_mask = np.ones((2, 2), dtype=bool)
+            dummy_img = np.ones((2, 2), dtype=image1.dtype)
+            feature_names = func(dummy_img, dummy_img, dummy_mask).keys()
+            # Return dictionary with NaN values for all features
+            return {name: np.nan for name in feature_names}
+        raise  # Re-raise other errors
 
 
 def _calculate_features_helper(

From f7342c8320820739d00ccf272f0df1d311ef3024 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 29 Apr 2025 21:31:30 +0000
Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/squidpy/exp/_feature.py | 62 +++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 562286e99..6cc87a007 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import itertools
+import os
 import warnings
 from collections.abc import Callable, Sequence
 from typing import Any
@@ -13,6 +14,7 @@
 import pandas as pd
 import xarray as xr
 from cp_measure.bulk import get_core_measurements, get_correlation_measurements
+from numba import njit, prange
 from scipy import ndimage
 from skimage import measure
 from skimage.measure import label
@@ -23,8 +25,6 @@
 from squidpy._constants._constants import ImageFeature
 from squidpy._docs import d, inject_docs
 from squidpy._utils import Signal, _get_n_cores, parallelize
-from numba import njit, prange
-import os
 
 __all__ = ["calculate_image_features"]
 
@@ -63,12 +63,24 @@
 BoolArray = npt.NDArray[np.bool_]  # Boolean array
 
 # Define property sets at module level for better performance
-_SCALAR_PROPS = frozenset({
-    "area", "area_filled", "area_convex", "num_pixels",
-    "axis_major_length", "axis_minor_length", "eccentricity",
-    "equivalent_diameter", "extent", "feret_diameter_max",
-    "solidity", "euler_number", "perimeter", "perimeter_crofton"
-})
+_SCALAR_PROPS = frozenset(
+    {
+        "area",
+        "area_filled",
+        "area_convex",
+        "num_pixels",
+        "axis_major_length",
+        "axis_minor_length",
+        "eccentricity",
+        "equivalent_diameter",
+        "extent",
+        "feret_diameter_max",
+        "solidity",
+        "euler_number",
+        "perimeter",
+        "perimeter_crofton",
+    }
+)
 
 _ARRAY_1D_PROPS = frozenset({"centroid", "centroid_local"})
 _ARRAY_2D_PROPS = frozenset({"inertia_tensor"})
@@ -336,13 +348,13 @@ def _extract_features_from_regionprops(
 ) -> dict[str, float]:
     """Extract features from a regionprops object given a list of properties."""
     cell_features = {}
-    
+
     for prop in props:
         try:
             value = getattr(region_obj, prop)
             if skip_callable and callable(value):
                 continue
-                
+
             if prop in _SCALAR_PROPS:
                 cell_features[prop] = float(value)
             elif prop in _ARRAY_1D_PROPS:
@@ -376,11 +388,11 @@ def _extract_features_from_regionprops(
                         cell_features[prop] = float(value.flatten()[0])
                 else:
                     cell_features[prop] = float(value)
-                
+
         except (ValueError, TypeError, AttributeError) as e:
             logg.warning(f"Error calculating {prop} for cell {cell_id}: {str(e)}")
             continue
-    
+
     return cell_features
 
 
@@ -460,19 +472,23 @@ def _get_cell_crops_numba(
     pad: int = 1,
 ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Numba-accelerated version of _get_cell_crops.
-    
+
     Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used.
     """
     # Find cell boundaries using vectorized operations
     cell_mask = labels == cell_id
     if not np.any(cell_mask):
-        return np.zeros((0, 0), dtype=np.bool_), np.zeros((0, 0), dtype=image1.dtype), np.zeros((0, 0), dtype=image2.dtype)
+        return (
+            np.zeros((0, 0), dtype=np.bool_),
+            np.zeros((0, 0), dtype=image1.dtype),
+            np.zeros((0, 0), dtype=image2.dtype),
+        )
 
     # Get non-zero indices efficiently
     y_indices, x_indices = np.nonzero(cell_mask)
     y_min, y_max = y_indices.min(), y_indices.max()
     x_min, x_max = x_indices.min(), x_indices.max()
-    
+
     # Get image dimensions
     height, width = labels.shape
 
@@ -491,13 +507,13 @@ def _get_cell_crops_numba(
     # Create output arrays with exact size
     y_size = y_end - y_start
     x_size = x_end - x_start
-    
+
     # Create cell mask crop
     cell_mask_cropped = np.zeros((y_size, x_size), dtype=np.bool_)
     for i in range(y_size):
         for j in range(x_size):
             cell_mask_cropped[i, j] = cell_mask[y_start + i, x_start + j]
-    
+
     # Handle image crops efficiently
     if image1.size > 0:
         image1_cropped = np.zeros((y_size, x_size), dtype=image1.dtype)
@@ -506,7 +522,7 @@ def _get_cell_crops_numba(
                 image1_cropped[i, j] = image1[y_start + i, x_start + j]
     else:
         image1_cropped = np.zeros((0, 0), dtype=image1.dtype)
-        
+
     if image2.size > 0:
         image2_cropped = np.zeros((y_size, x_size), dtype=image2.dtype)
         for i in range(y_size):
@@ -531,20 +547,20 @@ def _get_cell_crops(
     empty_image = np.zeros((0, 0), dtype=np.float32)
     image1_np = image1 if image1 is not None else empty_image
     image2_np = image2 if image2 is not None else empty_image
-    
+
     # Use Numba-accelerated version
     cell_mask_cropped, image1_cropped, image2_cropped = _get_cell_crops_numba(
         cell_id, labels, image1_np, image2_np, pad
     )
-    
+
     # Return None if no cell found
     if cell_mask_cropped.size == 0:
         return None
-        
+
     # Convert back to None for unused images
     image1_cropped = image1_cropped if image1 is not None else None
     image2_cropped = image2_cropped if image2 is not None else None
-    
+
     return cell_mask_cropped, image1_cropped, image2_cropped
 
 
@@ -606,7 +622,7 @@ def _measurement_wrapper(
     """
     if image1 is None:
         return {}  # Return empty dict if no image data
-        
+
     try:
         if image2 is None:
             return func(mask, image1)

From b2eb6aa5426a8992dc38ad252d6090a96cd4c00a Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Tue, 29 Apr 2025 17:35:07 -0400
Subject: [PATCH 15/15] mypy

---
 src/squidpy/exp/_feature.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/squidpy/exp/_feature.py b/src/squidpy/exp/_feature.py
index 6cc87a007..41e0a9e86 100644
--- a/src/squidpy/exp/_feature.py
+++ b/src/squidpy/exp/_feature.py
@@ -375,7 +375,7 @@ def _extract_features_from_regionprops(
                     cell_features[f"{prop}_{i}"] = float(v)
             else:
                 # Fallback for any other properties
-                if isinstance(value, (np.ndarray, list, tuple)):
+                if isinstance(value, np.ndarray | list | tuple):
                     value = np.asarray(value)
                     if value.ndim == 1:
                         for i, v in enumerate(value):
@@ -466,11 +466,11 @@ def _prepare_images_for_measurement(
 @njit(fastmath=True)
 def _get_cell_crops_numba(
     cell_id: int,
-    labels: np.ndarray,
-    image1: np.ndarray,
-    image2: np.ndarray,
+    labels: npt.NDArray[np.int_],
+    image1: npt.NDArray[np.float32],
+    image2: npt.NDArray[np.float32],
     pad: int = 1,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+) -> tuple[npt.NDArray[np.bool_], npt.NDArray[np.float32], npt.NDArray[np.float32]]:
     """Numba-accelerated version of _get_cell_crops.
 
     Note: image1 and image2 should be passed as empty arrays (np.zeros((0,0))) if not used.
@@ -553,7 +553,6 @@ def _get_cell_crops(
         cell_id, labels, image1_np, image2_np, pad
     )
 
-    # Return None if no cell found
     if cell_mask_cropped.size == 0:
         return None
 
@@ -634,7 +633,7 @@ def _measurement_wrapper(
                 dummy_img = np.ones((2, 2), dtype=image1.dtype)
                 feature_names = func(dummy_img, dummy_img, dummy_mask).keys()
                 # Return dictionary with NaN values for all features
-                return {name: np.nan for name in feature_names}
+                return dict.fromkeys(feature_names, np.nan)
             return func(image1, image2, mask)
     except (IndexError, ValueError) as e:
         # Handle cases where correlation calculation fails
@@ -644,7 +643,7 @@ def _measurement_wrapper(
             dummy_img = np.ones((2, 2), dtype=image1.dtype)
             feature_names = func(dummy_img, dummy_img, dummy_mask).keys()
             # Return dictionary with NaN values for all features
-            return {name: np.nan for name in feature_names}
+            return dict.fromkeys(feature_names, np.nan)
         raise  # Re-raise other errors