|
| 1 | +# Copyright 2024 (C) BioVisionCenter, University of Zurich |
| 2 | +# |
| 3 | +# Original authors: |
| 4 | + |
| 5 | +# Derived from APx label assignment task |
| 6 | +# https://github.com/Apricot-Therapeutics/APx_fractal_task_collection/blob/main/src/apx_fractal_task_collection/tasks/label_assignment_by_overlap.py |
| 7 | +# Simplified to avoid compound task and updated to use ngio |
| 8 | +"""Task to assign labels based on overlap between two label images.""" |
| 9 | + |
| 10 | +import logging |
| 11 | +from typing import Optional |
| 12 | + |
| 13 | +import ngio |
| 14 | +import numpy as np |
| 15 | +import pandas as pd |
| 16 | +from ngio.tables import FeatureTable |
| 17 | +from pydantic import validate_call |
| 18 | +from skimage.measure import regionprops_table |
| 19 | + |
| 20 | +logger = logging.getLogger(__name__) |
| 21 | + |
| 22 | + |
def label_overlap(
    regionmask: np.ndarray,
    intensity_image: np.ndarray,
) -> list[float]:
    """Find the parent label that covers most of a single child object.

    Written to be passed to scikit-image's ``regionprops_table`` through
    ``extra_properties``. Based on the APx implementation.

    Args:
        regionmask: Mask of one child object; every element greater than 0
            (or True) is treated as part of the object.
        intensity_image: Parent label image with the same shape as
            ``regionmask``; values greater than 0 are parent labels and 0 is
            treated as background.

    Returns:
        A two-element list ``[max_label, overlap]``. ``max_label`` is the
        parent label sharing the most pixels with the child object and
        ``overlap`` is that shared pixel count divided by the child object's
        pixel count. Both entries are NaN when the child object touches no
        parent label.
    """
    # Parent label underneath each child pixel; everything else becomes 0.
    parent_labels = np.where(regionmask > 0, intensity_image, 0)

    labels, counts = np.unique(parent_labels[parent_labels > 0], return_counts=True)

    if labels.size == 0:
        return [np.nan, np.nan]

    # np.unique returns sorted labels and np.argmax the first maximum, so a
    # tie in overlap is resolved in favour of the smallest parent label.
    best = np.argmax(counts)
    child_area = np.count_nonzero(regionmask > 0)

    return [labels[best], counts[best] / child_area]
| 51 | + |
| 52 | + |
def assign_objects(
    parent_label: np.ndarray,
    child_label: np.ndarray,
    overlap_threshold=1.0,
) -> pd.DataFrame:
    """Match every child object to the parent object it overlaps most.

    Runs ``regionprops_table`` over the child label image, using the parent
    label image as intensity image and ``label_overlap`` as extra property.
    Based on the APx implementation.

    Args:
        parent_label: Label image of the parent objects. Singleton axes are
            squeezed away before the measurement.
        child_label: Label image of the child objects. Singleton axes are
            squeezed away before the measurement.
        overlap_threshold: Minimum fraction of a child object that must lie
            inside a parent object for the pair to count as a match.

    Returns:
        DataFrame indexed by ``child_label`` with the columns
        ``parent_label`` (nullable ``Int32``, missing when the overlap is
        below the threshold or there is no overlap) and ``overlap``.
    """
    parent_image = np.squeeze(parent_label)
    child_image = np.squeeze(child_label)

    measurements = regionprops_table(
        child_image,
        parent_image,
        properties=["label"],
        extra_properties=[label_overlap],
    )
    matches = pd.DataFrame(measurements)
    matches.columns = ["child_label", "parent_label", "overlap"]

    # Discard the parent of every child that is not sufficiently contained.
    below_threshold = matches["overlap"] < overlap_threshold
    matches.loc[below_threshold, "parent_label"] = np.nan

    # Nullable integer dtype keeps label ids integral despite missing values.
    matches["parent_label"] = matches["parent_label"].astype("Int32")

    return matches.set_index("child_label")
| 86 | + |
| 87 | + |
@validate_call
def label_assignment_by_overlap(
    *,
    # Default arguments for fractal tasks:
    zarr_url: str,
    # Task-specific arguments:
    parent_label_name: str,
    child_label_name: str,
    overlap_threshold: float = 1.0,
    overlap_table_name: Optional[str] = None,
    level_path: Optional[str] = None,
):
    """Assign labels to each other based on overlap.

    Takes a parent label image and a child label image and calculates
    overlaps between their labels. Child labels will be assigned to parent
    labels based on an overlap threshold. The result is written to a feature
    table that references the child label.

    Args:
        zarr_url: Path or url to the individual OME-Zarr image to be processed.
            (standard argument for Fractal tasks, managed by Fractal server).
        parent_label_name: Name of the parent label.
        child_label_name: Name of the child label. This label will be assigned
            to the parent label based on overlap. The parent label will appear
            in the child feature table as the "(parent_label_name)_label"
            column.
        overlap_threshold: The minimum fraction (between 0 and 1) of child
            label object that must be contained in parent label object to
            be considered a match.
        overlap_table_name: Name of the feature table to which the overlap
            should be added. If the feature table already exists, the overlap
            measurements are added to it. Otherwise, an overlap table is
            created. If no name was given, a new table named
            "(parent_label_name)_(child_label_name)_overlap" is written.
        level_path: Resolution of the label image to calculate overlap. Full
            resolution label is used by default. Typically overridden with
            0 (full resolution), 1 (half resolution), etc.

    Raises:
        ValueError: If a table named ``overlap_table_name`` already exists
            but is not a feature table.
    """
    ome_zarr_container = ngio.open_ome_zarr_container(zarr_url)
    child_label_container = ome_zarr_container.get_label(
        name=child_label_name, path=level_path
    )
    child_label = child_label_container.get_array()

    # Ids of the child objects. Background (0) is filtered by value rather
    # than by position, so an image without any background pixel neither
    # loses its smallest label nor is mistaken for an empty image.
    child_label_ids = np.unique(child_label)
    child_label_ids = child_label_ids[child_label_ids > 0]

    if child_label_ids.size == 0:
        # No child objects: use an empty, correctly typed frame so that the
        # merge below yields the same column dtypes as a regular run.
        assignments = pd.DataFrame(
            {
                "parent_label": pd.array([], dtype="Int32"),
                "overlap": np.array([], dtype=float),
            },
            index=pd.Index([], dtype="int64", name="child_label"),
        )
        logger.info(
            f"Label image was empty for child label {child_label_name}. "
            "No labels could be matched."
        )
    else:
        # Load the parent label image at the resolution of the child label image
        parent_label = ome_zarr_container.get_label(
            name=parent_label_name,
            pixel_size=child_label_container.pixel_size,
        ).get_array()
        logger.info(
            "Calculating label assignments with overlap threshold "
            f"{overlap_threshold}."
        )
        assignments = assign_objects(
            parent_label,
            child_label,
            overlap_threshold,
        )

    parent_label_column_name = f"{parent_label_name}_label"
    overlap_column_name = f"{child_label_name}_{parent_label_name}_overlap"

    assignments = assignments.rename(
        columns={
            "parent_label": parent_label_column_name,
            "overlap": overlap_column_name,
        }
    )

    # Check if the feature table already exists
    if overlap_table_name in ome_zarr_container.list_tables():
        base_table_container = ome_zarr_container.get_table(
            name=overlap_table_name,
        )
        if base_table_container.table_type() != "feature_table":
            raise ValueError(
                f"The existing table {overlap_table_name} is not a "
                "FeatureTable. Cannot add overlap measurements to it."
            )
        # Remove results of a previous run. errors="ignore" drops whichever
        # of the two columns is present, so a table holding only one of them
        # neither raises a KeyError nor produces duplicated columns on merge.
        base_table = base_table_container.dataframe.drop(
            columns=[parent_label_column_name, overlap_column_name],
            errors="ignore",
        )
    else:
        if overlap_table_name is None:
            overlap_table_name = f"{parent_label_name}_{child_label_name}_overlap"

        # Initialize a new empty table with one row per child object
        base_table = pd.DataFrame(index=pd.Index(child_label_ids, name="label"))

    # Left merge keeps every row of the base table; child objects without an
    # assignment get missing values.
    merged_data = pd.merge(
        base_table, assignments, left_on="label", right_index=True, how="left"
    )

    merged_table = FeatureTable(
        table_data=merged_data,
        reference_label=child_label_name,
    )

    ome_zarr_container.add_table(
        name=overlap_table_name,
        table=merged_table,
        overwrite=True,
    )
| 216 | + |
| 217 | + |
if __name__ == "__main__":
    # Imported here so that fractal_task_tools is only needed when the module
    # is executed as a script, not when it is imported.
    from fractal_task_tools.task_wrapper import run_fractal_task

    run_fractal_task(
        logger_name=logger.name,
        task_function=label_assignment_by_overlap,
    )
0 commit comments