diff --git a/pyproject.toml b/pyproject.toml index cc0bcfb..de49369 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "ngio>=0.4.1, <0.5.0", "fractal-task-tools==0.0.12", "pyarrow<21.0.0", + "scikit-image" ] # Optional dependencies (e.g. for `pip install -e ".[dev]"`, see diff --git a/src/fractal_helper_tasks/__FRACTAL_MANIFEST__.json b/src/fractal_helper_tasks/__FRACTAL_MANIFEST__.json index 8aae4db..bddc7ac 100644 --- a/src/fractal_helper_tasks/__FRACTAL_MANIFEST__.json +++ b/src/fractal_helper_tasks/__FRACTAL_MANIFEST__.json @@ -249,6 +249,64 @@ "title": "AddZSingleton" }, "docs_link": "https://github.com/jluethi/fractal-helper-tasks" + }, + { + "name": "Assign Label by Overlap", + "tags": [ + "Label assignment", + "Label processing" + ], + "docs_info": "### Purpose\n- Assigns labels of child label to the parent label based on overlap.\n- Uses a threshold and fills NA for labels with no sufficient overlap.\n- When multiple parent labels overlap with a child label, assigns the parent label with the maximum overlap.\n\n### Outputs\n- A new FeatureTable or an addition to an existing FeatureTable with the overlap measurements & assignments.\n\n### Limitations\n- Only processes OME-Zarrs where both parent and child label are in the same OME-Zarr image (in some multiplexing scenarios, those labels are in different images).", + "type": "parallel", + "executable_parallel": "label_assignment_by_overlap.py", + "meta_parallel": { + "cpus_per_task": 1, + "mem": 4000 + }, + "args_schema_parallel": { + "additionalProperties": false, + "properties": { + "zarr_url": { + "title": "Zarr Url", + "type": "string", + "description": "Path or url to the individual OME-Zarr image to be processed. (standard argument for Fractal tasks, managed by Fractal server)." + }, + "parent_label_name": { + "title": "Parent Label Name", + "type": "string", + "description": "Name of the parent label." 
+ }, + "child_label_name": { + "title": "Child Label Name", + "type": "string", + "description": "Name of the child label. This label will be assigned to the parent label based on overlap. The parent label will appear in the child feature table as the \"(parent_label_name)_label\" column in the obs table of the anndata table." + }, + "overlap_threshold": { + "default": 1.0, + "title": "Overlap Threshold", + "type": "number", + "description": "The minimum percentage (between 0 and 1) of child label object that must be contained in parent label object to be considered a match." + }, + "overlap_table_name": { + "title": "Overlap Table Name", + "type": "string", + "description": "Name of the feature table to which the overlap should be added. If the feature table already exists, the overlap measurements are added to it. Otherwise, an overlap table is created. If no name was given, a new table named parent_label_name + child_label_name + '_overlap' is created." + }, + "level_path": { + "title": "Level Path", + "type": "string", + "description": "Resolution of the label image to calculate overlap. Full resolution label is used by default. Typically overriden with 0 (full resolution), 1 (half resolution), etc." + } + }, + "required": [ + "zarr_url", + "parent_label_name", + "child_label_name" + ], + "type": "object", + "title": "LabelAssignmentByOverlap" + }, + "docs_link": "https://github.com/jluethi/fractal-helper-tasks" } ], "has_args_schemas": true, diff --git a/src/fractal_helper_tasks/dev/docs_info/label_assignment_by_overlap.md b/src/fractal_helper_tasks/dev/docs_info/label_assignment_by_overlap.md new file mode 100644 index 0000000..98126eb --- /dev/null +++ b/src/fractal_helper_tasks/dev/docs_info/label_assignment_by_overlap.md @@ -0,0 +1,10 @@ +### Purpose +- Assigns labels of child label to the parent label based on overlap. +- Uses a threshold and fills NA for labels with no sufficient overlap. 
+- When multiple parent labels overlap with a child label, assigns the parent label with the maximum overlap. + +### Outputs +- A new FeatureTable or an addition to an existing FeatureTable with the overlap measurements & assignments. + +### Limitations +- Only processes OME-Zarrs where both parent and child label are in the same OME-Zarr image (in some multiplexing scenarios, those labels are in different images). \ No newline at end of file diff --git a/src/fractal_helper_tasks/dev/task_list.py b/src/fractal_helper_tasks/dev/task_list.py index 4b5b461..a57e115 100644 --- a/src/fractal_helper_tasks/dev/task_list.py +++ b/src/fractal_helper_tasks/dev/task_list.py @@ -43,4 +43,11 @@ tags=["Singleton Z dimension"], docs_info="file:docs_info/drop_t_dimension.md", ), + ParallelTask( + name="Assign Label by Overlap", + executable="label_assignment_by_overlap.py", + meta={"cpus_per_task": 1, "mem": 4000}, + tags=["Label assignment", "Label processing"], + docs_info="file:docs_info/label_assignment_by_overlap.md", + ), ] diff --git a/src/fractal_helper_tasks/label_assignment_by_overlap.py b/src/fractal_helper_tasks/label_assignment_by_overlap.py new file mode 100644 index 0000000..a499feb --- /dev/null +++ b/src/fractal_helper_tasks/label_assignment_by_overlap.py @@ -0,0 +1,224 @@ +# Copyright 2024 (C) BioVisionCenter, University of Zurich +# +# Original authors: +# Joel Lüthi +# Derived from APx label assignment task +# https://github.com/Apricot-Therapeutics/APx_fractal_task_collection/blob/main/src/apx_fractal_task_collection/tasks/label_assignment_by_overlap.py +# Simplified to avoid compound task and updated to use ngio +"""Task to assign labels based on overlap between two label images.""" + +import logging +from typing import Optional + +import ngio +import numpy as np +import pandas as pd +from ngio.tables import FeatureTable +from pydantic import validate_call +from skimage.measure import regionprops_table + +logger = logging.getLogger(__name__) + + +def 
label_overlap( + regionmask: np.ndarray, + intensity_image: np.ndarray, +) -> list[float, float]: + """Calculates label overlap between 2 numpy arrays. + + Scikit-image regionprops_table extra_properties function to compute + max overlap between two label images. Based on APX implementation. + + regionmask: 2D numpy mask of a single child object, as passed by + regionprops_table (non-zero inside the object). + intensity_image: 2D numpy array with parent objects. + """ + parent_labels = np.where(regionmask > 0, intensity_image, 0) + + labels, counts = np.unique(parent_labels[parent_labels > 0], return_counts=True) + + if len(labels > 0): + # if there is a tie in the overlap, the first label is selected + max_label = labels[np.argmax(counts)] + max_label_area = counts.max() + child_area = regionmask[regionmask > 0].size + overlap = max_label_area / child_area + else: + max_label = np.nan + overlap = np.nan + + return [max_label, overlap] + + +def assign_objects( + parent_label: np.ndarray, + child_label: np.ndarray, + overlap_threshold=1.0, +) -> pd.DataFrame: + """Assigns objects of child label to parent label based on overlap. + + Calculate the overlap between labels in parent_label and child_label, + and return a DataFrame of matching labels. Based on APX implementation. + + parent_label: 4D numpy array. + child_label: 4D numpy array. + overlap_threshold: float, the minimum fraction of child label object that + must be contained in parent label object to be considered a match. 
+ """ + parent_label = np.squeeze(parent_label) + child_label = np.squeeze(child_label) + + t = pd.DataFrame( + regionprops_table( + child_label, + parent_label, + properties=["label"], + extra_properties=[label_overlap], + ) + ) + + t.columns = ["child_label", "parent_label", "overlap"] + t.loc[t.overlap < overlap_threshold, "parent_label"] = np.nan + t["parent_label"] = t["parent_label"].astype("Int32") + t.set_index("child_label", inplace=True) + + return t + + +@validate_call +def label_assignment_by_overlap( + *, + # Default arguments for fractal tasks: + zarr_url: str, + # Task-specific arguments: + parent_label_name: str, + child_label_name: str, + overlap_threshold: float = 1.0, + overlap_table_name: Optional[str] = None, + level_path: Optional[str] = None, +): + """Assign labels to each other based on overlap. + + Takes a parent label image and a child label image and calculates + overlaps between their labels. Child labels will be assigned to parent + labels based on an overlap threshold. + + Args: + zarr_url: Path or url to the individual OME-Zarr image to be processed. + (standard argument for Fractal tasks, managed by Fractal server). + parent_label_name: Name of the parent label. + child_label_name: Name of the child label. This label will be assigned + to the parent label based on overlap. The parent label will appear + in the child feature table as the "(parent_label_name)_label" + column in the obs table of the anndata table. + overlap_threshold: The minimum percentage (between 0 and 1) of child + label object that must be contained in parent label object to + be considered a match. + overlap_table_name: Name of the feature table to which the overlap + should be added. If the feature table already exists, the overlap + measurements are added to it. Otherwise, an overlap table is + created. If no name was given, a new table named parent_label_name + + child_label_name + '_overlap' is created. 
+ level_path: Resolution of the label image to calculate overlap. Full + resolution label is used by default. Typically overriden with + 0 (full resolution), 1 (half resolution), etc. + + """ + ome_zarr_container = ngio.open_ome_zarr_container(zarr_url) + child_label_container = ome_zarr_container.get_label( + name=child_label_name, path=level_path + ) + child_label = child_label_container.get_array() + + # if there are no child labels, assignments will be all NaN + if np.unique(child_label).size == 1: + assignments = pd.DataFrame( + {"parent_label": pd.NA, "overlap": pd.NA}, index=pd.Index([]) + ) + logger.info( + f"Label image was empty for child label {child_label_name}. " + "No labels could be matched." + ) + + else: + # Load the parent label image at the resolution of the child label image + parent_label = ome_zarr_container.get_label( + name=parent_label_name, + pixel_size=child_label_container.pixel_size, + ).get_array() + # make the assignment + logger.info( + "Calculating label assignments with overlap threshold " + f"{overlap_threshold}." + ) + assignments = assign_objects( + parent_label, + child_label, + overlap_threshold, + ) + + parent_label_column_name = f"{parent_label_name}_label" + overlap_column_name = f"{child_label_name}_{parent_label_name}_overlap" + + assignments.rename( + columns={ + "parent_label": parent_label_column_name, + "overlap": overlap_column_name, + }, + inplace=True, + ) + + # Check if the feature table already exists + if overlap_table_name in ome_zarr_container.list_tables(): + base_table_container = ome_zarr_container.get_table( + name=overlap_table_name, + ) + if base_table_container.table_type() != "feature_table": + raise ValueError( + f"The existing table {overlap_table_name} is not a " + "FeatureTable. Cannot add overlap measurements to it." 
+ ) + base_table = base_table_container.dataframe + # If the table already contains the overlap measurement, drop it + if parent_label_column_name in base_table.columns: + base_table.drop( + columns=[ + parent_label_column_name, + overlap_column_name, + ], + inplace=True, + ) + + else: + if overlap_table_name is None: + overlap_table_name = f"{parent_label_name}_{child_label_name}_overlap" + + # Initialize a new empty table with label index + labels = np.unique(child_label)[1:] # FIXME: Handle empty label image + base_table = pd.DataFrame(index=labels) + base_table.index.name = "label" + + # merge with child feature obs data + merged_data = pd.merge( + base_table, assignments, left_on="label", right_index=True, how="left" + ) + + merged_table = FeatureTable( + table_data=merged_data, + reference_label=child_label_name, + ) + + ome_zarr_container.add_table( + name=overlap_table_name, + table=merged_table, + overwrite=True, + ) + + +if __name__ == "__main__": + from fractal_task_tools.task_wrapper import run_fractal_task + + run_fractal_task( + task_function=label_assignment_by_overlap, + logger_name=logger.name, + ) diff --git a/tests/test_label_assignment_by_overlap.py b/tests/test_label_assignment_by_overlap.py new file mode 100644 index 0000000..fedf8e6 --- /dev/null +++ b/tests/test_label_assignment_by_overlap.py @@ -0,0 +1,219 @@ +"""Test label assignment by overlap task.""" + +from pathlib import Path + +import ngio +import numpy as np +import pandas as pd +import pytest +from ngio.tables import FeatureTable + +from fractal_helper_tasks.label_assignment_by_overlap import ( + label_assignment_by_overlap, +) + + +@pytest.mark.parametrize( + ["overlap_threshold", "expected_assignment"], + [(0.8, [1, 2, 3, np.nan, 2]), (1.0, [1, 2, 3, np.nan, np.nan])], +) +def test_label_assignment_by_overlap_new_table( + tmp_path: Path, + overlap_threshold: float, + expected_assignment: list[int], +): + zarr_url = str(tmp_path / "my_zarr.zarr") + orig_dimensions = (1, 100, 100) 
+ orig_axes_names = ["c", "y", "x"] + parent_label_name = "parent_label" + child_label_name = "child_label" + overlap_table_name = "overlap_table" + + ome_zarr_container = ngio.create_ome_zarr_from_array( + store=zarr_url, + array=np.zeros(orig_dimensions), + xy_pixelsize=0.5, + axes_names=orig_axes_names, + overwrite=True, + ) + + parent_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + parent_label_array[0:50, 0:50] = 1 + parent_label_array[0:50, 50:100] = 2 + parent_label_array[50:100, 0:50] = 3 + + child_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + child_label_array[10:30, 10:30] = 1 + child_label_array[10:30, 70:90] = 2 + child_label_array[70:90, 10:30] = 3 + child_label_array[70:90, 70:90] = 4 + child_label_array[10:30, 45:80] = 5 + + parent_label = ome_zarr_container.derive_label( + name=parent_label_name, + ) + parent_label.set_array(patch=parent_label_array) + + child_label = ome_zarr_container.derive_label( + name=child_label_name, + ) + child_label.set_array(patch=child_label_array) + + label_assignment_by_overlap( + zarr_url=zarr_url, + parent_label_name=parent_label_name, + child_label_name=child_label_name, + overlap_threshold=overlap_threshold, + overlap_table_name=overlap_table_name, + ) + + new_ome_zarr_container = ngio.open_ome_zarr_container(zarr_url) + assert overlap_table_name in new_ome_zarr_container.list_tables() + overlap_table = new_ome_zarr_container.get_table_as( + name=overlap_table_name, + table_cls=FeatureTable, + ).dataframe + assert len(overlap_table) == 5 + expected_output_columns = [ + f"{child_label_name}_{parent_label_name}_overlap", + f"{parent_label_name}_label", + ] + assert overlap_table.columns.tolist() == expected_output_columns + assigned_overlaps = overlap_table[f"{parent_label_name}_label"].to_numpy( + dtype=float + ) + assert np.allclose(assigned_overlaps, expected_assignment, equal_nan=True) + + +def test_label_assignment_by_overlap_existing_feature_table( + tmp_path: Path, +): + 
zarr_url = str(tmp_path / "my_zarr.zarr") + orig_dimensions = (1, 100, 100) + orig_axes_names = ["c", "y", "x"] + parent_label_name = "parent_label" + child_label_name = "child_label" + overlap_table_name = "overlap_table" + + ome_zarr_container = ngio.create_ome_zarr_from_array( + store=zarr_url, + array=np.zeros(orig_dimensions), + xy_pixelsize=0.5, + axes_names=orig_axes_names, + overwrite=True, + ) + + parent_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + parent_label_array[0:50, 0:50] = 1 + parent_label_array[0:50, 50:100] = 2 + parent_label_array[50:100, 0:50] = 3 + + child_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + child_label_array[10:30, 10:30] = 1 + child_label_array[10:30, 70:90] = 2 + child_label_array[70:90, 10:30] = 3 + child_label_array[70:90, 70:90] = 4 + child_label_array[10:30, 45:80] = 5 + + parent_label = ome_zarr_container.derive_label( + name=parent_label_name, + ) + parent_label.set_array(patch=parent_label_array) + + child_label = ome_zarr_container.derive_label( + name=child_label_name, + ) + child_label.set_array(patch=child_label_array) + + # Create existing tables + base_table = pd.DataFrame( + { + "some_other_measurement": [10, 20, 30, 40, 50], + }, + index=[1, 2, 3, 4, 5], + ) + base_table.index.name = "label" + + feature_table = FeatureTable( + table_data=base_table, + reference_label=child_label_name, + ) + ome_zarr_container.add_table( + name=overlap_table_name, + table=feature_table, + overwrite=True, + ) + + label_assignment_by_overlap( + zarr_url=zarr_url, + parent_label_name=parent_label_name, + child_label_name=child_label_name, + overlap_threshold=1.0, + overlap_table_name=overlap_table_name, + ) + + new_ome_zarr_container = ngio.open_ome_zarr_container(zarr_url) + assert overlap_table_name in new_ome_zarr_container.list_tables() + overlap_table = new_ome_zarr_container.get_table_as( + name=overlap_table_name, + table_cls=FeatureTable, + ).dataframe + assert overlap_table.shape == (5, 3) + + 
+def test_label_assignment_by_overlap_existing_roi_table( + tmp_path: Path, +): + zarr_url = str(tmp_path / "my_zarr.zarr") + orig_dimensions = (1, 100, 100) + orig_axes_names = ["c", "y", "x"] + parent_label_name = "parent_label" + child_label_name = "child_label" + overlap_table_name = "image_roi_table" + + ome_zarr_container = ngio.create_ome_zarr_from_array( + store=zarr_url, + array=np.zeros(orig_dimensions), + xy_pixelsize=0.5, + axes_names=orig_axes_names, + overwrite=True, + ) + + parent_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + parent_label_array[0:50, 0:50] = 1 + parent_label_array[0:50, 50:100] = 2 + parent_label_array[50:100, 0:50] = 3 + + child_label_array = np.zeros(orig_dimensions[1:], dtype=np.uint32) + child_label_array[10:30, 10:30] = 1 + child_label_array[10:30, 70:90] = 2 + child_label_array[70:90, 10:30] = 3 + child_label_array[70:90, 70:90] = 4 + child_label_array[10:30, 45:80] = 5 + + parent_label = ome_zarr_container.derive_label( + name=parent_label_name, + ) + parent_label.set_array(patch=parent_label_array) + + child_label = ome_zarr_container.derive_label( + name=child_label_name, + ) + child_label.set_array(patch=child_label_array) + + # Create existing table + roi_table = ome_zarr_container.build_image_roi_table(name=overlap_table_name) + + ome_zarr_container.add_table( + name=overlap_table_name, + table=roi_table, + overwrite=True, + ) + with pytest.raises(ValueError): + label_assignment_by_overlap( + zarr_url=zarr_url, + parent_label_name=parent_label_name, + child_label_name=child_label_name, + overlap_threshold=1.0, + overlap_table_name=overlap_table_name, + )