|
| 1 | +import numpy as np |
| 2 | +import vigra |
| 3 | + |
| 4 | +from elf.segmentation import embeddings as embed |
| 5 | +from skimage.transform import resize |
| 6 | +try: |
| 7 | + from napari.utils import progress as tqdm |
| 8 | +except ImportError: |
| 9 | + from tqdm import tqdm |
| 10 | + |
| 11 | +from . import util |
| 12 | +from .segment_from_prompts import segment_from_mask |
| 13 | + |
| 14 | + |
| 15 | +# |
| 16 | +# Original SegmentAnything instance segmentation functionality |
| 17 | +# |
| 18 | + |
| 19 | + |
| 20 | +# TODO implement automatic instance segmentation based on the functionalities from segment anything: |
| 21 | +# https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/automatic_mask_generator.py |
| 22 | + |
| 23 | + |
| 24 | +# |
| 25 | +# Instance segmentation from embeddings |
| 26 | +# |
| 27 | + |
| 28 | + |
def _refine_initial_segmentation(predictor, initial_seg, image_embeddings, i, verbose):
    """Refine each object of an initial low-resolution segmentation with SAM.

    Each object in ``initial_seg`` is used as a mask prompt for
    ``segment_from_mask`` and the refined mask is written into an output
    segmentation at the original image size.

    Arguments:
        predictor: The SAM predictor (with image embeddings precomputed).
        initial_seg: The initial segmentation; must be at the 256 x 256
            mask-prompt resolution expected by SAM.
        image_embeddings: The precomputed image embeddings; must contain
            the key "original_size".
        i: Optional index for a slice/frame within the embeddings.
        verbose: Whether to show a progress bar.

    Returns:
        The refined segmentation as a uint32 label image of the original size.
    """
    util.set_precomputed(predictor, image_embeddings, i)

    original_size = image_embeddings["original_size"]
    seg = np.zeros(original_size, dtype="uint32")

    seg_ids = np.unique(initial_seg)
    # TODO be smarter for overlapping masks, (use automatic_mask_generation from SAM as template)
    for seg_id in tqdm(seg_ids[1:], disable=not verbose, desc="Refine masks for automatic instance segmentation"):
        mask = (initial_seg == seg_id)
        # SAM expects mask prompts at a fixed 256 x 256 resolution.
        assert mask.shape == (256, 256)
        refined = segment_from_mask(predictor, mask, original_size=original_size).squeeze()
        assert refined.shape == seg.shape
        # NOTE: overlapping refined masks overwrite earlier ids (see TODO above).
        # ('refined' is already squeezed above; no second squeeze needed.)
        seg[refined] = seg_id

    return seg
| 51 | + |
| 52 | + |
| 53 | +# This is a first prototype for generating automatic instance segmentations from the image embeddings |
| 54 | +# predicted by the segment anything image encoder. |
| 55 | + |
| 56 | +# Main challenge: the larger the image the worse this will get because of the fixed embedding size. |
| 57 | +# Ideas: |
| 58 | +# - Can we get intermediate, larger embeddings from SAM? |
| 59 | +# - Can we run the encoder in a sliding window and somehow stitch the embeddings? |
| 60 | +# - Or: run the encoder in a sliding window and stitch the initial segmentation result. |
def segment_from_embeddings(
    predictor, image_embeddings, size_threshold=10, i=None,
    offsets=None, distance_type="l2", bias=0.0,
    verbose=True, return_initial_seg=False,
):
    """Compute an instance segmentation from SAM image embeddings.

    Runs a mutex-watershed segmentation (``elf``) on the 64 x 64 SAM image
    embeddings, filters out small objects, upsamples the result to the
    256 x 256 mask-prompt resolution and refines each object with SAM.

    Arguments:
        predictor: The SAM predictor.
        image_embeddings: The precomputed image embeddings.
        size_threshold: Minimal object size (in 64 x 64 pixels) to keep.
        i: Optional index for a slice/frame within the embeddings.
        offsets: The affinity offsets for the mutex watershed; defaults to
            ``[[-1, 0], [0, -1], [-3, 0], [0, -3]]``.
        distance_type: The distance metric used on the embeddings.
        bias: The bias term for the mutex watershed.
        verbose: Whether to show a progress bar during refinement.
        return_initial_seg: Whether to also return the unrefined
            segmentation (resized to the output shape).

    Returns:
        The refined segmentation, and additionally the initial segmentation
        if ``return_initial_seg`` is set.
    """
    # Use a None sentinel instead of a mutable default argument.
    if offsets is None:
        offsets = [[-1, 0], [0, -1], [-3, 0], [0, -3]]

    util.set_precomputed(predictor, image_embeddings, i)

    embeddings = predictor.get_image_embedding().squeeze().cpu().numpy()
    assert embeddings.shape == (256, 64, 64), f"{embeddings.shape}"
    initial_seg = embed.segment_embeddings_mws(
        embeddings, distance_type=distance_type, offsets=offsets, bias=bias
    ).astype("uint32")
    assert initial_seg.shape == (64, 64), f"{initial_seg.shape}"

    # filter out small objects
    seg_ids, sizes = np.unique(initial_seg, return_counts=True)
    initial_seg[np.isin(initial_seg, seg_ids[sizes < size_threshold])] = 0
    vigra.analysis.relabelConsecutive(initial_seg, out=initial_seg)

    # resize to 256 x 256, which is the mask input expected by SAM
    initial_seg = resize(
        initial_seg, (256, 256), order=0, preserve_range=True, anti_aliasing=False
    ).astype(initial_seg.dtype)
    seg = _refine_initial_segmentation(predictor, initial_seg, image_embeddings, i, verbose)

    if return_initial_seg:
        initial_seg = resize(
            initial_seg, seg.shape, order=0, preserve_range=True, anti_aliasing=False
        ).astype(seg.dtype)
        return seg, initial_seg
    else:
        return seg
| 93 | + |
| 94 | + |
| 95 | +# TODO |
# TODO
def segment_from_embeddings_with_tiling(
    predictor, image, image_embeddings, tile_shape=(256, 256), tile_overlap=(32, 32),
    size_threshold=10, i=None,
    offsets=None, distance_type="l2", bias=0.0,
    verbose=True, return_initial_seg=False,
):
    """Compute an instance segmentation from tiled SAM image embeddings.

    Planned tiled variant of ``segment_from_embeddings`` (run the encoder
    in a sliding window and stitch the results); not yet implemented.

    Raises:
        NotImplementedError: Always, until the tiled implementation exists.
    """
    # Use a None sentinel instead of a mutable default argument
    # (mirrors segment_from_embeddings).
    if offsets is None:
        offsets = [[-1, 0], [0, -1], [-3, 0], [0, -3]]
    # Fail loudly instead of silently returning None from the stub.
    raise NotImplementedError("Tiled embedding-based segmentation is not implemented yet.")