Merge branch 'dev' of https://github.com/computational-cell-analytics/micro-sam into dev

constantinpape · constantinpape · commit 343361f17144 · 2023-08-10T22:40:07.000+02:00
diff --git a/finetuning/livecell/evaluation/iterative_prompting.py b/finetuning/livecell/evaluation/iterative_prompting.py
@@ -0,0 +1,52 @@
+import os
+from glob import glob
+
+from micro_sam.evaluation.inference import run_inference_with_iterative_prompting
+from micro_sam.evaluation.evaluation import run_evaluation
+
+from util import get_checkpoint, get_paths
+
+LIVECELL_GT_ROOT = "/scratch-grete/projects/nim00007/data/LiveCELL/annotations_corrected/livecell_test_images"
+# TODO update to make this fit other models
+PREDICTION_ROOT = "./pred_interactive_prompting"
+
+
+def run_interactive_prompting():
+    prediction_root = PREDICTION_ROOT
+
+    checkpoint, model_type = get_checkpoint("vit_b")
+    image_paths, gt_paths = get_paths()
+
+    run_inference_with_iterative_prompting(
+        checkpoint, model_type, image_paths, gt_paths,
+        prediction_root, use_boxes=False, batch_size=16,
+    )
+
+
+def get_pg_paths(pred_folder):
+    pred_paths = sorted(glob(os.path.join(pred_folder, "*.tif")))
+    names = [os.path.split(path)[1] for path in pred_paths]
+    gt_paths = [
+        os.path.join(LIVECELL_GT_ROOT, name.split("_")[0], name) for name in names
+    ]
+    assert all(os.path.exists(pp) for pp in gt_paths)
+    return pred_paths, gt_paths
+
+
+def evaluate_interactive_prompting():
+    prediction_root = PREDICTION_ROOT
+    prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*")))
+    for pred_folder in prediction_folders:
+        print("Evaluating", pred_folder)
+        pred_paths, gt_paths = get_pg_paths(pred_folder)
+        res = run_evaluation(gt_paths, pred_paths, save_path=None)
+        print(res)
+
+
+def main():
+    # run_interactive_prompting()
+    evaluate_interactive_prompting()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/micro_sam/evaluation/inference.py b/micro_sam/evaluation/inference.py
@@ -1,5 +1,6 @@
 import os
 import pickle
+import warnings
 
 from copy import deepcopy
 from typing import Any, Dict, List, Optional, Union
@@ -15,8 +16,9 @@
 from segment_anything.utils.transforms import ResizeLongestSide
 
 from .. import util as util
-from ..training import get_trainable_sam_model, ConvertToSamInputs
+from ..instance_segmentation import mask_data_to_segmentation
 from ..prompt_generators import PointAndBoxPromptGenerator, IterativePromptGenerator
+from ..training import get_trainable_sam_model, ConvertToSamInputs
 
 
 def _load_prompts(
@@ -422,48 +424,127 @@ def run_inference_with_prompts(
             pickle.dump(cached_box_prompts, f)
 
 
-def run_inference_with_iterative_prompting(
-        image, gt, model_type, checkpoint_path, n_iterations, n_positive, n_negative,
-        use_boxes, device=None, _sigmoid=torch.nn.Sigmoid()
+def _save_segmentation(masks, prediction_path):
+    # convert the predicted masks to a segmentation and write it to prediction_path
+    masks = masks.cpu().numpy().squeeze().astype("bool")
+    shape = masks.shape[-2:]
+    masks = [{"segmentation": mask, "area": mask.sum()} for mask in masks]
+    segmentation = mask_data_to_segmentation(masks, shape, with_background=True)
+    imageio.imwrite(prediction_path, segmentation)
+
+
+def _run_inference_with_iterative_prompting_for_image(
+    model,
+    image,
+    gt,
+    n_iterations,
+    device,
+    use_boxes,
+    prediction_paths,
+    batch_size,
 ):
-    if device is None:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+    assert len(prediction_paths) == n_iterations, f"{len(prediction_paths)}, {n_iterations}"
+    to_sam_inputs = ConvertToSamInputs()
 
-    model = get_trainable_sam_model(model_type, checkpoint_path)
-    _to_sam_inputs = ConvertToSamInputs()
-    batched_inputs, sampled_ids = _to_sam_inputs(image, gt, n_positive, n_negative, use_boxes)
-    sampled_binary_y = [np.isin(gt, idx) for idx in sampled_ids]
+    image = torch.from_numpy(
+        image[None, None] if image.ndim == 2 else image[None]
+    )
+    gt = torch.from_numpy(gt[None].astype("int32"))
+
+    n_pos = 0 if use_boxes else 1
+    batched_inputs, sampled_ids = to_sam_inputs(image, gt, n_pos=n_pos, n_neg=0, get_boxes=use_boxes)
 
     input_images = torch.stack([model.preprocess(x=x["image"].to(device)) for x in batched_inputs], dim=0)
     image_embeddings = model.image_embeddings_oft(input_images)
 
+    multimasking = n_pos == 1
     prompt_generator = IterativePromptGenerator(device)
 
-    multimasking = False
-    if n_positive == 1 and n_negative == 0:
-        if not use_boxes:
-            multimasking = True
+    n_samples = len(sampled_ids[0])
+    n_batches = int(np.ceil(float(n_samples) / batch_size))
 
     for iteration in range(n_iterations):
-        batched_outputs = model(
-            batched_inputs,
-            multimask_output=multimasking if iteration == 0 else False,
-            image_embeddings=image_embeddings
-        )
+        final_masks = []
+        for batch_idx in range(n_batches):
+            batch_start = batch_idx * batch_size
+            batch_stop = min((batch_idx + 1) * batch_size, n_samples)
+
+            this_batched_inputs = [{
+                k: v[batch_start:batch_stop] if k in ("point_coords", "point_labels") else v
+                for k, v in batched_inputs[0].items()
+            }]
+
+            sampled_binary_y = torch.stack([
+                torch.stack([_gt == idx for idx in sampled[batch_start:batch_stop]])[:, None]
+                for _gt, sampled in zip(gt, sampled_ids)
+            ]).to(torch.float32)
+
+            batched_outputs = model(
+                this_batched_inputs,
+                multimask_output=multimasking if iteration == 0 else False,
+                image_embeddings=image_embeddings
+            )
+
+            masks, logits_masks = [], []
+            for m in batched_outputs:
+                mask, l_mask = [], []
+                for _m, _l, _iou in zip(m["masks"], m["low_res_masks"], m["iou_predictions"]):
+                    best_iou_idx = torch.argmax(_iou)
+                    mask.append(torch.sigmoid(_m[best_iou_idx][None]))
+                    l_mask.append(_l[best_iou_idx][None])
+                mask, l_mask = torch.stack(mask), torch.stack(l_mask)
+                masks.append(mask)
+                logits_masks.append(l_mask)
+
+            masks, logits_masks = torch.stack(masks), torch.stack(logits_masks)
+            masks = (masks > 0.5).to(torch.float32)
+            final_masks.append(masks)
+
+            for _pred, _gt, _inp, logits in zip(masks, sampled_binary_y, this_batched_inputs, logits_masks):
+                next_coords, next_labels = prompt_generator(_gt, _pred, _inp["point_coords"], _inp["point_labels"])
+                _inp["point_coords"], _inp["point_labels"], _inp["mask_inputs"] = next_coords, next_labels, logits
+
+        final_masks = torch.cat(final_masks, dim=1)
+        _save_segmentation(final_masks, prediction_paths[iteration])
+
 
-        masks, logits_masks = [], []
-        for m in batched_outputs:
-            mask, l_mask = [], []
-            for _m, _l, _iou in zip(m["masks"], m["low_res_masks"], m["iou_predictions"]):
-                best_iou_idx = torch.argmax(_iou)
-                mask.append(_sigmoid(_m[best_iou_idx][None]))
-                l_mask.append(_l[best_iou_idx][None])
-            mask, l_mask = torch.stack(mask), torch.stack(l_mask)
-            masks.append(mask)
-            logits_masks.append(l_mask)
-        masks, logits_masks = torch.stack(masks), torch.stack(logits_masks)
-        masks = (masks > 0.5).to(torch.float32)
-
-        for _pred, _gt, _inp, logits in zip(masks, sampled_binary_y, batched_inputs, logits_masks):
-            net_coords, net_labels = prompt_generator(_gt, _pred, _inp["point_coords"], _inp["point_labels"])
-            _inp["point_coords"], _inp["point_labels"], _inp["mask_inputs"] = net_coords, net_labels, logits
+def run_inference_with_iterative_prompting(
+    checkpoint_path: Union[str, os.PathLike],
+    model_type: str,
+    image_paths: List[Union[str, os.PathLike]],
+    gt_paths: List[Union[str, os.PathLike]],
+    prediction_root: Union[str, os.PathLike],
+    use_boxes: bool,
+    n_iterations: int = 8,
+    batch_size: int = 32,
+) -> None:
+    """@private"""
+    warnings.warn("The iterative prompting functionality is not working correctly yet.")
+
+    device = torch.device("cuda")
+    model = get_trainable_sam_model(model_type, checkpoint_path)
+
+    # create all prediction folders
+    for i in range(n_iterations):
+        os.makedirs(os.path.join(prediction_root, f"iteration{i:02}"), exist_ok=True)
+
+    for image_path, gt_path in tqdm(
+        zip(image_paths, gt_paths), total=len(image_paths), desc="Run inference with prompts"
+    ):
+        image_name = os.path.basename(image_path)
+
+        prediction_paths = [os.path.join(prediction_root, f"iteration{i:02}", image_name) for i in range(n_iterations)]
+        if all(os.path.exists(prediction_path) for prediction_path in prediction_paths):
+            continue
+
+        assert os.path.exists(image_path), image_path
+        assert os.path.exists(gt_path), gt_path
+
+        image = imageio.imread(image_path)
+        gt = imageio.imread(gt_path).astype("uint32")
+        gt = relabel_sequential(gt)[0]
+
+        with torch.no_grad():
+            _run_inference_with_iterative_prompting_for_image(
+                model, image, gt, n_iterations, device, use_boxes, prediction_paths, batch_size,
+            )
diff --git a/micro_sam/prompt_generators.py b/micro_sam/prompt_generators.py
@@ -199,56 +199,73 @@ def __init__(self, device=None):
         self.device = device if device is not None else "cuda" if torch.cuda.is_available() else "cpu"
 
     def get_positive_points(self, pos_region, overlap_region):
-        tmp_pos_loc = torch.where(pos_region)
-        # condiion below where there is no room for improvement for the model
-        # hence we put a positive point in the "already correct" regions
-        if torch.stack(tmp_pos_loc).shape[-1] == 0:
-            tmp_pos_loc = torch.where(overlap_region)
-
-        pos_index = np.random.choice(len(tmp_pos_loc[1]))
-        pos_coordinates = int(tmp_pos_loc[1][pos_index]), int(tmp_pos_loc[2][pos_index])
-        pos_coordinates = pos_coordinates[::-1]
-        pos_labels = 1
+        positive_locations = [torch.where(pos_reg) for pos_reg in pos_region]
+        # we may have objects without a positive region (= missing true foreground)
+        # in this case we just sample a point where the model was already correct
+        positive_locations = [
+            torch.where(ovlp_reg) if len(pos_loc[0]) == 0 else pos_loc
+            for pos_loc, ovlp_reg in zip(positive_locations, overlap_region)
+        ]
+        # we sample one location for each object in the batch
+        sampled_indices = [np.random.choice(len(pos_loc[0])) for pos_loc in positive_locations]
+        # get the corresponding coordinates (Note that we flip the axis order here due to the expected order of SAM)
+        pos_coordinates = [
+            [pos_loc[-1][idx], pos_loc[-2][idx]] for pos_loc, idx in zip(positive_locations, sampled_indices)
+        ]
+
+        # make sure that we still have the correct batch size
+        assert len(pos_coordinates) == pos_region.shape[0]
+        pos_labels = [1] * len(pos_coordinates)
+
         return pos_coordinates, pos_labels
 
-    def get_negative_points(self, neg_region, true_object, gt):
-        tmp_neg_loc = torch.where(neg_region)
-        if torch.stack(tmp_neg_loc).shape[-1] == 0:
-            tmp_true_loc = torch.where(true_object)
-            x_coords, y_coords = tmp_true_loc[1], tmp_true_loc[2]
-            bbox = torch.stack([torch.min(x_coords), torch.min(y_coords),
-                                torch.max(x_coords) + 1, torch.max(y_coords) + 1])
-            bbox_mask = torch.zeros_like(true_object).squeeze(0)
-            bbox_mask[bbox[0]:bbox[2], bbox[1]:bbox[3]] = 1
-            bbox_mask = bbox_mask[None].to(self.device)
-
-            # NOTE: FIX: here we add dilation to the bbox because in some case we couldn't find objects at all
-            # TODO: just expand the pixels of bbox
-            dilated_bbox_mask = dilation(bbox_mask[None], torch.ones(3, 3).to(self.device)).squeeze(0)
-            background_mask = abs(dilated_bbox_mask - true_object)
-            tmp_neg_loc = torch.where(background_mask)
-
-            # there is a chance that the object is small to not return a decent-sized bounding box
-            # hence we might not find points sometimes there as well, hence we sample points from true background
-            if torch.stack(tmp_neg_loc).shape[-1] == 0:
-                tmp_neg_loc = torch.where(gt == 0)
+    # TODO get rid of this looped implementation and use proper batched computation instead
+    def get_negative_points(self, negative_region_batched, true_object_batched, gt_batched):
+        negative_coordinates, negative_labels = [], []
 
-        neg_index = np.random.choice(len(tmp_neg_loc[1]))
-        neg_coordinates = int(tmp_neg_loc[1][neg_index]), int(tmp_neg_loc[2][neg_index])
-        neg_coordinates = neg_coordinates[::-1]
-        neg_labels = 0
+        for neg_region, true_object, gt in zip(negative_region_batched, true_object_batched, gt_batched):
 
-        return neg_coordinates, neg_labels
+            tmp_neg_loc = torch.where(neg_region)
+            if torch.stack(tmp_neg_loc).shape[-1] == 0:
+                tmp_true_loc = torch.where(true_object)
+                x_coords, y_coords = tmp_true_loc[1], tmp_true_loc[2]
+                bbox = torch.stack([torch.min(x_coords), torch.min(y_coords),
+                                    torch.max(x_coords) + 1, torch.max(y_coords) + 1])
+                bbox_mask = torch.zeros_like(true_object).squeeze(0)
+                bbox_mask[bbox[0]:bbox[2], bbox[1]:bbox[3]] = 1
+                bbox_mask = bbox_mask[None].to(self.device)
+
+                # NOTE: FIX: here we dilate the bbox because in some cases we couldn't find objects at all
+                # TODO: just expand the pixels of bbox
+                dilated_bbox_mask = dilation(bbox_mask[None], torch.ones(3, 3).to(self.device)).squeeze(0)
+                background_mask = abs(dilated_bbox_mask - true_object)
+                tmp_neg_loc = torch.where(background_mask)
+
+                # the object may be too small to yield a decent-sized bounding box, in which case we
+                # may not find any points there either; then we sample points from the true background
+                if torch.stack(tmp_neg_loc).shape[-1] == 0:
+                    tmp_neg_loc = torch.where(gt == 0)
+
+            neg_index = np.random.choice(len(tmp_neg_loc[1]))
+            neg_coordinates = [tmp_neg_loc[1][neg_index], tmp_neg_loc[2][neg_index]]
+            neg_coordinates = neg_coordinates[::-1]
+            neg_labels = 0
+
+            negative_coordinates.append(neg_coordinates)
+            negative_labels.append(neg_labels)
+
+        return negative_coordinates, negative_labels
 
     def __call__(
-            self,
-            gt,
-            object_mask,
-            current_points,
-            current_labels
+        self,
+        gt,
+        object_mask,
+        current_points,
+        current_labels
     ):
         """Generate the prompts for each object iteratively in the segmentation.
         """
+        assert gt.shape == object_mask.shape
         true_object = gt.to(self.device)
         expected_diff = (object_mask - true_object)
         neg_region = (expected_diff == 1).to(torch.float)
@@ -257,8 +274,12 @@ def __call__(
 
         pos_coordinates, pos_labels = self.get_positive_points(pos_region, overlap_region)
         neg_coordinates, neg_labels = self.get_negative_points(neg_region, true_object, gt)
+        assert len(pos_coordinates) == len(pos_labels) == len(neg_coordinates) == len(neg_labels)
+
+        pos_coordinates, neg_coordinates = torch.tensor(pos_coordinates)[:, None], torch.tensor(neg_coordinates)[:, None]
+        pos_labels, neg_labels = torch.tensor(pos_labels)[:, None], torch.tensor(neg_labels)[:, None]
 
-        net_coords = torch.cat([current_points, torch.tensor([[pos_coordinates, neg_coordinates]])], dim=1)
-        net_labels = torch.cat([current_labels, torch.tensor([[pos_labels, neg_labels]])], dim=1)
+        net_coords = torch.cat([current_points, pos_coordinates, neg_coordinates], dim=1)
+        net_labels = torch.cat([current_labels, pos_labels, neg_labels], dim=1)
 
         return net_coords, net_labels
diff --git a/micro_sam/training/util.py b/micro_sam/training/util.py
@@ -153,8 +153,9 @@ def __call__(self, x, y, n_pos, n_neg, get_boxes=False, n_samples=None):
             gt = gt.squeeze().numpy().astype(np.int32)
             point_coordinates, bbox_coordinates = get_centers_and_bounding_boxes(gt)
 
+            this_n_samples = len(point_coordinates) if n_samples is None else n_samples
             box_prompts, point_prompts, point_label_prompts, sampled_cell_ids = self._get_prompt_lists(
-                gt, n_samples,
+                gt, this_n_samples,
                 n_pos, n_neg,
                 get_boxes,
                 get_points,