Extend support for automatic segmentation functionality to return embeddings (#855)

anwai98 · web-flow · commit 01756a093132 · 2025-02-06T23:02:16.000+01:00
Extend support for automatic segmentation functionality to return embeddings
diff --git a/micro_sam/automatic_segmentation.py b/micro_sam/automatic_segmentation.py
@@ -74,6 +74,7 @@ def automatic_instance_segmentation(
     tile_shape: Optional[Tuple[int, int]] = None,
     halo: Optional[Tuple[int, int]] = None,
     verbose: bool = True,
+    return_embeddings: bool = False,
     **generate_kwargs
 ) -> np.ndarray:
     """Run automatic segmentation for the input image.
@@ -92,6 +93,7 @@ def automatic_instance_segmentation(
         tile_shape: Shape of the tiles for tiled prediction. By default prediction is run without tiling.
         halo: Overlap of the tiles for tiled prediction.
         verbose: Verbosity flag.
+        return_embeddings: Whether to return the precomputed image embeddings.
         generate_kwargs: optional keyword arguments for the generate function of the AMG or AIS class.
 
     Returns:
@@ -142,23 +144,32 @@ def automatic_instance_segmentation(
         if (image_data.ndim != 3) and (image_data.ndim != 4 and image_data.shape[-1] != 3):
             raise ValueError(f"The inputs does not match the shape expectation of 3d inputs: {image_data.shape}")
 
-        instances = automatic_3d_segmentation(
+        outputs = automatic_3d_segmentation(
             volume=image_data,
             predictor=predictor,
             segmentor=segmenter,
             embedding_path=embedding_path,
             tile_shape=tile_shape,
             halo=halo,
             verbose=verbose,
+            return_embeddings=return_embeddings,
             **generate_kwargs
         )
 
+        if return_embeddings:
+            instances, image_embeddings = outputs
+        else:
+            instances = outputs
+
+    # Save the instance segmentation if 'output_path' is provided.
     if output_path is not None:
-        # Save the instance segmentation
         output_path = Path(output_path).with_suffix(".tif")
         imageio.imwrite(output_path, instances, compression="zlib")
 
-    return instances
+    if return_embeddings:
+        return instances, image_embeddings
+    else:
+        return instances
 
 
 def main():
@@ -194,8 +205,7 @@ def main():
         help=f"The segment anything model that will be used, one of {available_models}."
     )
     parser.add_argument(
-        "-c", "--checkpoint", default=None,
-        help="Checkpoint from which the SAM model will be loaded."
+        "-c", "--checkpoint", default=None, help="Checkpoint from which the SAM model will be loaded."
     )
     parser.add_argument(
         "--tile_shape", nargs="+", type=int, help="The tile shape for using tiled prediction.", default=None
diff --git a/micro_sam/instance_segmentation.py b/micro_sam/instance_segmentation.py
@@ -572,7 +572,7 @@ def _process_tiled_embeddings(predictor, image, image_embeddings, tile_shape, ha
     # Use tile shape and halo from the precomputed embeddings if not given.
     # Otherwise check that they are consistent.
     feats = image_embeddings["features"]
-    tile_shape_, halo_ = feats.attrs["tile_shape"], feats.attrs["halo"]
+    tile_shape_, halo_ = tuple(feats.attrs["tile_shape"]), tuple(feats.attrs["halo"])
     if tile_shape is None:
         tile_shape = tile_shape_
     elif tile_shape != tile_shape_:
@@ -835,7 +835,7 @@ def get_predictor_and_decoder(
         model_type: The type of the image encoder used in the SAM model.
         checkpoint_path: Path to the checkpoint from which to load the data.
         device: The device.
-        peft_kwargs: Keyword arguments for th PEFT wrapper class.
+        peft_kwargs: Keyword arguments for the PEFT wrapper class.
 
     Returns:
         The SAM predictor.
@@ -1160,6 +1160,8 @@ def initialize(
                 See `util.precompute_image_embeddings` for details.
             i: Index for the image data. Required if `image` has three spatial dimensions
                 or a time dimension and two spatial dimensions.
+            tile_shape: Shape of the tiles for precomputing image embeddings.
+            halo: Overlap of the tiles for tiled precomputation of image embeddings.
             verbose: Dummy input to be compatible with other function signatures.
             pbar_init: Callback to initialize an external progress bar. Must accept number of steps and description.
                 Can be used together with pbar_update to handle napari progress bar in other thread.
diff --git a/micro_sam/multi_dimensional_segmentation.py b/micro_sam/multi_dimensional_segmentation.py
@@ -368,6 +368,7 @@ def automatic_3d_segmentation(
     tile_shape: Optional[Tuple[int, int]] = None,
     halo: Optional[Tuple[int, int]] = None,
     verbose: bool = True,
+    return_embeddings: bool = False,
     **kwargs,
 ) -> np.ndarray:
     """Segment volume in 3d.
@@ -388,6 +389,7 @@ def automatic_3d_segmentation(
         tile_shape: Shape of the tiles for tiled prediction. By default prediction is run without tiling.
         halo: Overlap of the tiles for tiled prediction.
         verbose: Verbosity flag.
+        return_embeddings: Whether to return the precomputed image embeddings.
         kwargs: Keyword arguments for the 'generate' method of the 'segmentor'.
 
     Returns:
@@ -430,4 +432,7 @@ def automatic_3d_segmentation(
         verbose=verbose,
     )
 
-    return segmentation
+    if return_embeddings:
+        return segmentation, image_embeddings
+    else:
+        return segmentation