Update embedding visualization and add support for ROIs in SAM datasets (#1065)

constantinpape · web-flow · commit 98385f49bedc · 2025-06-14T21:50:53.000+02:00
* Update embedding visualization code

* Add roi support for SAM datasets

* Update doc strings
diff --git a/micro_sam/training/training.py b/micro_sam/training/training.py
@@ -581,6 +581,7 @@ def default_sam_dataset(
     is_train: bool = True,
     min_size: int = 25,
     max_sampling_attempts: Optional[int] = None,
+    rois: Optional[Union[slice, Tuple[slice, ...]]] = None,
     **kwargs,
 ) -> Dataset:
     """Create a PyTorch Dataset for training a SAM model.
@@ -606,6 +607,7 @@ def default_sam_dataset(
         is_train: Whether this dataset is used for training or validation. By default, set to 'True'.
         min_size: Minimal object size. Smaller objects will be filtered. By default, set to '25'.
         max_sampling_attempts: Number of sampling attempts to make from a dataset.
+        rois: The region(s) of interest for the data.
         kwargs: Additional keyword arguments for `torch_em.default_segmentation_dataset`.
 
     Returns:
@@ -702,6 +704,7 @@ def default_sam_dataset(
             ndim=2,
             is_seg_dataset=is_seg_dataset,
             raw_transform=raw_transform,
+            rois=rois,
             **kwargs
         )
         n_samples = max(len(loader), 100 if is_train else 5)
@@ -719,6 +722,7 @@ def default_sam_dataset(
         sampler=sampler,
         n_samples=n_samples,
         is_seg_dataset=is_seg_dataset,
+        rois=rois,
         **kwargs,
     )
 
diff --git a/micro_sam/visualization.py b/micro_sam/visualization.py
@@ -17,21 +17,23 @@
 # PCA visualization for the image embeddings
 #
 
-def compute_pca(embeddings: np.ndarray) -> np.ndarray:
+def compute_pca(embeddings: np.ndarray, n_components: int = 3, as_rgb: bool = True) -> np.ndarray:
     """Compute the pca projection of the embeddings to visualize them as RGB image.
 
     Args:
         embeddings: The embeddings. For example predicted by the SAM image encoder.
+        n_components: The number of PCA components to use for dimensionality reduction.
+        as_rgb: Whether to normalize the projected embeddings so that they can be displayed as RGB.
 
     Returns:
         PCA of the embeddings, mapped to the pixels.
     """
     if embeddings.ndim == 4:
-        pca = embedding_pca(embeddings.squeeze()).transpose((1, 2, 0))
+        pca = embedding_pca(embeddings.squeeze(), n_components=n_components, as_rgb=as_rgb).transpose((1, 2, 0))
     elif embeddings.ndim == 5:
         pca = []
         for embed in embeddings:
-            vis = embedding_pca(embed.squeeze()).transpose((1, 2, 0))
+            vis = embedding_pca(embed.squeeze(), n_components=n_components, as_rgb=as_rgb).transpose((1, 2, 0))
             pca.append(vis)
         pca = np.stack(pca)
     else:
@@ -53,10 +55,10 @@ def _get_crop(embed_shape, shape):
     return crop
 
 
-def _project_embeddings(embeddings, shape, apply_crop=True):
+def _project_embeddings(embeddings, shape, apply_crop=True, n_components=3, as_rgb=True):
     assert embeddings.ndim == len(shape) + 2, f"{embeddings.shape}, {shape}"
 
-    embedding_vis = compute_pca(embeddings)
+    embedding_vis = compute_pca(embeddings, n_components=n_components, as_rgb=as_rgb)
     if not apply_crop:
         pass
     elif len(shape) == 2:
@@ -107,7 +109,7 @@ def resize_shape(shape):
     return np.concatenate([resize(arr, resize_shape(arr.shape)) for arr in arrays], axis=axis)
 
 
-def _project_tiled_embeddings(image_embeddings):
+def _project_tiled_embeddings(image_embeddings, n_components, as_rgb):
     features = image_embeddings["features"]
     tile_shape, halo, shape = features.attrs["tile_shape"], features.attrs["halo"], features.attrs["shape"]
     tiling = blocking([0, 0], shape, tile_shape)
@@ -141,30 +143,34 @@ def _project_tiled_embeddings(image_embeddings):
 
     if features["0"].ndim == 5:
         shape = (features["0"].shape[0],) + tuple(shape)
-    embedding_vis, scale = _project_embeddings(embeds, shape, apply_crop=False)
+    embedding_vis, scale = _project_embeddings(
+        embeds, shape, n_components=n_components, as_rgb=as_rgb, apply_crop=False
+    )
     return embedding_vis, scale
 
 
 def project_embeddings_for_visualization(
-    image_embeddings: ImageEmbeddings
+    image_embeddings: ImageEmbeddings, n_components: int = 3, as_rgb: bool = True,
 ) -> Tuple[np.ndarray, Tuple[float, ...]]:
     """Project image embeddings to pixel-wise PCA.
 
     Args:
         image_embeddings: The image embeddings.
+        n_components: The number of PCA components to use for dimensionality reduction.
+        as_rgb: Whether to normalize the projected embeddings so that they can be displayed as RGB.
 
     Returns:
         The PCA of the embeddings.
         The scale factor for resizing to the original image size.
     """
     is_tiled = image_embeddings["input_size"] is None
     if is_tiled:
-        embedding_vis, scale = _project_tiled_embeddings(image_embeddings)
+        embedding_vis, scale = _project_tiled_embeddings(image_embeddings, n_components, as_rgb)
     else:
         embeddings = image_embeddings["features"]
         shape = tuple(image_embeddings["original_size"])
         if embeddings.ndim == 5:
             shape = (embeddings.shape[0],) + shape
-        embedding_vis, scale = _project_embeddings(embeddings, shape)
+        embedding_vis, scale = _project_embeddings(embeddings, shape, n_components=n_components, as_rgb=as_rgb)
 
     return embedding_vis, scale