Merge pull request #1091 from computational-cell-analytics/dev

constantinpape · web-flow · commit 973c778a574f · 2025-08-12T19:03:18.000+02:00
Merge dev to master
diff --git a/development/check_data_count.py b/development/check_data_count.py
@@ -0,0 +1,236 @@
+import os
+from glob import glob
+
+import numpy as np
+import imageio.v3 as imageio
+
+from torch_em.data import datasets
+
+from elf.io import open_file
+
+
+# Base directory of the datasets; `check_data_count` joins it with one sub-folder name per dataset (e.g. "livecell").
+ROOT = "/mnt/vast-nhr/projects/cidas/cca/experiments/micro_sam/data"
+
+
+def check_data_count(lm_version="v3"):
+    image_counter, object_counter = 0, 0
+
+    # LIVECell data.
+    image_paths, label_paths = datasets.light_microscopy.livecell.get_livecell_paths(
+        path=os.path.join(ROOT, "livecell"), split="train",
+    )
+    image_counter += len(image_paths)
+    object_counter += sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    print("LIVECell", image_counter, object_counter)
+
+    # DeepBacs data.
+    image_dir, label_dir = datasets.light_microscopy.deepbacs.get_deepbacs_paths(
+        path=os.path.join(ROOT, "deepbacs"), bac_type="mixed", split="train",
+    )
+    image_paths = sorted(glob(os.path.join(image_dir, "*.tif")))
+    label_paths = sorted(glob(os.path.join(label_dir, "*.tif")))
+
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("DeepBacs", curr_image_counter, curr_object_counter)
+
+    # TissueNet data.
+    sample_paths = datasets.light_microscopy.tissuenet.get_tissuenet_paths(
+        path=os.path.join(ROOT, "tissuenet"), split="train",
+    )
+    curr_image_counter = len(sample_paths)
+    curr_object_counter = sum(
+        [len(np.unique(open_file(p)["labels/cell"])[1:]) for p in sample_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("TissueNet", curr_image_counter, curr_object_counter)
+
+    # PlantSeg (Root) data.
+    volume_paths = datasets.light_microscopy.plantseg.get_plantseg_paths(
+        path=os.path.join(ROOT, "plantseg"), name="root", split="train",
+    )
+    curr_image_counter, curr_object_counter = 0, 0
+    for p in volume_paths:
+        f = open_file(p)
+        curr_image_counter += f["raw"].shape[0]
+        curr_object_counter += sum(
+            [len(np.unique(curr_label)[1:]) for curr_label in f["label"]]
+        )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("PlantSeg (Root)", curr_image_counter, curr_object_counter)
+
+    # NeurIPS CellSeg data.
+    image_paths, label_paths = datasets.light_microscopy.neurips_cell_seg.get_neurips_cellseg_paths(
+        root=os.path.join(ROOT, "neurips_cellseg"), split="train",
+    )
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("NeurIPS CellSeg", curr_image_counter, curr_object_counter)
+
+    # CTC data.
+    curr_image_counter, curr_object_counter = 0, 0
+    for dataset_name in datasets.ctc.CTC_CHECKSUMS["train"].keys():
+        if dataset_name in ["Fluo-N2DH-GOWT1", "Fluo-N2DL-HeLa"]:
+            continue
+
+        image_dirs, label_dirs = datasets.light_microscopy.ctc.get_ctc_segmentation_paths(
+            path=os.path.join(ROOT, "ctc"), dataset_name=dataset_name,
+        )
+        image_paths = [p for d in image_dirs for p in sorted(glob(os.path.join(d, "*.tif")))]
+        label_paths = [p for d in label_dirs for p in sorted(glob(os.path.join(d, "*.tif")))]
+
+        curr_image_counter += len(image_paths)
+        curr_object_counter += sum(
+            [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+        )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("CTC", curr_image_counter, curr_object_counter)
+
+    # DSB Nucleus data.
+    image_paths, label_paths = datasets.light_microscopy.dsb.get_dsb_paths(
+        path=os.path.join(ROOT, "dsb"), source="reduced", split="train",
+    )
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("DSB Nucleus", curr_image_counter, curr_object_counter)
+
+    if lm_version == "v2":
+        return image_counter, object_counter
+
+    # EmbedSeg data.
+    curr_image_counter, curr_object_counter = 0, 0
+    names = [
+        "Mouse-Organoid-Cells-CBG", "Mouse-Skull-Nuclei-CBG", "Platynereis-ISH-Nuclei-CBG", "Platynereis-Nuclei-CBG",
+    ]
+    for name in names:
+        image_paths, label_paths = datasets.light_microscopy.embedseg_data.get_embedseg_paths(
+            path=os.path.join(ROOT, "embedseg"), name=name, split="train",
+        )
+        curr_image_counter += sum(
+            [imageio.imread(p).shape[0] for p in image_paths]
+        )
+        curr_object_counter += sum(
+            [sum(len(np.unique(curr_label)[1:]) for curr_label in imageio.imread(p)) for p in label_paths]
+        )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("EmbedSeg", curr_image_counter, curr_object_counter)
+
+    # CVZ Fluo data.
+    curr_image_counter, curr_object_counter = 0, 0
+    for stain_choice in ["cell", "dapi"]:
+        image_paths, label_paths = datasets.light_microscopy.cvz_fluo.get_cvz_fluo_paths(
+            path=os.path.join(ROOT, "cvz"), stain_choice=stain_choice,
+        )
+        curr_image_counter += len(image_paths)
+        curr_object_counter += sum(
+            [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+        )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("CVZ Fluo", curr_image_counter, curr_object_counter)
+
+    # DynamicNuclearNet data.
+    sample_paths = datasets.light_microscopy.dynamicnuclearnet.get_dynamicnuclearnet_paths(
+        path=os.path.join(ROOT, "dynamicnuclearnet"), split="train",
+    )
+
+    curr_image_counter = len(sample_paths)
+    curr_object_counter = sum(
+        [len(np.unique(open_file(p)["labels"])[1:]) for p in sample_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("DynamicNuclearNet", curr_image_counter, curr_object_counter)
+
+    # CellPose data.
+    image_paths, label_paths = datasets.light_microscopy.cellpose.get_cellpose_paths(
+        path=os.path.join(ROOT, "cellpose"), split="train", choice="cyto",
+    )
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("CellPose", curr_image_counter, curr_object_counter)
+
+    # OmniPose data.
+    image_paths, label_paths = datasets.light_microscopy.omnipose.get_omnipose_paths(
+        path=os.path.join(ROOT, "omnipose"), split="train",
+    )
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("OmniPose", curr_image_counter, curr_object_counter)
+
+    # OrgaSegment data.
+    image_paths, label_paths = datasets.light_microscopy.orgasegment.get_orgasegment_paths(
+        path=os.path.join(ROOT, "orgasegment"), split="train",
+    )
+    curr_image_counter = len(image_paths)
+    curr_object_counter = sum(
+        [len(np.unique(imageio.imread(p))[1:]) for p in label_paths]
+    )
+
+    image_counter += curr_image_counter
+    object_counter += curr_object_counter
+
+    print("OrgaSegment", curr_image_counter, curr_object_counter)
+
+    return image_counter, object_counter
+
+
+def main():
+    # image_counts, object_counts = check_data_count("v2")
+    # print(f"v2 Model - Count of images: '{image_counts}'; and count of objects: '{object_counts}'")
+
+    image_counts, object_counts = check_data_count("v3")
+    print(f"v3 and v4 Model - Count of images: '{image_counts}'; and count of objects: '{object_counts}'")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/development/support/train_cell_fluo_embl_alm.py b/development/support/train_cell_fluo_embl_alm.py
@@ -0,0 +1,67 @@
+import os
+from glob import glob
+from natsort import natsorted
+
+import torch
+
+import micro_sam.training as sam_training
+from micro_sam.util import export_custom_sam_model
+
+
+def train_embl_alm_data(checkpoint_name):
+    """Training a MicroSAM model for https://github.com/computational-cell-analytics/micro-sam/issues/1084.
+    """
+    # All hyperparameters for training.
+    batch_size = 1
+    patch_shape = (512, 512)
+    n_objects_per_batch = 25
+    device = torch.device("cuda")
+
+    # Get the filepaths to images and corresponding labels.
+    image_paths = natsorted(glob(os.path.join(os.getcwd(), "data_same_size", "*.tif")))
+    label_paths = natsorted(glob(os.path.join(os.getcwd(), "masks_same_size", "*.tif")))
+
+    # Next, prepare the dataloaders.
+    kwargs = {
+        "batch_size": batch_size,
+        "patch_shape": patch_shape,
+        "with_segmentation_decoder": True,
+        "num_workers": 16,
+        "shuffle": True,
+    }
+
+    train_loader = sam_training.default_sam_loader(
+        raw_paths=image_paths[:-5], raw_key=None, label_paths=label_paths[:-5], label_key=None, **kwargs,
+    )
+    val_loader = sam_training.default_sam_loader(
+        raw_paths=image_paths[-5:], raw_key=None, label_paths=label_paths[-5:], label_key=None, **kwargs,
+    )
+
+    # Run training.
+    sam_training.train_sam(
+        name=checkpoint_name,
+        model_type="vit_b_lm",
+        train_loader=train_loader,
+        val_loader=val_loader,
+        n_epochs=10,
+        n_objects_per_batch=n_objects_per_batch,
+        with_segmentation_decoder=True,
+        device=device,
+    )
+
+
+def main():
+    checkpoint_name = "sam_embl_alm_fluo"  # Name of the checkpoint, stored at "./checkpoints/<CHECKPOINT_NAME>"
+
+    train_embl_alm_data(checkpoint_name)
+
+    # Export the trained model.
+    export_custom_sam_model(
+        checkpoint_path=os.path.join("checkpoints", checkpoint_name, "best.pt"),
+        model_type="vit_b",
+        save_path="./finetuned_embl_alm_fluo_model.pth",
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/doc/band.md b/doc/band.md
@@ -4,7 +4,7 @@ BAND is a service offered by EMBL Heidelberg under the "The German Network for B
 In order to use BAND and start `micro_sam` on it follow these steps:
 
 ## Start BAND
-- Go to https://bandv1.denbi.uni-tuebingen.de/ and click **Login**. If you have not used BAND before you will need to register for BAND. Currently you can only sign up via a Google account. NOTE: It takes a couple of seconds for the "Launch Desktops" window to appear.
+- Go to https://bandv1.denbi.uni-tuebingen.de/ or to the alternative site https://band.vm.fedcloud.eu/ (you can use either one) and click **Login**. If you have not used BAND before you will need to register for BAND. Currently you can only sign up via a Google account. NOTE: It takes a couple of seconds for the "Launch Desktops" window to appear.
 - Launch a BAND desktop with sufficient resources. It's particularly important to select a GPU. The settings from the image below are a good choice.
 - Go to the desktop by clicking **GO TO DESKTOP** in the **Running Desktops** menu. See also the screenshot below.
 
diff --git a/environment.yaml b/environment.yaml
@@ -26,6 +26,7 @@ dependencies:
     - torch_em >=0.7.10
     - tqdm
     - timm
+    - trackastra
     - xarray
     - zarr
     - pip:
diff --git a/micro_sam/bioimageio/model_export.py b/micro_sam/bioimageio/model_export.py
@@ -476,7 +476,7 @@ def export_sam_model(
                 source=Path(checkpoint_path),
                 architecture=architecture,
                 pytorch_version=spec.Version(torch.__version__),
-                dependencies=spec.EnvironmentFileDescr(source=dependency_file),
+                dependencies=spec.FileDescr(source=dependency_file),
             )
         )
 
diff --git a/micro_sam/evaluation/instance_segmentation.py b/micro_sam/evaluation/instance_segmentation.py
@@ -229,20 +229,20 @@ def run_instance_segmentation_grid_search(
         image = _load_image(image_path, image_key, roi=None if rois is None else rois[i])
         gt = _load_image(gt_path, gt_key, roi=None if rois is None else rois[i])
 
+        if tiling_window_params is None:
+            tiling_window_params = {}
+
         if embedding_dir is None:
             embedding_path = None
+            segmenter.initialize(image, **tiling_window_params)
+
         else:
             assert predictor is not None
             embedding_path = os.path.join(embedding_dir, f"{os.path.splitext(image_name)[0]}.zarr")
-
-        if tiling_window_params is None:
-            tiling_window_params = {}
-
-        image_embeddings = util.precompute_image_embeddings(
-            predictor, image, embedding_path, ndim=2, verbose=verbose_embeddings, **tiling_window_params
-        )
-
-        segmenter.initialize(image, image_embeddings, **tiling_window_params)
+            image_embeddings = util.precompute_image_embeddings(
+                predictor, image, embedding_path, ndim=2, verbose=verbose_embeddings, **tiling_window_params
+            )
+            segmenter.initialize(image, image_embeddings, **tiling_window_params)
 
         _grid_search_iteration(
             segmenter, gs_combinations, gt, image_name,
diff --git a/micro_sam/instance_segmentation.py b/micro_sam/instance_segmentation.py
@@ -805,7 +805,6 @@ def get_unetr(
         use_skip_connection=False,
         resize_input=True,
         use_conv_transpose=use_conv_transpose,
-
     )
 
     if decoder_state is not None:
diff --git a/micro_sam/training/training.py b/micro_sam/training/training.py

Original file line number	Diff line number	Diff line change
`@@ -476,7 +476,7 @@ def export_sam_model(`
`476`	`476`	`source=Path(checkpoint_path),`
`477`	`477`	`architecture=architecture,`
`478`	`478`	`pytorch_version=spec.Version(torch.__version__),`
`479`		`- dependencies=spec.EnvironmentFileDescr(source=dependency_file),`
	`479`	`+ dependencies=spec.FileDescr(source=dependency_file),`
`480`	`480`	`)`
`481`	`481`	`)`
`482`	`482`
Original file line number	Diff line number	Diff line change
`@@ -805,7 +805,6 @@ def get_unetr(`
`805`	`805`	`use_skip_connection=False,`
`806`	`806`	`resize_input=True,`
`807`	`807`	`use_conv_transpose=use_conv_transpose,`
`808`		`-`
`809`	`808`	`)`
`810`	`809`
`811`	`810`	`if decoder_state is not None:`