
Commit d99f261

Merge pull request #123 from computational-cell-analytics/dev
Training and Evaluation Functionality
2 parents: 94ad363 + 74e6f3a

51 files changed (+4175, -60 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
```
@@ -3,3 +3,4 @@ __pycache__/
 *.pth
 *.tif
 examples/data/*
+*.out
```

environment_cpu.yaml

Lines changed: 1 addition & 0 deletions
```
@@ -11,6 +11,7 @@ dependencies:
 - pytorch
 - segment-anything
 - torchvision
+- torch_em >=0.5.1
 - tqdm
 # - pip:
 #   - git+https://github.com/facebookresearch/segment-anything.git
```

environment_gpu.yaml

Lines changed: 1 addition & 0 deletions
```
@@ -12,6 +12,7 @@ dependencies:
 - pytorch-cuda>=11.7 # you may need to update the cuda version to match your system
 - segment-anything
 - torchvision
+- torch_em >=0.5.1
 - tqdm
 # - pip:
 #   - git+https://github.com/facebookresearch/segment-anything.git
```
Lines changed: 23 additions & 0 deletions
```
import h5py
import micro_sam.sam_annotator as annotator
from micro_sam.util import get_sam_model

# TODO add an example for the 2d annotator with a custom model


def annotator_3d_with_custom_model():
    with h5py.File("./data/gut1_block_1.h5") as f:
        raw = f["raw"][:]

    custom_model = "/home/pape/Work/data/models/sam/user-study/vit_h_nuclei_em_finetuned.pt"
    embedding_path = "./embeddings/nuclei3d-custom-vit-h.zarr"
    predictor = get_sam_model(checkpoint_path=custom_model, model_type="vit_h")
    annotator.annotator_3d(raw, embedding_path, predictor=predictor)


def main():
    annotator_3d_with_custom_model()


if __name__ == "__main__":
    main()
```
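Note that the checkpoint, input data, and embedding paths in this example are specific to the author's setup; they need to be adapted to your own fine-tuned checkpoint, input volume, and embedding cache before the example can be run.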

finetuning/.gitignore

Lines changed: 4 additions & 0 deletions
```
checkpoints/
logs/
sam_embeddings/
results/
```

finetuning/README.md

Lines changed: 49 additions & 0 deletions
# Segment Anything Finetuning

Preliminary examples for fine-tuning Segment Anything on custom datasets.

## LiveCELL

**Finetuning**

Run the script `livecell_finetuning.py` to fine-tune a model on LiveCELL.

**Inference**

The script `livecell_inference.py` can be used to run inference on the test set. It supports several arguments to run inference with different prompt configurations.
For example, run
```
python livecell_inference.py -c checkpoints/livecell_sam/best.pt -m vit_b -e experiment -i /scratch/projects/nim00007/data/LiveCELL --points --positive 1 --negative 0
```
for inference with one positive point prompt and no negative point prompts (the prompts are derived from the ground-truth).

The arguments `-c`, `-e` and `-i` specify where the model checkpoint is located, where the predictions from the model and other experiment data will be saved, and where the input dataset (LiveCELL) is stored.

To run the default set of experiments from our publication, use the command
```
python livecell_inference.py -c checkpoints/livecell_sam/best.pt -m vit_b -e experiment -i /scratch/projects/nim00007/data/LiveCELL -d --prompt_folder /scratch/projects/nim00007/sam/experiments/prompts/livecell
```

Here `-d` automatically runs the evaluation for these settings:
- `--points --positive 1 --negative 0` (using point prompts with a single positive point)
- `--points --positive 2 --negative 4` (using point prompts with two positive points and four negative points)
- `--points --positive 4 --negative 8` (using point prompts with four positive points and eight negative points)
- `--box` (using box prompts)

In addition, `--prompt_folder` specifies a folder with precomputed prompts. Using precomputed prompts significantly speeds up the experiments and makes them reproducible. (Without it the prompts will be recalculated each time.)

You can also evaluate the automatic instance segmentation functionality by running
```
python livecell_inference.py -c checkpoints/livecell_sam/best.pt -m vit_b -e experiment -i /scratch/projects/nim00007/data/LiveCELL -a
```

This will first perform a grid search for the best parameters on a subset of the validation set and then run inference on the test set. This can take up to a day.

**Evaluation**

The script `livecell_evaluation.py` can then be used to evaluate the results from the inference runs.
E.g., run the script as below to evaluate the previous predictions.
```
python livecell_evaluation.py -i /scratch/projects/nim00007/data/LiveCELL -e experiment
```
This will create a folder `experiment/results` with CSV tables containing the results per cell type and averaged over all images.
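The resulting tables can be inspected with pandas, e.g. to compare the different prompt settings. A minimal sketch, assuming one of the CSV files in `experiment/results` (the file name below is a placeholder, not part of this commit):

```
import pandas as pd

# Load one of the result tables written by livecell_evaluation.py.
# "instance_segmentation.csv" is a placeholder name; check experiment/results/
# for the tables that were actually written.
results = pd.read_csv("experiment/results/instance_segmentation.csv")

print(results)
# Average the numeric score columns over all rows (e.g. over cell types).
print(results.mean(numeric_only=True))
```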
Lines changed: 84 additions & 0 deletions
```
import argparse
import os
from glob import glob

import pickle
from subprocess import run

import micro_sam.evaluation as evaluation
from tqdm import tqdm

DATA_ROOT = "/scratch/projects/nim00007/sam/ood/LM"
PROMPT_ROOT = "/scratch/projects/nim00007/sam/experiments/prompts"


def get_paths(dataset):
    pattern = os.path.join(DATA_ROOT, dataset, "test", "labels_*.tif")
    paths = sorted(glob(pattern))
    assert len(paths) > 0, pattern
    return paths


def precompute_setting(prompt_settings, dataset):
    gt_paths = get_paths(dataset)
    prompt_folder = os.path.join(PROMPT_ROOT, dataset)
    evaluation.precompute_all_prompts(gt_paths, prompt_folder, prompt_settings)


def submit_array_job(prompt_settings, dataset):
    n_settings = len(prompt_settings)
    cmd = ["sbatch", "-a", f"0-{n_settings-1}", "precompute_prompts.sbatch", dataset]
    run(cmd)


def check_settings(dataset, settings, expected_len):
    prompt_folder = os.path.join(PROMPT_ROOT, dataset)

    def check_prompt_file(prompt_file):
        assert os.path.exists(prompt_file), prompt_file
        with open(prompt_file, "rb") as f:
            prompts = pickle.load(f)
        assert len(prompts) == expected_len, f"{len(prompts)}, {expected_len}"

    for setting in tqdm(settings, desc="Check prompt files"):
        pos, neg = setting["n_positives"], setting["n_negatives"]
        prompt_file = os.path.join(prompt_folder, f"points-p{pos}-n{neg}.pkl")
        if pos == 0 and neg == 0:
            prompt_file = os.path.join(prompt_folder, "boxes.pkl")
        check_prompt_file(prompt_file)

    print("All files checked!")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("dataset")
    parser.add_argument("-c", "--check", action="store_true")
    args = parser.parse_args()

    # this will fail if the dataset is invalid
    gt_paths = get_paths(args.dataset)

    settings = evaluation.default_experiment_settings()
    # we may use this as the point setting instead of p2-n4,
    # so we also precompute it
    settings.append(
        {"use_points": True, "use_boxes": False, "n_positives": 4, "n_negatives": 8},  # p4-n8
    )

    if args.check:
        check_settings(args.dataset, settings, len(gt_paths))
        return

    job_id = os.environ.get("SLURM_ARRAY_TASK_ID", None)

    if job_id is None:  # this is the main script that submits slurm jobs
        submit_array_job(settings, args.dataset)
    else:  # we're in a slurm job and precompute a setting
        job_id = int(job_id)
        this_settings = [settings[job_id]]
        precompute_setting(this_settings, args.dataset)


if __name__ == "__main__":
    main()
```
Lines changed: 9 additions & 0 deletions
```
#! /bin/bash
#SBATCH -c 4
#SBATCH --mem 48G
#SBATCH -t 720
#SBATCH -p grete:shared
#SBATCH -G A100:1

source activate sam
python precompute_prompts.py $@
```
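Together, these two files implement a self-submitting SLURM array job: when the Python script is run outside of SLURM (no `SLURM_ARRAY_TASK_ID` in the environment), e.g. as `python precompute_prompts.py deepbacs` (the dataset name here is only an example), it submits one array task per prompt setting via `sbatch -a`. Each task then re-enters the same script through the sbatch wrapper and precomputes only the setting selected by its array index. Running the script again with `-c`/`--check` verifies that every expected prompt file exists and contains one entry per label image.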
Lines changed: 79 additions & 0 deletions
```
import os
from glob import glob

import imageio.v3 as imageio
import numpy as np

from sklearn.model_selection import train_test_split

ROOT = "/scratch-grete/projects/nim00007/data/deepbacs"


def download_deepbacs():
    from torch_em.data.datasets import get_deepbacs_loader
    get_deepbacs_loader(ROOT, "train", bac_type="mixed", download=True, patch_shape=(256, 256), batch_size=1)
    get_deepbacs_loader(ROOT, "test", bac_type="mixed", download=True, patch_shape=(256, 256), batch_size=1)


# old code from Anwai
def get_deepbacs_test_images():
    root = ROOT
    output_root = "/scratch-grete/projects/nim00007/sam/ood/LM/deepbacs"

    def write_split(images, labels, split):
        out_folder = os.path.join(output_root, split)
        os.makedirs(out_folder, exist_ok=True)
        for ii, (im, lab) in enumerate(zip(images, labels)):
            out_im = os.path.join(out_folder, f"image_{ii:04}.tif")
            out_lab = os.path.join(out_folder, f"labels_{ii:04}.tif")
            im, lab = imageio.imread(im), imageio.imread(lab)
            imageio.imwrite(out_im, im)
            imageio.imwrite(out_lab, lab)

    root_imgs = glob(os.path.join(root, "mixed", "test", "source", "*"))
    root_gts = glob(os.path.join(root, "mixed", "test", "target", "*"))
    np.random.seed(0)

    val_images = np.random.choice(root_imgs, size=5, replace=False).tolist()
    val_labels = [gt_p for gt_p in root_gts if os.path.basename(gt_p) in [os.path.basename(x) for x in val_images]]

    test_images = [ip for ip in root_imgs if ip not in val_images]
    test_labels = [gp for gp in root_gts if gp not in val_labels]

    write_split(val_images, val_labels, "val")
    write_split(test_images, test_labels, "test")


# new simplified code
def get_deepbacs_test_images_new():
    root = ROOT
    output_root = "/scratch-grete/projects/nim00007/sam/ood/LM/deepbacs"

    def write_split(images, labels, split):
        out_folder = os.path.join(output_root, split)
        os.makedirs(out_folder, exist_ok=True)
        for ii, (im, lab) in enumerate(zip(images, labels)):
            out_im = os.path.join(out_folder, f"image_{ii:04}.tif")
            out_lab = os.path.join(out_folder, f"labels_{ii:04}.tif")
            im, lab = imageio.imread(im), imageio.imread(lab)
            imageio.imwrite(out_im, im)
            imageio.imwrite(out_lab, lab)

    images = sorted(glob(os.path.join(root, "mixed", "test", "source", "*")))
    labels = sorted(glob(os.path.join(root, "mixed", "test", "target", "*")))

    test_images, val_images, test_labels, val_labels = train_test_split(
        images, labels, test_size=0.15, random_state=42
    )

    write_split(val_images, val_labels, "val")
    write_split(test_images, test_labels, "test")


def main():
    # download_deepbacs()
    get_deepbacs_test_images_new()


if __name__ == "__main__":
    main()
```
Lines changed: 39 additions & 0 deletions
```
import os
from glob import glob

import imageio.v3 as imageio

ROOT = "/scratch-grete/projects/nim00007/data/tissuenet"


def get_tissuenet_images(split):
    assert split in ["val", "test"]
    val_set, test_set = glob(os.path.join(ROOT, "val", "*")), glob(os.path.join(ROOT, "test", "*"))
    if split == "val":
        return sorted(val_set)
    else:
        return sorted(test_set)


# TODO: still unfinished - the image and label paths (val_images, val_labels,
# test_images, test_labels) still need to be derived from the split folders
# before the splits can be written below.
def create_tissuenet_splits():
    output_root = "/scratch-grete/projects/nim00007/sam/ood/LM/tissuenet"

    def write_split(images, labels, split):
        out_folder = os.path.join(output_root, split)
        os.makedirs(out_folder, exist_ok=True)
        for ii, (im, lab) in enumerate(zip(images, labels)):
            out_im = os.path.join(out_folder, f"image_{ii:04}.tif")
            out_lab = os.path.join(out_folder, f"labels_{ii:04}.tif")
            im, lab = imageio.imread(im), imageio.imread(lab)
            imageio.imwrite(out_im, im)
            imageio.imwrite(out_lab, lab)

    val_set = get_tissuenet_images("val")

    write_split(val_images, val_labels, "val")
    write_split(test_images, test_labels, "test")


if __name__ == "__main__":
    create_tissuenet_splits()
```
