Merge branch 'intensity-masking' of https://github.com/computational-cell-analytics/flamingo-tools into intensity-masking

constantinpape · constantinpape · commit 0534db2fab48 · 2025-09-09T17:36:45.000+02:00
diff --git a/scripts/la-vision/la_vision_point_annotations.py b/scripts/la-vision/la_vision_point_annotations.py
@@ -0,0 +1,33 @@
+import os
+from glob import glob
+
+import imageio.v3 as imageio
+import napari
+import numpy as np
+from skimage.measure import regionprops
+
+
+def main():
+    image_files = sorted(glob("la-vision-sgn-new/images/*.tif"))
+    label_files = sorted(glob("la-vision-sgn-new/segmentation-postprocessed/*.tif"))
+
+    for imf, lf in zip(image_files, label_files):
+        im = imageio.imread(imf)
+        labels = imageio.imread(lf)
+
+        props = regionprops(labels)
+        centers = np.array([prop.centroid for prop in props])
+
+        name = os.path.basename(imf)
+        print(name)
+
+        v = napari.Viewer()
+        v.add_image(im)
+        v.add_labels(labels)
+        v.add_points(centers, size=5, out_of_slice_display=True)
+        v.title = name
+        napari.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/la-vision/train_sgn_detection.py b/scripts/la-vision/train_sgn_detection.py
@@ -0,0 +1,111 @@
+import os
+import sys
+import json
+from glob import glob
+
+from sklearn.model_selection import train_test_split
+
+sys.path.append("/home/pape/Work/my_projects/czii-protein-challenge")
+sys.path.append("/user/pape41/u12086/Work/my_projects/czii-protein-challenge")
+sys.path.append("../synapse_marker_detection")
+
+from utils.training.training import supervised_training  # noqa
+from detection_dataset import DetectionDataset, MinPointSampler  # noqa
+
+ROOT = "./la-vision-sgn-new"  # noqa
+
+TRAIN = os.path.join(ROOT, "images")
+TRAIN_EMPTY = os.path.join(ROOT, "empty_images")
+
+LABEL = os.path.join(ROOT, "centroids")
+LABEL_EMPTY = os.path.join(ROOT, "empty_centroids")
+
+
+def _get_paths(split, train_folder, label_folder, n=None):
+    image_paths = sorted(glob(os.path.join(train_folder, "*.tif")))
+    label_paths = sorted(glob(os.path.join(label_folder, "*.csv")))
+    assert len(image_paths) == len(label_paths)
+    if n is not None:
+        image_paths, label_paths = image_paths[:n], label_paths[:n]
+
+    train_images, val_images, train_labels, val_labels = train_test_split(
+        image_paths, label_paths, test_size=1, random_state=42
+    )
+
+    if split == "train":
+        image_paths = train_images
+        label_paths = train_labels
+    else:
+        image_paths = val_images
+        label_paths = val_labels
+
+    return image_paths, label_paths
+
+
+def get_paths(split):
+    image_paths, label_paths = _get_paths(split, TRAIN, LABEL)
+    empty_image_paths, empty_label_paths = _get_paths(split, TRAIN_EMPTY, LABEL_EMPTY, n=4)
+    return image_paths + empty_image_paths, label_paths + empty_label_paths
+
+
+def train():
+
+    model_name = "sgn-low-res-detection-v1"
+
+    train_paths, train_label_paths = get_paths("train")
+    val_paths, val_label_paths = get_paths("val")
+    # We need to give the paths for the test loader, although it's never used.
+    test_paths, test_label_paths = val_paths, val_label_paths
+
+    print("Start training with:")
+    print(len(train_paths), "tomograms for training")
+    print(len(val_paths), "tomograms for validation")
+
+    patch_shape = [48, 256, 256]
+    batch_size = 8
+    check = False
+
+    checkpoint_path = f"./checkpoints/{model_name}"
+    os.makedirs(checkpoint_path, exist_ok=True)
+    with open(os.path.join(checkpoint_path, "splits.json"), "w") as f:
+        json.dump(
+            {
+                "train": {"images": train_paths, "labels": train_label_paths},
+                "val": {"images": val_paths, "labels": val_label_paths},
+            },
+            f, indent=2, sort_keys=True
+        )
+
+    supervised_training(
+        name=model_name,
+        train_paths=train_paths,
+        train_label_paths=train_label_paths,
+        val_paths=val_paths,
+        val_label_paths=val_label_paths,
+        raw_key=None,
+        patch_shape=patch_shape, batch_size=batch_size,
+        check=check,
+        lr=1e-4,
+        n_iterations=int(1e5),
+        out_channels=1,
+        augmentations=None,
+        eps=1e-5,
+        sigma=4,
+        lower_bound=None,
+        upper_bound=None,
+        test_paths=test_paths,
+        test_label_paths=test_label_paths,
+        # save_root="",
+        dataset_class=DetectionDataset,
+        n_samples_train=3200,
+        n_samples_val=160,
+        sampler=MinPointSampler(min_points=1, p_reject=0.5),
+    )
+
+
+def main():
+    train()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/synapse_marker_detection/detection_dataset.py b/scripts/synapse_marker_detection/detection_dataset.py
@@ -1,3 +1,4 @@
+import imageio.v3 as imageio
 import numpy as np
 import pandas as pd
 import torch
@@ -38,7 +39,6 @@ def __call__(self, x: np.ndarray, n_points: int) -> bool:
 
 def load_labels(label_path, shape, bb):
     points = pd.read_csv(label_path)
-    assert len(points.columns) == len(shape)
     z_coords, y_coords, x_coords = points["axis-0"].values, points["axis-1"].values, points["axis-2"].values
 
     if bb is not None:
@@ -85,6 +85,25 @@ def process_labels(coords, shape, sigma, eps, bb=None):
     return labels
 
 
+def process_labels_hacky(coords, shape, sigma, eps, bb=None):
+
+    if bb:
+        (z_min, z_max), (y_min, y_max), (x_min, x_max) = [(s.start, s.stop) for s in bb]
+        restricted_shape = (z_max - z_min, y_max - y_min, x_max - x_min)
+        labels = np.zeros(restricted_shape, dtype="float32")
+        shape = restricted_shape
+    else:
+        labels = np.zeros(shape, dtype="float32")
+
+    labels[coords] = 1
+    labels = gaussian(labels, sigma)
+    labels = labels.clip(0, 0.0075)
+    labels /= (labels.max() + 1e-7)
+    labels *= 4
+    labels = labels.clip(0, 1)
+    return labels
+
+
 class DetectionDataset(torch.utils.data.Dataset):
     max_sampling_attempts = 500
 
@@ -132,8 +151,8 @@ def __init__(
         self.eps = eps
         self.sigma = sigma
 
-        with zarr.open(self.raw_path, "r") as f:
-            self.shape = f[self.raw_key].shape
+        self.raw = imageio.imread(self.raw_path) if raw_key is None else zarr.open(self.raw_path, "r")[raw_key][:]
+        self.shape = self.raw.shape
 
         if n_samples is None:
             self._len = self.compute_len(self.shape, self.patch_shape) if n_samples is None else n_samples
@@ -159,9 +178,8 @@ def _sample_bounding_box(self, shape):
         return tuple(slice(start, start + psh) for start, psh in zip(bb_start, self.patch_shape))
 
     def _get_sample(self, index):
-        raw, label_path = self.raw_path, self.label_path
+        raw, label_path = self.raw, self.label_path
 
-        raw = zarr.open(raw)[self.raw_key]
         have_raw_channels = raw.ndim == 4  # 3D with channels
         shape = raw.shape
 
@@ -187,7 +205,13 @@ def _get_sample(self, index):
                 if sample_id > self.max_sampling_attempts:
                     raise RuntimeError(f"Could not sample a valid batch in {self.max_sampling_attempts} attempts")
 
-        label = process_labels(coords, shape, self.sigma, self.eps, bb=bb)
+        # For synapse detection.
+        # label = process_labels(coords, shape, self.sigma, self.eps, bb=bb)
+
+        # For SGN detection with data-specific hacks
+        label = process_labels_hacky(coords, shape, self.sigma, self.eps, bb=bb)
+        gap = 6
+        raw_patch, label = raw_patch[gap:-gap], label[gap:-gap]
 
         have_label_channels = label.ndim == 4
         if have_label_channels: