Update training scripts

constantinpape · constantinpape · commit 8d03fe279d63 · 2025-05-19T11:34:46.000+02:00
diff --git a/flamingo_tools/training/__init__.py b/flamingo_tools/training/__init__.py
@@ -0,0 +1,2 @@
+from .util import get_3d_model, get_supervised_loader
+from .mean_teacher_training import mean_teacher_training
diff --git a/flamingo_tools/training/mean_teacher_training.py b/flamingo_tools/training/mean_teacher_training.py
@@ -6,13 +6,7 @@
 import torch_em.self_training as self_training
 from torchvision import transforms
 
-
-def get_3d_model(out_channels):
-    raise NotImplementedError
-
-
-def get_supervised_loader():
-    raise NotImplementedError
+from .util import get_supervised_loader, get_3d_model
 
 
 def weak_augmentations(p: float = 0.75) -> callable:
@@ -79,15 +73,17 @@ def get_unsupervised_loader(
     return loader
 
 
-def mean_teacher_adaptation(
+def mean_teacher_training(
     name: str,
     unsupervised_train_paths: Tuple[str],
     unsupervised_val_paths: Tuple[str],
     patch_shape: Tuple[int, int, int],
     save_root: Optional[str] = None,
     source_checkpoint: Optional[str] = None,
-    supervised_train_paths: Optional[Tuple[str]] = None,
-    supervised_val_paths: Optional[Tuple[str]] = None,
+    supervised_train_image_paths: Optional[Tuple[str]] = None,
+    supervised_val_image_paths: Optional[Tuple[str]] = None,
+    supervised_train_label_paths: Optional[Tuple[str]] = None,
+    supervised_val_label_paths: Optional[Tuple[str]] = None,
     confidence_threshold: float = 0.9,
     raw_key: Optional[str] = None,
     raw_key_supervised: Optional[str] = None,
@@ -99,14 +95,13 @@ def mean_teacher_adaptation(
     n_samples_val: Optional[int] = None,
     sampler: Optional[callable] = None,
 ) -> None:
-    """Run domain adapation to transfer a network trained on a source domain for a supervised
-    segmentation task to perform this task on a different target domain.
+    """This function implements network training with a mean teacher approach.
 
-    We support different domain adaptation settings:
-    - unsupervised domain adaptation: the default mode when 'supervised_train_paths' and
-     'supervised_val_paths' are not given.
-    - semi-supervised domain adaptation: domain adaptation on unlabeled and labeled data,
-      when 'supervised_train_paths' and 'supervised_val_paths' are given.
+    It can be used for semi-supervised learning, unsupervised domain adaptation and supervised domain adaptation.
+    These different training modes can be selected as follows:
+    - semi-supervised learning: pass 'unsupervised_train/val_paths' and 'supervised_train/val_image_paths' (plus label paths).
+    - unsupervised domain adaptation: pass 'unsupervised_train/val_paths' and 'source_checkpoint'.
+    - supervised domain adaptation: pass 'unsupervised_train/val_paths', 'supervised_train/val_image_paths' (plus label paths), 'source_checkpoint'.
 
     Args:
         name: The name for the checkpoint to be trained.
@@ -125,30 +120,38 @@ def mean_teacher_adaptation(
             If the checkpoint is not given, then both student and teacher model are initialized
             from scratch. In this case `supervised_train_paths` and `supervised_val_paths` have to
             be given in order to provide training data from the source domain.
-        supervised_train_paths: Filepaths to the hdf5 files for the training data in the source domain.
-            This training data is optional. If given, it is used for unsupervised learnig and requires labels.
-        supervised_val_paths: Filepaths to the df5 files for the validation data in the source domain.
-            This validation data is optional. If given, it is used for unsupervised learnig and requires labels.
+        supervised_train_image_paths: Paths to the files for the supervised image data; training split.
+            This training data is optional. If given, it also requires labels.
+        supervised_val_image_paths: Paths to the files for the supervised image data; validation split.
+            This validation data is optional. If given, it also requires labels.
+        supervised_train_label_paths: Filepaths to the files for the supervised label masks; training split.
+            This training data is optional.
+        supervised_val_label_paths: Filepaths to the files for the supervised label masks; validation split.
+            This validation data is optional.
         confidence_threshold: The threshold for filtering data in the unsupervised loss.
             The label filtering is done based on the uncertainty of network predictions, and only
             the data with higher certainty than this threshold is used for training.
-        raw_key: The key that holds the raw data inside of the hdf5 or similar files.
+        raw_key: The key that holds the raw data inside of the hdf5 or similar files;
+            for the unsupervised training data. Set to None for tifs.
+        raw_key_supervised: The key that holds the raw data inside of the hdf5 or similar files;
+            for the supervised training data. Set to None for tifs.
         label_key: The key that holds the labels inside of the hdf5 files for supervised learning.
-            This is only required if `supervised_train_paths` and `supervised_val_paths` are given.
+            This is only required if `supervised_train_label_paths` and `supervised_val_label_paths` are given.
+            Set to None for tifs.
         batch_size: The batch size for training.
         lr: The initial learning rate.
         n_iterations: The number of iterations to train for.
         n_samples_train: The number of train samples per epoch. By default this will be estimated
             based on the patch_shape and size of the volumes used for training.
         n_samples_val: The number of val samples per epoch. By default this will be estimated
             based on the patch_shape and size of the volumes used for validation.
-    """
-    assert (supervised_train_paths is None) == (supervised_val_paths is None)
+    """  # noqa
+    assert (supervised_train_image_paths is None) == (supervised_val_image_paths is None)
 
     if source_checkpoint is None:
-        # training from scratch only makes sense if we have supervised training data
+        # Training from scratch only makes sense if we have supervised training data
         # that's why we have the assertion here.
-        assert supervised_train_paths is not None
+        assert supervised_train_image_paths is not None
         model = get_3d_model(out_channels=3)
         reinit_teacher = True
     else:
@@ -174,15 +177,16 @@ def mean_teacher_adaptation(
         unsupervised_val_paths, raw_key, patch_shape, batch_size, n_samples=n_samples_val
     )
 
-    if supervised_train_paths is not None:
-        assert label_key is not None
+    if supervised_train_image_paths is not None:
         supervised_train_loader = get_supervised_loader(
-            supervised_train_paths, raw_key_supervised, label_key,
-            patch_shape, batch_size, n_samples=n_samples_train,
+            supervised_train_image_paths, supervised_train_label_paths,
+            patch_shape=patch_shape, batch_size=batch_size, n_samples=n_samples_train,
+            image_key=raw_key_supervised, label_key=label_key,
         )
         supervised_val_loader = get_supervised_loader(
-            supervised_val_paths, raw_key_supervised, label_key,
-            patch_shape, batch_size, n_samples=n_samples_val,
+            supervised_val_image_paths, supervised_val_label_paths,
+            patch_shape=patch_shape, batch_size=batch_size, n_samples=n_samples_val,
+            image_key=raw_key_supervised, label_key=label_key,
         )
     else:
         supervised_train_loader = None
diff --git a/scripts/training/sgn_domain_adaptation.py b/scripts/training/sgn_domain_adaptation.py
@@ -3,7 +3,7 @@
 
 import torch
 from torch_em.util import load_model
-from flamingo_tools.training.domain_adaptation import mean_teacher_adaptation
+from flamingo_tools.training import mean_teacher_training
 
 
 def get_paths():
@@ -21,7 +21,7 @@ def run_training(name):
     source_checkpoint = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/trained_models/SGN/cochlea_distance_unet_SGN_March2025Model"  # noqa
 
     train_paths, val_paths = get_paths()
-    mean_teacher_adaptation(
+    mean_teacher_training(
         name=name,
         unsupervised_train_paths=train_paths,
         unsupervised_val_paths=val_paths,
diff --git a/scripts/training/train_distance_unet.py b/scripts/training/train_distance_unet.py
@@ -4,7 +4,7 @@
 from glob import glob
 
 import torch_em
-from torch_em.model import UNet3d
+from flamingo_tools.training import get_supervised_loader, get_3d_model
 
 ROOT_CLUSTER = "/scratch-grete/usr/nimcpape/data/moser/lightsheet/training"
 
@@ -67,23 +67,12 @@ def get_loader(root, split, patch_shape, batch_size, filter_empty):
     assert len(this_image_paths) == len(this_label_paths)
     assert len(this_image_paths) > 0
 
-    label_transform = torch_em.transform.label.PerObjectDistanceTransform(
-            distances=True, boundary_distances=True, foreground=True,
-        )
-
     if split == "train":
         n_samples = 250 * batch_size
     elif split == "val":
-        n_samples = 20 * batch_size
-
-    sampler = torch_em.data.sampler.MinInstanceSampler(p_reject=0.8)
-    loader = torch_em.default_segmentation_loader(
-        raw_paths=image_paths, raw_key=None, label_paths=label_paths, label_key=None,
-        batch_size=batch_size, patch_shape=patch_shape, label_transform=label_transform,
-        n_samples=n_samples, num_workers=4, shuffle=True,
-        sampler=sampler
-    )
-    return loader
+        n_samples = 16 * batch_size
+
+    return get_supervised_loader(this_image_paths, this_label_paths, patch_shape, batch_size, n_samples=n_samples)
 
 
 def main():
@@ -120,7 +109,7 @@ def main():
     patch_shape = (64, 128, 128)
 
     # The U-Net.
-    model = UNet3d(in_channels=1, out_channels=3, initial_features=32, final_activation="Sigmoid")
+    model = get_3d_model()
 
     # Create the training loader with train and val set.
     train_loader = get_loader(root, "train", patch_shape, batch_size, filter_empty=filter_empty)
diff --git a/test/test_validation.py b/test/test_validation.py
@@ -0,0 +1,67 @@
+import unittest
+from shutil import rmtree
+
+import imageio.v3 as imageio
+import pandas as pd
+from skimage.measure import regionprops_table
+from skimage.segmentation import relabel_sequential
+
+
+class TestValidation(unittest.TestCase):
+    folder = "./tmp"
+
+    def setUp(self):
+        from flamingo_tools.test_data import get_test_volume_and_segmentation
+
+        _, self.seg_path, _ = get_test_volume_and_segmentation(self.folder)
+
+    def tearDown(self):
+        try:
+            rmtree(self.folder)
+        except Exception:
+            pass
+
+    def test_compute_scores_for_annotated_slice_2d(self):
+        from flamingo_tools.validation import compute_scores_for_annotated_slice
+
+        segmentation = imageio.imread(self.seg_path)
+        segmentation = segmentation[segmentation.shape[0] // 2]
+        segmentation, _, _ = relabel_sequential(segmentation)
+
+        properties = ("label", "centroid")
+        annotations = regionprops_table(segmentation, properties=properties)
+        annotations = pd.DataFrame(annotations).rename(columns={"centroid-0": "axis-0", "centroid-1": "axis-1"})
+        annotations = annotations.drop(columns="label")
+
+        result = compute_scores_for_annotated_slice(segmentation, annotations)
+
+        # Check the results. Note: we actually get 1 FP and 1 FN because 1 of the centroids is outside the object.
+        self.assertEqual(result["fp"], 1)
+        self.assertEqual(result["fn"], 1)
+        self.assertEqual(result["tp"], segmentation.max() - 1)
+
+    def test_compute_scores_for_annotated_slice_3d(self):
+        from flamingo_tools.validation import compute_scores_for_annotated_slice
+
+        segmentation = imageio.imread(self.seg_path)
+        z0, z1 = segmentation.shape[0] // 2 - 2, segmentation.shape[0] // 2 + 2
+        segmentation = segmentation[z0:z1]
+        segmentation, _, _ = relabel_sequential(segmentation)
+
+        properties = ("label", "centroid")
+        annotations = regionprops_table(segmentation, properties=properties)
+        annotations = pd.DataFrame(annotations).rename(
+            columns={"centroid-0": "axis-0", "centroid-1": "axis-1", "centroid-2": "axis-2"}
+        )
+        annotations = annotations.drop(columns="label")
+
+        result = compute_scores_for_annotated_slice(segmentation, annotations)
+
+        # Check the results. Note: we actually get 1 FP and 1 FN because 1 of the centroids is outside the object.
+        self.assertEqual(result["fp"], 1)
+        self.assertEqual(result["fn"], 1)
+        self.assertEqual(result["tp"], segmentation.max() - 1)
+
+
+if __name__ == "__main__":
+    unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .util import get_3d_model, get_supervised_loader`
	`2`	`+from .mean_teacher_training import mean_teacher_training`