|
12 | 12 | from sklearn.model_selection import train_test_split |
13 | 13 |
|
14 | 14 | from .semisupervised_training import get_unsupervised_loader |
15 | | -from .supervised_training import get_2d_model, get_3d_model, get_supervised_loader, _determine_ndim |
| 15 | +from .supervised_training import ( |
| 16 | + get_2d_model, get_3d_model, get_supervised_loader, _determine_ndim, _derive_key_from_files |
| 17 | +) |
16 | 18 | from ..inference.inference import get_model_path, compute_scale_from_voxel_size |
17 | 19 | from ..inference.util import _Scaler |
18 | 20 |
|
@@ -166,13 +168,11 @@ def mean_teacher_adaptation( |
166 | 168 | """ |
167 | 169 |
|
168 | 170 |
|
169 | | -def _get_paths(input_folder, pattern, resize_training_data, model_name, tmp_dir): |
| 171 | +def _get_paths(input_folder, pattern, resize_training_data, model_name, tmp_dir, val_fraction): |
170 | 172 | files = sorted(glob(os.path.join(input_folder, "**", pattern), recursive=True)) |
171 | 173 | if len(files) == 0: |
172 | 174 | raise ValueError(f"Could not load any files from {input_folder} with pattern {pattern}") |
173 | 175 |
|
174 | | - val_fraction = 0.15 |
175 | | - |
176 | 176 | # Heuristic: if we have less then 4 files then we crop a part of the volumes for validation. |
177 | 177 | # And resave the volumes. |
178 | 178 | resave_val_crops = len(files) < 4 |
@@ -235,30 +235,61 @@ def main(): |
235 | 235 | import argparse |
236 | 236 |
|
237 | 237 | parser = argparse.ArgumentParser( |
238 | | - description="" |
| 238 | + description="Adapt a model to data from a different domain using unsupervised domain adaptation.\n\n" |
| 239 | + "You can use this function to adapt the SynapseNet model for vesicle segmentation like this:\n" |
| 240 | + "synapse_net.run_domain_adaptation -n adapted_model -i /path/to/data --file_pattern *.mrc --source_model vesicles_3d\n" # noqa |
| 241 | +        "The trained model will be saved in the folder 'checkpoints/adapted_model' (or whichever name you pass to the '-n' argument). " # noqa
| 242 | + "You can then use this model for segmentation with the SynapseNet GUI or CLI. " |
| 243 | + "Check out the information below for details on the arguments of this function." |
| 244 | + ) |
| 245 | + parser.add_argument("--name", "-n", required=True, help="The name of the model to be trained. ") |
| 246 | + parser.add_argument("--input_folder", "-i", required=True, help="The folder with the training data.") |
| 247 | + parser.add_argument("--file_pattern", default="*", |
| 248 | + help="The pattern for selecting files for training. For example '*.mrc' to select mrc files.") |
| 249 | + parser.add_argument("--key", help="The internal file path for the training data. Will be derived from the file extension by default.") # noqa |
| 250 | + parser.add_argument( |
| 251 | + "--source_model", |
| 252 | + default="vesicles_3d", |
| 253 | + help="The source model used for weight initialization of teacher and student model. " |
| 254 | + "By default the model 'vesicles_3d' for vesicle segmentation in volumetric data is used." |
| 255 | + ) |
| 256 | + parser.add_argument( |
| 257 | + "--resize_training_data", action="store_true", |
| 258 | +        help="Whether to resize the training data to fit the voxel size of the source model's training data."
239 | 259 | ) |
240 | | - parser.add_argument("--name", "-n", required=True) |
241 | | - parser.add_argument("--input", "-i", required=True) |
242 | | - parser.add_argument("--pattern", "-p", default="*.mrc") |
243 | | - parser.add_argument("--source_model", default="vesicles_3d") |
244 | | - parser.add_argument("--resize_training_data", action="store_true") |
245 | | - parser.add_argument("--n_iterations", type=int, default=int(1e4)) |
246 | | - parser.add_argument("--patch_shape", nargs="+", type=int) |
| 260 | + parser.add_argument("--n_iterations", type=int, default=int(1e4), help="The number of iterations for training.") |
| 261 | + parser.add_argument( |
| 262 | + "--patch_shape", nargs=3, type=int, |
| 263 | + help="The patch shape for training. By default the patch shape the source model was trained with is used." |
| 264 | + ) |
| 265 | + |
| 266 | +    # More optional arguments:
| 267 | + parser.add_argument("--batch_size", type=int, default=1, help="The batch size for training.") |
| 268 | + parser.add_argument("--n_samples_train", type=int, help="The number of samples per epoch for training. If not given will be derived from the data size.") # noqa |
| 269 | + parser.add_argument("--n_samples_val", type=int, help="The number of samples per epoch for validation. If not given will be derived from the data size.") # noqa |
| 270 | + parser.add_argument("--val_fraction", type=float, default=0.15, help="The fraction of the data to use for validation. This has no effect if 'val_folder' and 'val_label_folder' were passed.") # noqa |
| 271 | + parser.add_argument("--check", action="store_true", help="Visualize samples from the data loaders to ensure correct data instead of running training.") # noqa |
| 272 | + |
247 | 273 | args = parser.parse_args() |
248 | 274 |
|
249 | 275 | source_checkpoint = get_model_path(args.source_model) |
250 | 276 | patch_shape = _parse_patch_shape(args.patch_shape, args.source_model) |
251 | 277 | with tempfile.TemporaryDirectory() as tmp_dir: |
252 | 278 | unsupervised_train_paths, unsupervised_val_paths = _get_paths( |
253 | | - args.input, args.pattern, args.resize_training_data, args.source_model, tmp_dir |
| 279 | +            args.input_folder, args.file_pattern, args.resize_training_data, args.source_model, tmp_dir, args.val_fraction,
254 | 280 | ) |
| 281 | + unsupervised_train_paths, raw_key = _derive_key_from_files(unsupervised_train_paths, args.key) |
255 | 282 |
|
256 | 283 | mean_teacher_adaptation( |
257 | 284 | name=args.name, |
258 | 285 | unsupervised_train_paths=unsupervised_train_paths, |
259 | 286 | unsupervised_val_paths=unsupervised_val_paths, |
260 | 287 | patch_shape=patch_shape, |
261 | 288 | source_checkpoint=source_checkpoint, |
262 | | - raw_key="data", |
| 289 | + raw_key=raw_key, |
263 | 290 | n_iterations=args.n_iterations, |
| 291 | + batch_size=args.batch_size, |
| 292 | + n_samples_train=args.n_samples_train, |
| 293 | + n_samples_val=args.n_samples_val, |
| 294 | + check=args.check, |
264 | 295 | ) |
0 commit comments