Commit 13aac03

Feat(Next-Gen CAREamics): draft convenience functions for NG-compatible CAREamics (N2V only) (#662)
## Description

> [!NOTE]
> **tl;dr**: Refactor and adapt configuration factories for NG Dataset compatibility.

This PR creates a copy of the convenience functions for use with the NG Dataset. The problem we currently face is that the `Configuration` is not compatible with the NG Dataset, and there is no way to use the NG Dataset without creating the configs explicitly (e.g. the algorithm config). This PR introduces an N2V-only set of convenience functions and a configuration class to create an `NGConfiguration` that is NG Dataset-compatible.

In essence:

- Two new submodules, `ng_factories` and `ng_configs`: the former holds the convenience functions, the latter global configurations.
- Adds an `NGConfiguration` that accepts `NGDataConfig`. The only other difference from the old `Configuration` is that there is no `set_3D` method anymore (it was never used), and the axes/model conv dims are no longer silently changed by validation in case of a mismatch, but instead explicitly raise an error.
- New `N2VConfiguration`, a child of `NGConfiguration`, that performs N2V-specific validation.
- New implementation of `create_n2v_configuration` (shadowing the original name, but in submodule `ng_factories`), which also gains the recently introduced `in_memory` and `channels` parameters. `channels` has a complex interaction with `n_channels` and `axes`, since it allows defining which channels to use.
- All convenience functions have been broken up into separate modules for clarity (as opposed to the old, massive `configuration_factories.py`...).

Note: I have not put much thought into the `channels`, `n_channels` and `axes` interaction. I implemented what made sense to me naively; we should take a close look and change it in another PR.
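The stricter validation behavior can be sketched without any CAREamics imports. The helper below is purely illustrative (`check_dims` does not exist in the code base); it mirrors the new behavior, where a data/model dimension mismatch raises an error instead of being silently reconciled as in the old `Configuration`:

```python
# Minimal sketch of the "raise instead of silently fix" behavior.
# `check_dims` is a hypothetical stand-in for the NGConfiguration validator.

def check_dims(axes: str, model_conv_dims: int) -> None:
    """Raise if the data axes and the model conv dimensions disagree."""
    data_dims = 3 if "Z" in axes else 2
    if data_dims != model_conv_dims:
        # the old Configuration silently rewrote one side to match the other;
        # NGConfiguration now surfaces the mismatch to the user instead
        raise ValueError(
            f"Axes {axes!r} imply {data_dims}D data, but the model is "
            f"{model_conv_dims}D. Fix the axes or the model explicitly."
        )

check_dims("YX", 2)  # consistent: no error
try:
    check_dims("ZYX", 2)  # mismatch: now an explicit error
except ValueError as e:
    print(e)
```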
## Breaking changes

Unless code was importing `create_ng_data_configuration` from `careamics.config.configuration_factories` instead of from `careamics.config`, the rest of the code base should be unaffected.

---

**Please ensure your PR meets the following requirements:**

- [x] Code builds and passes tests locally, including doctests
- [x] New tests have been added (for bug fixes/features)
- [x] Pre-commit passes
- [ ] PR to the documentation exists (for bug fixes / features)
1 parent a3fe03c commit 13aac03

22 files changed: +1667 −154 lines

src/careamics/config/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -51,14 +51,14 @@
     create_microsplit_configuration,
     create_n2n_configuration,
     create_n2v_configuration,
-    create_ng_data_configuration,
     create_pn2v_configuration,
 )
 from .data import DataConfig, NGDataConfig
 from .data.inference_config import InferenceConfig
 from .lightning.callbacks import CheckpointConfig
 from .lightning.training_config import TrainingConfig
 from .losses.loss_config import LVAELossConfig
+from .ng_factories.data_factory import create_ng_data_configuration
 from .noise_model import (
     GaussianMixtureNMConfig,
     MultiChannelNMConfig,
```
src/careamics/config/architectures/unet_config.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -114,6 +114,9 @@ def is_3D(self) -> bool:
         """
         Return whether the model is 3D or not.
 
+        This method is used in the NG configuration validation to check that the model
+        dimensions match the data dimensions.
+
         Returns
         -------
         bool
```

src/careamics/config/configuration_factories.py

Lines changed: 1 addition & 94 deletions

```diff
@@ -13,7 +13,7 @@
     PN2VAlgorithm,
 )
 from careamics.config.architectures import LVAEConfig, UNetConfig
-from careamics.config.data import DataConfig, NGDataConfig
+from careamics.config.data import DataConfig
 from careamics.config.lightning.training_config import TrainingConfig
 from careamics.config.losses.loss_config import LVAELossConfig
 from careamics.config.noise_model.likelihood_config import (
@@ -357,99 +357,6 @@ def _create_microsplit_data_configuration(
     return MicroSplitDataConfig(**data)
 
 
-def create_ng_data_configuration(
-    data_type: Literal["array", "tiff", "zarr", "czi", "custom"],
-    axes: str,
-    patch_size: Sequence[int],
-    batch_size: int,
-    augmentations: list[SPATIAL_TRANSFORMS_UNION] | None = None,
-    channels: Sequence[int] | None = None,
-    in_memory: bool | None = None,
-    train_dataloader_params: dict[str, Any] | None = None,
-    val_dataloader_params: dict[str, Any] | None = None,
-    pred_dataloader_params: dict[str, Any] | None = None,
-    seed: int | None = None,
-) -> NGDataConfig:
-    """
-    Create a training NGDataConfig.
-
-    Parameters
-    ----------
-    data_type : {"array", "tiff", "zarr", "czi", "custom"}
-        Type of the data.
-    axes : str
-        Axes of the data.
-    patch_size : list of int
-        Size of the patches along the spatial dimensions.
-    batch_size : int
-        Batch size.
-    channels : Sequence of int, default=None
-        List of channels to use. If `None`, all channels are used.
-    in_memory : bool, default=None
-        Whether to load all data into memory. This is only supported for 'array',
-        'tiff' and 'custom' data types. If `None`, defaults to `True` for 'array',
-        'tiff' and 'custom', and `False` for 'zarr' and 'czi' data types. Must be `True`
-        for 'array'.
-    augmentations : list of transforms or None, default=None
-        List of transforms to apply. If `None`, default augmentations are applied
-        (flip in X and Y, rotations by 90 degrees in the XY plane).
-    train_dataloader_params : dict
-        Parameters for the training dataloader, see PyTorch notes, by default None.
-    val_dataloader_params : dict
-        Parameters for the validation dataloader, see PyTorch notes, by default None.
-    pred_dataloader_params : dict
-        Parameters for the test dataloader, see PyTorch notes, by default None.
-    seed : int, default=None
-        Random seed for reproducibility. If `None`, no seed is set.
-
-    Returns
-    -------
-    NGDataConfig
-        Next-Generation Data model with the specified parameters.
-    """
-    if augmentations is None:
-        augmentations = _list_spatial_augmentations()
-
-    # data model
-    data: dict[str, Any] = {
-        "mode": "training",
-        "data_type": data_type,
-        "axes": axes,
-        "batch_size": batch_size,
-        "channels": channels,
-        "transforms": augmentations,
-        "seed": seed,
-    }
-
-    if in_memory is not None:
-        data["in_memory"] = in_memory
-
-    # don't override defaults set in DataConfig class
-    if train_dataloader_params is not None:
-        # the presence of `shuffle` key in the dataloader parameters is enforced
-        # by the NGDataConfig class
-        if "shuffle" not in train_dataloader_params:
-            train_dataloader_params["shuffle"] = True
-
-        data["train_dataloader_params"] = train_dataloader_params
-
-    if val_dataloader_params is not None:
-        data["val_dataloader_params"] = val_dataloader_params
-
-    if pred_dataloader_params is not None:
-        data["pred_dataloader_params"] = pred_dataloader_params
-
-    # add training patching
-    data["patching"] = {
-        "name": "random",
-        "patch_size": patch_size,
-    }
-
-    return NGDataConfig(**data)
-
-
 def _create_training_configuration(
     trainer_params: dict,
     logger: Literal["wandb", "tensorboard", "none"],
```
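One detail of the relocated factory worth noting: user-supplied `train_dataloader_params` get a `shuffle` key injected when the caller omitted it, because `NGDataConfig` requires that key to be present. A standalone sketch of that rule, with no CAREamics imports (the helper name `with_shuffle_default` is hypothetical):

```python
from typing import Any

def with_shuffle_default(train_dataloader_params: dict[str, Any]) -> dict[str, Any]:
    """Mirror of the factory rule: ensure a `shuffle` key, defaulting to True."""
    # inject `shuffle=True` only when missing, never overriding an explicit value
    if "shuffle" not in train_dataloader_params:
        train_dataloader_params["shuffle"] = True
    return train_dataloader_params

print(with_shuffle_default({"num_workers": 4}))           # shuffle defaults to True
print(with_shuffle_default({"num_workers": 4, "shuffle": False}))  # False preserved
```

An explicit `shuffle=False` thus survives, which matters for debugging runs where deterministic batch order is wanted.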

src/careamics/config/data/ng_data_config.py

Lines changed: 40 additions & 17 deletions

```diff
@@ -554,24 +554,26 @@ def validate_dimensions(self: Self) -> Self:
         ValueError
             If the patch size dimension is not compatible with the axes.
         """
-        if "Z" in self.axes:
-            if (
-                hasattr(self.patching, "patch_size")
-                and len(self.patching.patch_size) != 3
-            ):
-                raise ValueError(
-                    f"`patch_size` in `patching` must have 3 dimensions if the data is"
-                    f" 3D, got axes {self.axes})."
-                )
+        # "whole" patching does not have dimensions to validate
+        if not hasattr(self.patching, "patch_size"):
+            return self
+
+        if self.data_type == "czi":
+            # Z and T are both depth axes for CZI data
+            expected_dims = 3 if ("Z" in self.axes or "T" in self.axes) else 2
+            additional_message = " (`Z` and `T` are depth axes for CZI data)"
         else:
-            if (
-                hasattr(self.patching, "patch_size")
-                and len(self.patching.patch_size) != 2
-            ):
-                raise ValueError(
-                    f"`patch_size` in `patching` must have 2 dimensions if the data is"
-                    f" 3D, got axes {self.axes})."
-                )
+            expected_dims = 3 if "Z" in self.axes else 2
+            additional_message = ""
+
+        # infer dimension from requested patch size
+        actual_dims = len(self.patching.patch_size)
+        if actual_dims != expected_dims:
+            raise ValueError(
+                f"`patch_size` in `patching` must have {expected_dims} dimensions, "
+                f"got {self.patching.patch_size} with axes {self.axes}"
+                f"{additional_message}."
+            )
 
         return self
@@ -780,6 +782,27 @@ def set_means_and_stds(
             target_stds=target_stds,
         )
 
+    def is_3D(self) -> bool:
+        """
+        Check if the data is 3D based on the axes.
+
+        Either "Z" is in the axes and patching `patch_size` has 3 dimensions, or for CZI
+        data, "Z" is in the axes or "T" is in the axes and patching `patch_size` has
+        3 dimensions.
+
+        This method is used during NGConfiguration validation to cross-check dimensions
+        with the algorithm configuration.
+
+        Returns
+        -------
+        bool
+            True if the data is 3D, False otherwise.
+        """
+        if self.data_type == "czi":
+            return "Z" in self.axes or "T" in self.axes
+        else:
+            return "Z" in self.axes
+
     # TODO: if switching from a state in which in_memory=True to an incompatible state
     # an error will be raised. Should that automatically be set to False instead?
     # TODO `channels=None` is ambiguous: all channels or same channels as in training?
```
src/careamics/config/lightning/training_config.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -29,6 +29,7 @@ class TrainingConfig(BaseModel):
     model_config = ConfigDict(
         validate_assignment=True,
     )
+
     lightning_trainer_config: dict | None = None
     """Configuration for the PyTorch Lightning Trainer, following PyTorch Lightning
     Trainer class"""
```
src/careamics/config/ng_configs/__init__.py

Lines changed: 5 additions & 0 deletions

```diff
@@ -0,0 +1,5 @@
+"""Definitions of configurations for CAREamics, compatible with the NG dataset."""
+
+__all__ = ["N2VConfiguration"]
+
+from .n2v_configuration import N2VConfiguration
```
src/careamics/config/ng_configs/n2v_configuration.py

Lines changed: 64 additions & 0 deletions

```diff
@@ -0,0 +1,64 @@
+"""Configuration for N2V."""
+
+from typing import Self
+
+import numpy as np
+from pydantic import model_validator
+
+from careamics.config.algorithms import N2VAlgorithm
+from careamics.config.data.patching_strategies import RandomPatchingConfig
+
+from .ng_configuration import NGConfiguration
+
+
+class N2VConfiguration(NGConfiguration):
+    """N2V-specific configuration."""
+
+    algorithm_config: N2VAlgorithm
+
+    @model_validator(mode="after")
+    def validate_n2v_mask_pixel_perc(self: Self) -> Self:
+        """
+        Validate that there will always be at least one blind-spot pixel in every patch.
+
+        The probability of creating a blind-spot pixel is a function of the chosen
+        masked pixel percentage and patch size.
+
+        Returns
+        -------
+        Self
+            Validated configuration.
+
+        Raises
+        ------
+        ValueError
+            If the probability of masking a pixel within a patch is less than 1 for the
+            chosen masked pixel percentage and patch size.
+        """
+        if self.data_config.mode == "training":
+            assert isinstance(self.data_config.patching, RandomPatchingConfig)
+
+            mask_pixel_perc = self.algorithm_config.n2v_config.masked_pixel_percentage
+            patch_size = self.data_config.patching.patch_size
+            expected_area_per_pixel = 1 / (mask_pixel_perc / 100)
+
+            n_dims = 3 if self.algorithm_config.model.is_3D() else 2
+            patch_size_lower_bound = int(
+                np.ceil(expected_area_per_pixel ** (1 / n_dims))
+            )
+            required_patch_size = tuple(
+                2 ** int(np.ceil(np.log2(patch_size_lower_bound)))
+                for _ in range(n_dims)
+            )
+            required_mask_pixel_perc = (1 / np.prod(patch_size)) * 100
+
+            if expected_area_per_pixel > np.prod(patch_size):
+                raise ValueError(
+                    "The probability of creating a blind-spot pixel within a patch is "
+                    f"below 1, for a patch size of {patch_size} with a masked pixel "
+                    f"percentage of {mask_pixel_perc}%. Either increase the patch size "
+                    f"to {required_patch_size} or increase the masked pixel percentage "
+                    f"to at least {required_mask_pixel_perc}%."
+                )
+
+        return self
```
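The bound enforced by `validate_n2v_mask_pixel_perc` can be checked with plain arithmetic. A standalone sketch, assuming N2V's usual 0.2% default masked pixel percentage (the `min_patch_area` helper is illustrative, not part of the PR):

```python
import math

def min_patch_area(mask_pixel_perc: float) -> float:
    """Pixels needed, on average, to expect one blind-spot pixel per patch."""
    # with p% masking, one pixel is masked for every 1 / (p / 100) pixels
    return 1 / (mask_pixel_perc / 100)

patch_size = (64, 64)
patch_area = math.prod(patch_size)  # 4096 pixels

# a 64x64 patch comfortably satisfies the validator for 0.2% masking
assert math.isclose(min_patch_area(0.2), 500.0)
assert min_patch_area(0.2) <= patch_area

# a 16x16 patch would fail: 256 < 500, so the validator would demand a larger
# patch or a masked pixel percentage of at least (1 / 256) * 100 = 0.390625%
assert min_patch_area(0.2) > 16 * 16
assert (1 / (16 * 16)) * 100 == 0.390625
```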
