Merge pull request #71 from jhlegarreta/RefactorFilteringCode

oesteban · web-flow · commit a4bea6b06d4c · 2025-06-08T09:38:38.000+02:00
REF: Refactor filters into ``filtering``
diff --git a/pyproject.toml b/pyproject.toml
@@ -221,6 +221,7 @@ env = "PYTHONHASHSEED=0"
 markers = [
     "random_gtab_data: Custom marker for random gtab data tests",
     "random_dwi_data: Custom marker for random dwi data tests",
+    "random_uniform_4d_data: Custom marker for random 4d data tests",
 ]
 filterwarnings = [
   "ignore::DeprecationWarning",
diff --git a/src/nifreeze/data/filtering.py b/src/nifreeze/data/filtering.py
@@ -28,8 +28,12 @@
 from scipy.ndimage import median_filter
 from skimage.morphology import ball
 
+from nifreeze.data.dmri import DEFAULT_CLIP_PERCENTILE
+
 DEFAULT_DTYPE = "int16"
 """The default image's data type."""
+BVAL_ATOL = 100.0
+"""b-value tolerance value."""
 
 
 def advanced_clip(
@@ -96,3 +100,161 @@ def advanced_clip(
         data = np.round(255 * data).astype(dtype)
 
     return data
+
+
+def robust_minmax_normalization(
+    data: np.ndarray,
+    mask: np.ndarray | None = None,
+    p_min: float = 5.0,
+    p_max: float = 95.0,
+    inplace: bool = False,
+) -> np.ndarray | None:
+    r"""Normalize min-max percentiles of each volume to the grand min-max
+    percentiles.
+
+    Robust min/max normalization of the volumes in the dataset following:
+
+    .. math::
+        \text{data}_{\text{normalized}} = \frac{(\text{data} - p_{min}) \cdot p_{\text{mean}}}{p_{\text{range}}} + p_{min}^{\text{mean}}
+
+    where
+
+    .. math::
+        p_{\text{range}} = p_{max} - p_{min}, \quad p_{\text{mean}} = \frac{1}{N} \sum_{i=1}^N p_{\text{range}_i}, \quad p_{min}^{\text{mean}} = \frac{1}{N} \sum_{i=1}^N p_{min_i}
+
+    If a mask is provided, only the data within the mask are considered.
+
+    Parameters
+    ----------
+    data : :obj:`~numpy.ndarray`
+        Data to be normalized.
+    mask : :obj:`~numpy.ndarray`, optional
+        Mask. If provided, only the data within the mask are considered.
+    p_min : :obj:`float`, optional
+        The lower percentile value for normalization.
+    p_max : :obj:`float`, optional
+        The upper percentile value for normalization.
+    inplace : :obj:`bool`, optional
+        If ``True``, the data are normalized in place; otherwise a copy is normalized.
+
+    Returns
+    -------
+    data : :obj:`~numpy.ndarray` or None
+        Normalized data or ``None`` if ``inplace`` is ``True``.
+    """
+
+    normalized = data if inplace else data.copy()
+
+    mask = mask if mask is not None else np.ones(data.shape[-1], dtype=bool)
+    volumes = data[..., mask]
+    reshape_shape = (-1, volumes.shape[-1]) if mask is None else (-1, sum(mask))
+    reshaped_data = volumes.reshape(reshape_shape)
+    p5 = np.percentile(reshaped_data, p_min, axis=0)
+    p95 = np.percentile(reshaped_data, p_max, axis=0) - p5
+    normalized[..., mask] = (volumes - p5) * p95.mean() / p95 + p5.mean()
+
+    if inplace:
+        return None
+
+    return normalized
+
+
+def grand_mean_normalization(
+    data: np.ndarray,
+    mask: np.ndarray | None = None,
+    center: float = DEFAULT_CLIP_PERCENTILE,
+    inplace: bool = False,
+) -> np.ndarray | None:
+    """Robust grand mean normalization.
+
+    Regresses out global signal differences so that data are normalized and
+    centered around a given value.
+
+    If a mask is provided, only the data within the mask are considered.
+
+    Parameters
+    ----------
+    data : :obj:`~numpy.ndarray`
+        Data to be normalized.
+    mask : :obj:`~numpy.ndarray`, optional
+        Mask. If provided, only the data within the mask are considered.
+    center : float, optional
+        Central value around which to normalize the data.
+    inplace : :obj:`bool`, optional
+        If ``True``, the data are normalized in place; otherwise a copy is normalized.
+
+    Returns
+    -------
+    data : :obj:`~numpy.ndarray` or None
+        Normalized data or ``None`` if ``inplace`` is ``True``.
+    """
+
+    normalized = data if inplace else data.copy()
+
+    mask = mask if mask is not None else np.ones(data.shape[-1], dtype=bool)
+    volumes = data[..., mask]
+
+    centers = np.median(volumes, axis=(0, 1, 2))
+    reference = np.percentile(centers[centers >= 1.0], center)
+    centers[centers < 1.0] = reference
+    drift = reference / centers
+    normalized[..., mask] = volumes * drift
+
+    if inplace:
+        return None
+
+    return normalized
+
+
+def dwi_select_shells(
+    gradients: np.ndarray,
+    index: int,
+    atol_low: float | None = None,
+    atol_high: float | None = None,
+) -> np.ndarray:
+    """Select DWI shells around the given index and lower and upper b-value
+    bounds.
+
+    Computes a boolean mask of the DWI shells around the given index with the
+    provided lower and upper bound b-values.
+
+    If ``atol_low`` and ``atol_high`` are both ``None``, the returned shell mask
+    selects every volume except the one at ``index``.
+
+    Parameters
+    ----------
+    gradients : :obj:`~numpy.ndarray`
+        Gradients.
+    index : :obj:`int`
+        Index of the shell data.
+    atol_low : :obj:`float`, optional
+        Tolerance below the b-value at ``index`` for a volume to be selected.
+    atol_high : :obj:`float`, optional
+        Tolerance above the b-value at ``index`` for a volume to be selected.
+
+    Returns
+    -------
+    shellmask : :obj:`~numpy.ndarray`
+        Shell mask.
+    """
+
+    bvalues = gradients[:, -1]
+    bcenter = bvalues[index]
+
+    shellmask = np.ones(len(bvalues), dtype=bool)
+    shellmask[index] = False  # Drop the held-out index
+
+    if atol_low is None and atol_high is None:
+        return shellmask
+
+    atol_low = 0 if atol_low is None else atol_low
+    atol_high = gradients[:, -1].max() if atol_high is None else atol_high
+
+    # Keep only b-values within the range defined by atol_high and atol_low
+    shellmask[bvalues > (bcenter + atol_high)] = False
+    shellmask[bvalues < (bcenter - atol_low)] = False
+
+    if not shellmask.sum():
+        raise RuntimeError(f"Shell corresponding to index {index} (b={bcenter}) is empty.")
+
+    return shellmask
diff --git a/src/nifreeze/model/dmri.py b/src/nifreeze/model/dmri.py
@@ -27,11 +27,8 @@
 from dipy.core.gradients import gradient_table_from_bvals_bvecs
 from joblib import Parallel, delayed
 
-from nifreeze.data.dmri import (
-    DEFAULT_CLIP_PERCENTILE,
-    DTI_MIN_ORIENTATIONS,
-    DWI,
-)
+from nifreeze.data.dmri import DTI_MIN_ORIENTATIONS, DWI
+from nifreeze.data.filtering import BVAL_ATOL, dwi_select_shells, grand_mean_normalization
 from nifreeze.model.base import BaseModel, ExpectationModel
 
 S0_EPSILON = 1e-6
@@ -215,14 +212,14 @@ def fit_predict(self, index: int | None = None, **kwargs):
 class AverageDWIModel(ExpectationModel):
     """A trivial model that returns an average DWI volume."""
 
-    __slots__ = ("_th_low", "_th_high", "_detrend")
+    __slots__ = ("_atol_low", "_atol_high", "_detrend")
 
     def __init__(
         self,
         dataset: DWI,
         stat: str = "median",
-        th_low: float = 100.0,
-        th_high: float = 100.0,
+        atol_low: float = BVAL_ATOL,
+        atol_high: float = BVAL_ATOL,
         detrend: bool = False,
         **kwargs,
     ):
@@ -235,10 +232,10 @@ def __init__(
             Reference to a DWI object.
         stat : :obj:`str`, optional
             Whether the summary statistic to apply is ``"mean"`` or ``"median"``.
-        th_low : :obj:`float`, optional
+        atol_low : :obj:`float`, optional
             A lower bound for the b-value corresponding to the diffusion weighted images
             that will be averaged.
-        th_high : :obj:`float`, optional
+        atol_high : :obj:`float`, optional
             An upper bound for the b-value corresponding to the diffusion weighted images
             that will be averaged.
         detrend : :obj:`bool`, optional
@@ -249,8 +246,8 @@ def __init__(
         """
         super().__init__(dataset, stat=stat, **kwargs)
 
-        self._th_low = th_low
-        self._th_high = th_high
+        self._atol_low = atol_low
+        self._atol_high = atol_high
         self._detrend = detrend
 
     def fit_predict(self, index: int | None = None, *_, **kwargs):
@@ -259,31 +256,22 @@ def fit_predict(self, index: int | None = None, *_, **kwargs):
         if index is None:
             raise RuntimeError(f"Model {self.__class__.__name__} does not allow locking.")
 
-        bvalues = self._dataset.gradients[:, -1]
-        bcenter = bvalues[index]
-
-        shellmask = np.ones(len(self._dataset), dtype=bool)
-
-        # Keep only bvalues within the range defined by th_high and th_low
-        shellmask[index] = False
-        shellmask[bvalues > (bcenter + self._th_high)] = False
-        shellmask[bvalues < (bcenter - self._th_low)] = False
-
-        if not shellmask.sum():
-            raise RuntimeError(f"Shell corresponding to index {index} (b={bcenter}) is empty.")
+        shellmask = dwi_select_shells(
+            self._dataset.gradients,
+            index,
+            atol_low=self._atol_low,
+            atol_high=self._atol_high,
+        )
 
         shelldata = self._dataset.dataobj[..., shellmask]
 
         # Regress out global signal differences
         if self._detrend:
-            centers = np.median(shelldata, axis=(0, 1, 2))
-            reference = np.percentile(centers[centers >= 1.0], DEFAULT_CLIP_PERCENTILE)
-            centers[centers < 1.0] = reference
-            drift = reference / centers
-            shelldata = shelldata * drift
+            shelldata = grand_mean_normalization(shelldata, mask=None)
 
         # Select the summary statistic
         avg_func = np.median if self._stat == "median" else np.mean
+
         # Calculate the average
         return avg_func(shelldata, axis=-1)
 
diff --git a/test/conftest.py b/test/conftest.py
@@ -171,7 +171,7 @@ def random_number_generator(request):
 @pytest.fixture(autouse=True)
 def setup_random_uniform_4d_data(request):
     """Automatically generate random data for tests."""
-    marker = request.node.get_closest_marker("random_uniform_4d_data_generator")
+    marker = request.node.get_closest_marker("random_uniform_4d_data")
 
     size = (32, 32, 32, 5)
     a = 0.0
@@ -187,15 +187,25 @@ def setup_random_uniform_4d_data(request):
 def _generate_random_choices(request, values, count):
     rng = request.node.rng
 
+    values = set(values)
+
     num_elements = len(values)
 
-    # Randomly distribute N among the given values
-    partitions = rng.multinomial(count, np.ones(num_elements) / num_elements)
+    if count < num_elements:
+        raise ValueError(
+            f"Count must be at least the number of unique values to guarantee inclusion\nProvided: {count} and {values}."
+        )
+
+    # Start by assigning one of each value
+    selected_values = list(values)
+
+    # Distribute remaining count: randomly distribute N among the values
+    remaining = count - num_elements
+    partitions = rng.multinomial(remaining, np.ones(num_elements) / num_elements)
 
-    # Create a list of selected values
-    selected_values = [
-        val for val, count in zip(values, partitions, strict=True) for _ in range(count)
-    ]
+    # Add the remaining values according to the partitions
+    for val, extra_count in zip(values, partitions, strict=True):
+        selected_values.extend([val] * extra_count)
 
     return sorted(selected_values)
 
diff --git a/test/test_filtering.py b/test/test_filtering.py
diff --git a/test/test_model.py b/test/test_model.py

Original file line number	Diff line number	Diff line change
`@@ -221,6 +221,7 @@ env = "PYTHONHASHSEED=0"`
`221`	`221`	`markers = [`
`222`	`222`	`"random_gtab_data: Custom marker for random gtab data tests",`
`223`	`223`	`"random_dwi_data: Custom marker for random dwi data tests",`
	`224`	`+ "random_uniform_4d_data: Custom marker for random 4d data tests",`
`224`	`225`	`]`
`225`	`226`	`filterwarnings = [`
`226`	`227`	`"ignore::DeprecationWarning",`