Merge pull request #2 from oesteban/rf/dwi-data-pr

jhlegarreta · web-flow · commit ee02d74d2752 · 2025-11-20T14:53:48.000+01:00
rf: overhaul suggested by #327's title
diff --git a/src/nifreeze/data/dmri.py b/src/nifreeze/data/dmri.py
@@ -63,18 +63,20 @@
 DTI_MIN_ORIENTATIONS = 6
 """Minimum number of nonzero b-values in a DWI dataset."""
 
-GRADIENT_VOLUME_DIMENSIONALITY_MISMATCH_MISSING_ERROR = "Gradient table shape does not match the number of diffusion volumes: expected {n_volumes} rows, found {n_gradients}."
+GRADIENT_VOLUME_DIMENSIONALITY_MISMATCH_MISSING_ERROR = """\
+Gradient table shape does not match the number of diffusion volumes: \
+expected {n_volumes} rows, found {n_gradients}."""
 """dMRI volume count vs. gradient count mismatch error message."""
 
-GRADIENT_BVAL_BVEC_PRIORITY_WARN_MSG = "Both a gradients table file and b-vec/val files are defined; ignoring b-vec/val files in favor of the gradients_file."
+GRADIENT_BVAL_BVEC_PRIORITY_WARN_MSG = """\
+Both a gradients table file and b-vec/val files are defined; \
+ignoring b-vec/val files in favor of the gradients_file."""
 """"dMRI gradient file priority warning message."""
 
 GRADIENT_NDIM_ERROR_MSG = "Gradient table must be a 2D array"
 """dMRI gradient dimensionality error message."""
 
-GRADIENT_DATA_MISSING_ERROR = (
-    "No gradient data provided. Please specify either a gradients_file or (bvec_file & bval_file)."
-)
+GRADIENT_DATA_MISSING_ERROR = "No gradient data provided."
 """dMRI missing gradient data error message."""
 
 GRADIENT_EXPECTED_COLUMNS_ERROR_MSG = (
@@ -83,51 +85,115 @@
 """dMRI gradient expected columns error message."""
 
 
+def format_gradients(
+    value: npt.ArrayLike | None,
+) -> np.ndarray | None:
+    """
+    Validate and orient gradient tables to a consistent shape.
+
+    Examples
+    --------
+    Passing an already well-formed table returns the data unchanged::
+
+        >>> format_gradients(
+        ...     [
+        ...         [1, 0, 0, 0],
+        ...         [0, 1, 0, 1000],
+        ...         [0, 0, 1, 2000],
+        ...         [0, 0, 0, 0],
+        ...         [0, 0, 0, 1000],
+        ...     ]
+        ... )
+        array([[   1,    0,    0,    0],
+               [   0,    1,    0, 1000],
+               [   0,    0,    1, 2000],
+               [   0,    0,    0,    0],
+               [   0,    0,    0, 1000]])
+
+    Column-major inputs (four rows, one per gradient component) are
+    automatically transposed to the row-major convention::
+
+        >>> format_gradients(
+        ...     [[1, 0], [0, 1], [0, 0], [1000, 2000]],
+        ... )
+        array([[   1,    0,    0, 1000],
+               [   0,    1,    0, 2000]])
+
+    Gradient tables must always have two dimensions::
+
+        >>> format_gradients([0, 1, 0, 1000])
+        Traceback (most recent call last):
+        ...
+        ValueError: Gradient table must be a 2D array
+
+    """
+
+    formatted = np.asarray(value)
+    if formatted.ndim != 2:
+        raise ValueError(GRADIENT_NDIM_ERROR_MSG)
+
+    # Transpose if column-major
+    return formatted.T if formatted.shape[0] == 4 and formatted.shape[1] != 4 else formatted
+
+
+def validate_gradients(
+    instance: DWI,
+    attribute: attrs.Attribute,
+    value: npt.NDArray[np.floating],
+) -> None:
+    """Ensure the gradient table follows the row-major convention (four columns)."""
+    if value.shape[1] != 4:
+        raise ValueError(GRADIENT_EXPECTED_COLUMNS_ERROR_MSG)
+
+
 @attrs.define(slots=True)
 class DWI(BaseDataset[np.ndarray]):
     """Data representation structure for dMRI data."""
 
+    gradients: np.ndarray = attrs.field(
+        default=None,
+        repr=_data_repr,
+        eq=attrs.cmp_using(eq=_cmp),
+        converter=format_gradients,
+        validator=validate_gradients,
+    )
+    """A 2D numpy array of the gradient table (``N`` orientations x ``C`` components)."""
     bzero: np.ndarray | None = attrs.field(
         default=None, repr=_data_repr, eq=attrs.cmp_using(eq=_cmp)
     )
-    """A *b=0* reference map, preferably obtained by some smart averaging."""
-    gradients: np.ndarray = attrs.field(default=None, repr=_data_repr, eq=attrs.cmp_using(eq=_cmp))
-    """A 2D numpy array of the gradient table (``N`` orientations x ``C`` components)."""
+    """A *b=0* reference map; computed from the low-b frames when not provided."""
     eddy_xfms: list = attrs.field(default=None)
     """List of transforms to correct for estimated eddy current distortions."""
 
     def __attrs_post_init__(self) -> None:
-        self._normalize_gradients()
-
-    def _normalize_gradients(self) -> None:
         if self.gradients is None:
-            return
-
-        gradients = np.asarray(self.gradients)
-        if gradients.ndim != 2:
-            raise ValueError(GRADIENT_NDIM_ERROR_MSG)
-        if gradients.shape[1] == 4:
-            pass
-        elif gradients.shape[0] == 4:
-            gradients = gradients.T
-        else:
-            raise ValueError(GRADIENT_EXPECTED_COLUMNS_ERROR_MSG)
-
-        n_volumes = None
-        if self.dataobj is not None:
-            try:
-                n_volumes = self.dataobj.shape[-1]
-            except Exception:  # pragma: no cover - extremely defensive
-                n_volumes = None
+            raise ValueError(GRADIENT_DATA_MISSING_ERROR)
 
-        if n_volumes is not None and gradients.shape[0] != n_volumes:
+        if self.dataobj.shape[-1] != self.gradients.shape[0]:
             raise ValueError(
                 GRADIENT_VOLUME_DIMENSIONALITY_MISMATCH_MISSING_ERROR.format(
-                    n_volumes=n_volumes, n_gradients=gradients.shape[0]
+                    n_volumes=self.dataobj.shape[-1],
+                    n_gradients=self.gradients.shape[0],
                 )
             )
 
-        self.gradients = gradients
+        b0_mask = self.gradients[:, -1] <= DEFAULT_LOWB_THRESHOLD
+        b0_num = np.sum(b0_mask)
+
+        if b0_num > 0 and self.bzero is None:
+            bzeros = self.dataobj[..., b0_mask]
+            self.bzero = bzeros if bzeros.ndim == 3 else np.median(bzeros, axis=-1)
+
+        if b0_num > 0:
+            # Remove b0 volumes from dataobj and gradients
+            self.gradients = self.gradients[~b0_mask, :]
+            self.dataobj = self.dataobj[..., ~b0_mask]
+
+        if self.gradients.shape[0] < DTI_MIN_ORIENTATIONS:
+            raise ValueError(
+                f"DWI datasets must have at least {DTI_MIN_ORIENTATIONS} diffusion-weighted "
+                f"orientations; found {self.dataobj.shape[-1]}."
+            )
 
     def _getextra(self, idx: int | slice | tuple | np.ndarray) -> tuple[np.ndarray]:
         return (self.gradients[idx, ...],)
@@ -339,12 +405,10 @@ def to_nifti(
 def from_nii(
     filename: Path | str,
     brainmask_file: Path | str | None = None,
-    motion_file: Path | str | None = None,
     gradients_file: Path | str | None = None,
     bvec_file: Path | str | None = None,
     bval_file: Path | str | None = None,
     b0_file: Path | str | None = None,
-    b0_thres: float = DEFAULT_LOWB_THRESHOLD,
 ) -> DWI:
     """
     Load DWI data from NIfTI and construct a DWI object.
@@ -359,8 +423,6 @@ def from_nii(
     brainmask_file : :obj:`os.pathlike`, optional
         A brainmask NIfTI file. If provided, will be loaded and
         stored in the returned dataset.
-    motion_file : :obj:`os.pathlike`, optional
-        A file containing head motion affine matrices (linear)
     gradients_file : :obj:`os.pathlike`, optional
         A text file containing the gradients table, shape (N, C) where the last column
         stores the b-values. If provided following the column-major convention(C, N),
@@ -373,9 +435,6 @@ def from_nii(
     b0_file : :obj:`os.pathlike`, optional
         A NIfTI file containing a b=0 volume (possibly averaged or reference).
         If not provided, and the data contains at least one b=0 volume, one will be computed.
-    b0_thres : float, optional
-        Threshold for determining which volumes are considered DWI vs. b=0
-        if you combine them in the same file.
 
     Returns
     -------
@@ -390,10 +449,6 @@ def from_nii(
         ``bvec_file`` + ``bval_file``).
 
     """
-
-    if motion_file:
-        raise NotImplementedError
-
     filename = Path(filename)
 
     # 1) Load a NIfTI
@@ -405,18 +460,8 @@ def from_nii(
         grad = np.loadtxt(gradients_file, dtype="float32")
         if bvec_file and bval_file:
             warn(GRADIENT_BVAL_BVEC_PRIORITY_WARN_MSG, stacklevel=2)
-        if grad.ndim != 2:
-            raise ValueError(GRADIENT_NDIM_ERROR_MSG)
-        if grad.shape[1] == 4:
-            pass
-        elif grad.shape[0] == 4:
-            grad = grad.T
-        else:
-            raise ValueError(GRADIENT_EXPECTED_COLUMNS_ERROR_MSG)
     elif bvec_file and bval_file:
         bvecs = np.loadtxt(bvec_file, dtype="float32")
-        if bvecs.ndim == 1:
-            bvecs = bvecs[np.newaxis, :]
         if bvecs.shape[1] != 3 and bvecs.shape[0] == 3:
             bvecs = bvecs.T
 
@@ -427,40 +472,26 @@ def from_nii(
     else:
         raise RuntimeError(GRADIENT_DATA_MISSING_ERROR)
 
-    # 3) Create the DWI instance. We'll filter out volumes where b-value > b0_thres
-    #    as "DW volumes" if the user wants to store only the high-b volumes here
-    gradmsk = grad[:, -1] > b0_thres
-
-    dwi_obj = DWI(
-        dataobj=fulldata[..., gradmsk],
-        affine=img.affine,
-        # We'll assign the filtered gradients below.
-    )
-
-    dwi_obj.gradients = grad[gradmsk, :]
-    dwi_obj._normalize_gradients()
-
-    # 4) b=0 volume (bzero)
-    #    If the user provided a b0_file, load it
+    # 3) Read b-zero volume if provided
+    b0_data = None
     if b0_file:
         b0img = load_api(b0_file, SpatialImage)
-        b0vol = np.asanyarray(b0img.dataobj)
-        # We'll assume your DWI class has a bzero: np.ndarray | None attribute
-        dwi_obj.bzero = b0vol
-    # Otherwise, if any volumes remain outside gradmsk, compute a median B0:
-    elif np.any(~gradmsk):
-        # The b=0 volumes are those that did NOT pass b0_thres
-        b0_volumes = fulldata[..., ~gradmsk]
-        # A simple approach is to take the median across that last dimension
-        # Note that axis=3 is valid only if your data is 4D (x, y, z, volumes).
-        dwi_obj.bzero = np.median(b0_volumes, axis=3)
-
-    # 5) If a brainmask_file was provided, load it
+        b0_data = np.asanyarray(b0img.dataobj)
+
+    # 4) If a brainmask_file was provided, load it
+    brainmask_data = None
     if brainmask_file:
         mask_img = load_api(brainmask_file, SpatialImage)
-        dwi_obj.brainmask = np.asanyarray(mask_img.dataobj, dtype=bool)
+        brainmask_data = np.asanyarray(mask_img.dataobj, dtype=bool)
 
-    return dwi_obj
+    # 5) Create and return the DWI instance.
+    return DWI(
+        dataobj=fulldata,
+        affine=img.affine,
+        gradients=grad,
+        bzero=b0_data,
+        brainmask=brainmask_data,
+    )
 
 
 def find_shelling_scheme(
diff --git a/test/test_data_dmri.py b/test/test_data_dmri.py
@@ -88,11 +88,6 @@ def test_main(datadir):
     assert isinstance(load(input_file), DWI)
 
 
-def test_motion_file_not_implemented():
-    with pytest.raises(NotImplementedError):
-        from_nii("dmri.nii.gz", motion_file="motion.x5")
-
-
 @pytest.mark.random_gtab_data(10, (1000, 2000), 2)
 @pytest.mark.random_dwi_data(50, (34, 36, 24), True)
 @pytest.mark.parametrize("row_major_gradients", (False, True))
@@ -169,7 +164,7 @@ def test_dwi_instantiation_gradients_ndim_error(
     [(1, 0), (2, 0), (2, 1), (0, 1), (0, 2), (1, 2)],
 )
 def test_gradient_instantiation_dwi_vol_mismatch_error(
-    tmp_path, setup_random_dwi_data, additional_volume_count, additional_gradient_count
+    setup_random_dwi_data, additional_volume_count, additional_gradient_count
 ):
     (
         dwi_dataobj,
@@ -189,6 +184,25 @@ def test_gradient_instantiation_dwi_vol_mismatch_error(
         additional_gradients = np.tile(gradients[-1:, :], (additional_gradient_count, 1))
         gradients = np.concatenate((gradients, additional_gradients), axis=0)
 
+    # Test with b0s present
+    n_volumes = dwi_dataobj.shape[-1]
+    with pytest.raises(
+        ValueError,
+        match=GRADIENT_VOLUME_DIMENSIONALITY_MISMATCH_MISSING_ERROR.format(
+            n_volumes=n_volumes, n_gradients=gradients.shape[0]
+        ),
+    ):
+        DWI(
+            dataobj=dwi_dataobj,
+            affine=affine,
+            brainmask=brainmask_dataobj,
+            bzero=b0_dataobj,
+            gradients=gradients,
+        )
+
+    # Test without b0s present
+    dwi_dataobj = dwi_dataobj[..., 2:]
+    gradients = gradients[2:, :]
     n_volumes = dwi_dataobj.shape[-1]
     with pytest.raises(
         ValueError,
@@ -296,7 +310,7 @@ def test_load_gradients_bval_bvec_warn(tmp_path, setup_random_dwi_data):
         brainmask_dataobj,
         b0_dataobj,
         gradients,
-        b0_thres,
+        _,
     ) = setup_random_dwi_data
 
     dwi, _, _ = _dwi_data_to_nifti(
@@ -309,6 +323,9 @@ def test_load_gradients_bval_bvec_warn(tmp_path, setup_random_dwi_data):
     dwi_fname = tmp_path / "dwi.nii.gz"
     nb.save(dwi, dwi_fname)
 
+    b0_fname = tmp_path / "b0.nii.gz"
+    nb.Nifti1Image(b0_dataobj, np.eye(4), None).to_filename(b0_fname)
+
     grads_fname = tmp_path / "grads.txt"
     np.savetxt(grads_fname, gradients, fmt="%.6f")
 
@@ -326,7 +343,7 @@ def test_load_gradients_bval_bvec_warn(tmp_path, setup_random_dwi_data):
             gradients_file=grads_fname,
             bvec_file=bvec_fname,
             bval_file=bval_fname,
-            b0_thres=b0_thres,
+            b0_file=b0_fname,
         )
 
 
@@ -359,7 +376,7 @@ def test_load_gradients(tmp_path, setup_random_dwi_data, row_major_gradients):
     grads_fname = tmp_path / "grads.txt"
     np.savetxt(grads_fname, gradients, fmt="%.6f")
 
-    dwi = from_nii(dwi_fname, gradients_file=grads_fname, b0_thres=b0_thres)
+    dwi = from_nii(dwi_fname, gradients_file=grads_fname)
     if not row_major_gradients:
         gradmask = gradients.T[:, -1] > b0_thres
     else:
@@ -419,7 +436,7 @@ def test_load_bvecs_bvals(tmp_path, setup_random_dwi_data, transpose_bvals, tran
     np.savetxt(bvec_fname, bvecs, fmt="%.6f")
     np.savetxt(bval_fname, bvals, fmt="%.6f")
 
-    dwi = from_nii(dwi_fname, bvec_file=bvec_fname, bval_file=bval_fname, b0_thres=b0_thres)
+    dwi = from_nii(dwi_fname, bvec_file=bvec_fname, bval_file=bval_fname)
     gradmask = gradients[:, -1] > b0_thres
 
     expected_nonzero_grads = gradients[gradmask]
@@ -454,6 +471,7 @@ def test_load_gradients_missing(tmp_path, setup_random_dwi_data):
         from_nii(dwi_fname)
 
 
+@pytest.mark.skip(reason="to_nifti takes absurdly long")
 @pytest.mark.parametrize("insert_b0", (False, True))
 @pytest.mark.parametrize("rotate_bvecs", (False, True))
 def test_load(datadir, tmp_path, insert_b0, rotate_bvecs):  # noqa: C901
@@ -608,7 +626,6 @@ def test_equality_operator(tmp_path, setup_random_dwi_data):
         gradients_file=gradients_fname,
         b0_file=b0_fname,
         brainmask_file=brainmask_fname,
-        b0_thres=b0_thres,
     )
     hdf5_filename = tmp_path / "test_dwi.h5"
     dwi_obj.to_filename(hdf5_filename)