ENH: Add gradient unit-sphere normalization function

jhlegarreta · jhlegarreta · commit b5acdeb42ca6 · 2025-11-19T20:31:47.000-05:00
Add gradient unit-sphere normalization function.
diff --git a/src/nifreeze/data/dmri.py b/src/nifreeze/data/dmri.py
@@ -63,6 +63,17 @@
 DTI_MIN_ORIENTATIONS = 6
 """Minimum number of nonzero b-values in a DWI dataset."""
 
+GRADIENT_NORMALIZATION_SHAPE_ERROR_MSG = "Input must be 1D or 2D array."
+"""Gradient normalization shape error message."""
+
+GRADIENT_NORMALIZATION_LENGTH_ERROR_MSG = "1D input must have length 3 to be a single b-vector."
+"""Gradient normalization length error message."""
+
+GRADIENT_NORMALIZATION_UNRECOGNIZED_SHAPE_ERROR_MSG = (
+    "Unrecognized shape {shape}. Expect Nx3, Nx4, 3xN, or 4xN (or 1D length-3)"
+)
+"""Gradient normalization unrecognized shape error message."""
+
 
 @attrs.define(slots=True)
 class DWI(BaseDataset[np.ndarray]):
@@ -106,6 +117,7 @@ def _normalize_gradients(self) -> None:
         elif n_volumes is None and gradients.shape[1] > gradients.shape[0]:
             gradients = gradients.T
 
+        gradients = normalize_gradients(gradients, copy=False)  # copies if not float64
         self.gradients = gradients
 
     def _getextra(self, idx: int | slice | tuple | np.ndarray) -> tuple[np.ndarray]:
@@ -552,3 +564,81 @@ def transform_fsl_bvec(
     ijk2ijk_xfm = np.linalg.inv(imaffine) @ xfm @ imaffine
 
     return ijk2ijk_xfm[:3, :3] @ b_ijk[:3]
+
+
+def normalize_gradients(value: np.ndarray, eps: float = 1e-8, copy: bool = True) -> np.ndarray:
+    """Normalize b-vectors in arrays of common shapes.
+
+    Parameters
+    ----------
+    value : :obj:`~numpy.ndarray`
+        Input array with shape one of:
+        - (N, 3) : rows are b-vector components (e.g., [gx gy gz])
+        - (N, 4) : first 3 columns are b-vector components (e.g., [gx gy gz b])
+        - (3, N) : columns are b-vector components (e.g., [gx gy gz].T)
+        - (4, N) : first 3 rows are b-vector components (e.g., [gx gy gz b].T)
+        - (3,) or (1,3) or (3,1) : single b-vector
+        The column count is checked first, so 3x3/4x3 (3x4/4x4) inputs are read as Nx3 (Nx4).
+    eps : float, optional
+        Norm threshold; vectors with norm <= ``eps`` are considered zero and left unchanged.
+    copy : bool, optional
+        If ``True``, return a new array; else modify in-place (float64 ndarray input only).
+
+    Returns
+    -------
+    out : :obj:`~numpy.ndarray`
+        Array with the same shape as ``value`` with each 3-component b-vector
+        normalized.
+    """
+    arr = np.asarray(value, dtype=float)
+
+    # 1D single vector
+    if arr.ndim == 1:
+        if arr.size != 3:
+            raise ValueError(GRADIENT_NORMALIZATION_LENGTH_ERROR_MSG)
+        norm = np.linalg.norm(arr)
+        if norm > eps:
+            if copy:
+                return arr / norm
+            else:
+                # Perform in-place normalization on the array view
+                arr[:] = arr / norm
+                return arr
+        else:
+            return arr.copy() if copy else arr
+
+    if arr.ndim != 2:
+        raise ValueError(GRADIENT_NORMALIZATION_SHAPE_ERROR_MSG)
+
+    rows, cols = arr.shape
+
+    # Prepare output (copy or in-place)
+    normalized_arr = arr.copy() if copy else arr
+
+    # Determine where the 3-component vectors live and create a (N, 3) view
+    # Check columns first to make Nx3/Nx4 deterministic
+    if cols == 4:
+        # Nx4: first 3 columns are b-vector components, last column holds b-values
+        vecs = normalized_arr[:, :3]  # shape (N, 3)
+    elif cols == 3:
+        # Nx3: rows are vectors
+        vecs = normalized_arr  # shape (N, 3)
+    elif rows == 4:
+        # 4xN: first 3 rows are b-vector components, last row holds b-values
+        # Create a (N, 3) view by transposing first 3 rows
+        vecs = normalized_arr[:3, :].T  # shape (N, 3)
+    elif rows == 3:
+        # 3xN: columns are vectors: normalize per-column
+        vecs = normalized_arr.T  # shape (N, 3)
+    else:
+        raise ValueError(
+            GRADIENT_NORMALIZATION_UNRECOGNIZED_SHAPE_ERROR_MSG.format(shape=arr.shape)
+        )
+
+    # Normalize in-place on vecs (which is a view into output)
+    norms = np.linalg.norm(vecs, axis=1)
+    mask = norms > eps
+    if np.any(mask):
+        vecs[mask] = vecs[mask] / norms[mask, None]
+
+    return normalized_arr
diff --git a/test/test_data_dmri.py b/test/test_data_dmri.py
@@ -22,17 +22,50 @@
 #
 """Unit tests exercising the dMRI data structure."""
 
+import re
 from pathlib import Path
+from string import Formatter
 
 import nibabel as nb
 import numpy as np
 import pytest
 
 from nifreeze.data import load
-from nifreeze.data.dmri import DWI, find_shelling_scheme, from_nii, transform_fsl_bvec
+from nifreeze.data.dmri import (
+    DWI,
+    GRADIENT_NORMALIZATION_LENGTH_ERROR_MSG,
+    GRADIENT_NORMALIZATION_SHAPE_ERROR_MSG,
+    GRADIENT_NORMALIZATION_UNRECOGNIZED_SHAPE_ERROR_MSG,
+    find_shelling_scheme,
+    from_nii,
+    normalize_gradients,
+    transform_fsl_bvec,
+)
 from nifreeze.utils.ndimage import load_api
 
 
+def _template_has_field(template: str, field_name: str | None = None) -> bool:
+    """Return True if `template` contains a format field.
+    If `field_name` is provided, return True only if that named field appears.
+
+    This uses Formatter.parse() so it recognizes real format fields and
+    ignores literal substrings that merely look like "{shape}".
+    """
+    formatter = Formatter()
+    for _literal_text, field, _format_spec, _conversion in formatter.parse(template):
+        if field is None:
+            # no field in this segment
+            continue
+        # field can be '' (positional {}), 'shape', or complex like 'shape[0]' or 'obj.attr'
+        if field_name is None:
+            return True
+        # Compare the base name before any attribute/indexing syntax
+        base = field.split(".", 1)[0].split("[", 1)[0]
+        if base == field_name:
+            return True
+    return False
+
+
 def _dwi_data_to_nifti(
     dwi_dataobj,
     affine,
@@ -959,3 +992,145 @@ def test_transform_fsl_bvec(b_ijk, zooms, flips, axis_order, origin, angles):
         f"Expected {rotated_b_ijk}, got {test_b_ijk} for b_ijk={b_ijk}, "
         f"zooms={zooms}, origin={origin}, angles={angles}"
     )
+
+
+@pytest.mark.parametrize(
+    "shape, expected_msg_template",
+    [
+        # 1D but wrong length
+        ((4,), GRADIENT_NORMALIZATION_LENGTH_ERROR_MSG),
+        # ndim != 1 and != 2
+        ((2, 2, 2), GRADIENT_NORMALIZATION_SHAPE_ERROR_MSG),
+        # 2D but unrecognized shape (neither Nx3/Nx4 nor 3xN/4xN)
+        ((2, 2), GRADIENT_NORMALIZATION_UNRECOGNIZED_SHAPE_ERROR_MSG),
+    ],
+)
+def test_normalize_gradients_exceptions(shape, expected_msg_template):
+    arr = np.zeros(shape, dtype=float)
+    if _template_has_field(expected_msg_template, "shape"):
+        expected_msg = expected_msg_template.format(shape=shape)
+    else:
+        expected_msg = expected_msg_template
+
+    with pytest.raises(ValueError, match=re.escape(expected_msg)):
+        normalize_gradients(arr)
+
+
+@pytest.mark.parametrize(
+    "arr, expected",
+    [
+        # Nx3: rows are b-vectors (e.g., [gx gy gz])
+        (
+            np.array([[1, 0, 0], [0, 2, 0], [0, 0, 0]], float),
+            np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], float),
+        ),
+        (
+            np.array([[2, 3, 1], [7, 5, 6], [0, 0, 0]], float),
+            np.array(
+                [
+                    [2.0 / np.sqrt(14), 3.0 / np.sqrt(14), 1.0 / np.sqrt(14)],
+                    [7.0 / np.sqrt(110), 5.0 / np.sqrt(110), 6.0 / np.sqrt(110)],
+                    [0.0, 0.0, 0.0],
+                ],
+                float,
+            ),
+        ),
+        # Nx4: first 3 columns are b-vectors (e.g., [gx gy gz b])
+        (
+            np.array([[1, 0, 0, 0], [0, 2, 0, 1000], [0, 0, 0, 0]], float),
+            np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 1000.0], [0.0, 0.0, 0.0, 0.0]], float),
+        ),
+        (
+            np.array([[0, 0, 0, 0], [1, 0, 2, 1000], [1, 2, 1, 1000]], float),
+            np.array(
+                [
+                    [0, 0, 0, 0],
+                    [1.0 / np.sqrt(5), 0, 2.0 / np.sqrt(5), 1000],
+                    [1.0 / np.sqrt(6), 2.0 / np.sqrt(6), 1.0 / np.sqrt(6), 1000],
+                ],
+                float,
+            ),
+        ),
+        (
+            np.array([[4.0, 2.0, 1.0, 250.0]], float),
+            np.array([[4.0 / np.sqrt(21), 2.0 / np.sqrt(21), 1.0 / np.sqrt(21), 250.0]], float),
+        ),
+        # 3xN: columns are b-vectors (e.g., [gx gy gz].T)
+        (
+            np.array([[1, 0], [0, 2], [0, 0]], float),
+            np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]], float),
+        ),
+        (
+            np.array([[8.0, 0.0], [1.0, 0.0], [6.0, 0.0]], float),
+            np.array(
+                [[8.0 / np.sqrt(101), 0.0], [1.0 / np.sqrt(101), 0.0], [6.0 / np.sqrt(101), 0.0]],
+                float,
+            ),
+        ),
+        # 4xN: first 3 rows are b-vectors (e.g., [gx gy gz b].T)
+        (
+            np.array([[1, 0], [0, 2], [0, 0], [0, 1000]], float),
+            np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0], [0.0, 1000.0]], float),
+        ),
+        (
+            np.array([[6.0, 0.0], [8.0, 0.0], [0.0, 0.0], [5.0, 200.0]], float),
+            np.array([[0.6, 0.0], [0.8, 0.0], [0.0, 0.0], [5.0, 200.0]], float),
+        ),
+        # 1D single vector
+        (np.array([3, 0, 0], float), np.array([1.0, 0.0, 0.0], float)),
+        (np.array([3.0, 4.0, 0.0], float), np.array([0.6, 0.8, 0.0], float)),
+    ],
+)
+def test_normalize_gradients_shapes(arr, expected):
+    """Normalize several common bvec layouts and compare to expected output."""
+    obtained = normalize_gradients(arr)  # default copy=True
+
+    assert obtained.shape == expected.shape
+    assert np.allclose(obtained, expected)
+
+
+@pytest.mark.parametrize(
+    "arr, idx_check, expected_row",
+    [
+        # Nx3 in-place: ensure modification and returned object identity
+        (np.array([[0, 3, 0], [0, 0, 0]], float), (0,), np.array([0.0, 1.0, 0.0])),
+        # 1D single vector in-place
+        (np.array([3, 0, 0], float), None, np.array([1.0, 0.0, 0.0])),
+    ],
+)
+def test_normalize_gradients_inplace(arr, idx_check, expected_row):
+    """
+    Ensure copy=False modifies the provided ndarray in-place and that returned
+    object is the same when appropriate. For 1D arrays the returned object
+    should be the same object when copy=False.
+    """
+    arr_copy = arr.copy()
+    obtained = normalize_gradients(arr_copy, copy=False)
+
+    # returned object must be the exact same ndarray when copy=False
+    assert obtained is arr_copy
+
+    if idx_check is None:
+        # 1D vector: compare whole array
+        assert np.allclose(arr_copy, expected_row)
+    else:
+        # For multi-row arrays, check the indicated row(s)
+        # idx_check is a tuple of row indices to check (here only first row)
+        for i, expected in zip(idx_check, [expected_row]):
+            assert np.allclose(arr_copy[i], expected)
+
+
+def test_normalize_gradients_zero_vectors_preserved_and_norms():
+    """Check that near-zero vectors are left unchanged and non-zero are unit."""
+    a = np.array([[1e-12, 0, 0], [0, 2, 0], [0, 0, 0]], float)
+    eps = 1e-8
+    obtained = normalize_gradients(a, eps=eps)
+
+    # First row is near-zero with norm < eps -> preserved (close to original)
+    assert np.allclose(obtained[0], a[0])
+
+    # Second row normalized to unit length
+    assert np.allclose(np.linalg.norm(obtained[1]), 1.0)
+
+    # Last row is exactly zero and preserved
+    assert np.allclose(obtained[2], np.zeros(3))