Skip to content

Commit 70c84d7

Browse files
committed
enh: use DIPY's parallelization
1 parent d16c97c commit 70c84d7

File tree

2 files changed

+15
-56
lines changed

2 files changed

+15
-56
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ license = "Apache-2.0"
2121
requires-python = ">=3.10"
2222
dependencies = [
2323
"attrs",
24-
"dipy>=1.5.0",
24+
"dipy>=1.10.0",
2525
"joblib",
2626
"nipype>= 1.5.1,<2.0",
2727
"nitransforms>=22.0.0,<24",
2828
"nireports",
2929
"numpy>=1.21.3",
3030
"nest-asyncio>=1.5.1",
31+
"ray",
3132
"scikit-image>=0.15.0",
3233
"scikit_learn>=1.3.0",
3334
"scipy>=1.8.0",

src/nifreeze/model/dmri.py

Lines changed: 13 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
import numpy as np
2727
from dipy.core.gradients import gradient_table_from_bvals_bvecs
28-
from joblib import Parallel, delayed
2928

3029
from nifreeze.data.dmri import (
3130
DEFAULT_CLIP_PERCENTILE,
@@ -38,16 +37,6 @@
3837
B_MIN = 50
3938

4039

41-
def _exec_fit(model, data, chunk=None):
42-
retval = model.fit(data)
43-
return retval, chunk
44-
45-
46-
def _exec_predict(model, chunk=None, **kwargs):
47-
"""Propagate model parameters and call predict."""
48-
return np.squeeze(model.predict(**kwargs)), chunk
49-
50-
5140
class BaseDWIModel(BaseModel):
5241
"""Interface and default methods for DWI models."""
5342

@@ -57,7 +46,7 @@ class BaseDWIModel(BaseModel):
5746
"_S0": "The S0 (b=0 reference signal) that will be fed into DIPY models",
5847
"_model_class": "Defining a model class, DIPY models are instantiated automagically",
5948
"_modelargs": "Arguments acceptable by the underlying DIPY-like model.",
60-
"_models": "List with one or more (if parallel execution) model instances",
49+
"_model_fit": "Fitted model",
6150
}
6251

6352
def __init__(self, dataset: DWI, max_b: float | int | None = None, **kwargs):
@@ -107,8 +96,6 @@ def __init__(self, dataset: DWI, max_b: float | int | None = None, **kwargs):
10796
def _fit(self, index: int | None = None, n_jobs=None, **kwargs):
10897
"""Fit the model chunk-by-chunk asynchronously"""
10998

110-
n_jobs = n_jobs or 1
111-
11299
if self._locked_fit is not None:
113100
return n_jobs
114101

@@ -136,25 +123,11 @@ def _fit(self, index: int | None = None, n_jobs=None, **kwargs):
136123
class_name,
137124
)(gtab, **kwargs)
138125

139-
# One single CPU - linear execution (full model)
140-
if n_jobs == 1:
141-
_modelfit, _ = _exec_fit(model, data)
142-
self._models = [_modelfit]
143-
return 1
144-
145-
# Split data into chunks of group of slices
146-
data_chunks = np.array_split(data, n_jobs)
147-
148-
self._models = [None] * n_jobs
149-
150-
# Parallelize process with joblib
151-
with Parallel(n_jobs=n_jobs) as executor:
152-
results = executor(
153-
delayed(_exec_fit)(model, dchunk, i) for i, dchunk in enumerate(data_chunks)
154-
)
155-
for submodel, rindex in results:
156-
self._models[rindex] = submodel
157-
126+
self._model_fit = model.fit(
127+
data,
128+
engine="serial" if n_jobs == 1 else "joblib",
129+
n_jobs=n_jobs,
130+
)
158131
return n_jobs
159132

160133
def fit_predict(self, index: int | None = None, **kwargs):
@@ -168,13 +141,14 @@ def fit_predict(self, index: int | None = None, **kwargs):
168141
169142
"""
170143

171-
n_models = self._fit(
144+
self._fit(
172145
index,
173146
n_jobs=kwargs.pop("n_jobs"),
174147
**kwargs,
175148
)
176149

177150
if index is None:
151+
self._locked_fit = True
178152
return None
179153

180154
gradient = self._dataset.gradients[:, index]
@@ -184,28 +158,12 @@ def fit_predict(self, index: int | None = None, **kwargs):
184158
gradient[np.newaxis, -1], gradient[np.newaxis, :-1]
185159
)
186160

187-
if n_models == 1:
188-
predicted, _ = _exec_predict(
189-
self._models[0], **(kwargs | {"gtab": gradient, "S0": self._S0})
161+
predicted = np.squeeze(
162+
self._model_fit.predict(
163+
gtab=gradient,
164+
S0=self._S0,
190165
)
191-
else:
192-
predicted = [None] * n_models
193-
S0 = np.array_split(self._S0, n_models)
194-
195-
# Parallelize process with joblib
196-
with Parallel(n_jobs=n_models) as executor:
197-
results = executor(
198-
delayed(_exec_predict)(
199-
model,
200-
chunk=i,
201-
**(kwargs | {"gtab": gradient, "S0": S0[i]}),
202-
)
203-
for i, model in enumerate(self._models)
204-
)
205-
for subprediction, index in results:
206-
predicted[index] = subprediction
207-
208-
predicted = np.hstack(predicted)
166+
)
209167

210168
retval = np.zeros_like(self._data_mask, dtype=self._dataset.dataobj.dtype)
211169
retval[self._data_mask, ...] = predicted

0 commit comments

Comments (0)