[FIX] strict provenance of label intensities in parcellation image

dPys · dPys · commit 5b62ac044be8 · 2020-08-21T18:35:53.000-05:00
diff --git a/pynets/core/interfaces.py b/pynets/core/interfaces.py
@@ -179,9 +179,10 @@ def _run_interface(self, runtime):
                       "installed?")
             try:
                 if self.inputs.clustering is False:
-                    [uatlas,
-                     _] = nodemaker.enforce_hem_distinct_consecutive_labels(
-                        uatlas)
+                    [uatlas, _] = \
+                        nodemaker.enforce_hem_distinct_consecutive_labels(
+                            uatlas)
+
                 # Fetch user-specified atlas coords
                 [coords, _, par_max, label_intensities] = \
                     nodemaker.get_names_and_coords_of_parcels(uatlas)
diff --git a/pynets/core/nodemaker.py b/pynets/core/nodemaker.py
@@ -58,7 +58,7 @@ def get_sphere(coords, r, vox_dims, dims):
     return neighbors
 
 
-def create_parcel_atlas(parcel_list):
+def create_parcel_atlas(parcel_list, label_intensities=None):
     """
     Create a 3D Nifti1Image atlas parcellation of consecutive integer
     intensities from an input list of ROI's.
@@ -92,7 +92,11 @@ def create_parcel_atlas(parcel_list):
                 parcel_list[0].shape,
                 dtype=bool))] + parcel_list
     concatted_parcels = concat_imgs(parcel_list_exp, dtype=np.float32)
-    parcel_list_exp = np.array(range(len(parcel_list_exp))).astype("float32")
+    if label_intensities is not None:
+        parcel_list_exp = np.array([0] + label_intensities).astype("float32")
+    else:
+        parcel_list_exp = np.array(range(len(parcel_list_exp))
+                                   ).astype("float32")
     parcel_sum = np.sum(
         parcel_list_exp *
         np.asarray(
@@ -693,11 +697,18 @@ def parcel_masker(
             " brain mask/roi..."
         )
 
+    if any(isinstance(sub, tuple) for sub in labels_adj):
+        label_intensities = [i[1] for i in labels_adj]
+    elif any(isinstance(sub, dict) for sub in labels_adj):
+        label_intensities = None
+    else:
+        label_intensities = labels_adj
+
     # Create a resampled 3D atlas that can be viewed alongside mask img for QA
     resampled_parcels_nii_path = f"{dir_path}/{ID}_parcels_resampled2roimask" \
                                  f"_{op.basename(roi).split('.')[0]}.nii.gz"
     resampled_parcels_map_nifti = resample_img(
-        nodemaker.create_parcel_atlas(parcel_list_adj)[0],
+        nodemaker.create_parcel_atlas(parcel_list_adj, label_intensities)[0],
         target_affine=mask_aff,
         target_shape=mask_data.shape,
         interpolation="nearest",
@@ -1479,8 +1490,16 @@ def node_gen_masking(
     [coords, labels, parcel_list_masked] = nodemaker.parcel_masker(
         roi, coords, parcel_list, labels, dir_path, ID, perc_overlap
     )
+
+    if any(isinstance(sub, tuple) for sub in labels):
+        label_intensities = [i[1] for i in labels]
+    elif any(isinstance(sub, dict) for sub in labels):
+        label_intensities = None
+    else:
+        label_intensities = labels
+
     [net_parcels_map_nifti, _] = nodemaker.create_parcel_atlas(
-        parcel_list_masked)
+        parcel_list_masked, label_intensities)
 
     assert (
         len(coords)
@@ -1549,7 +1568,15 @@ def node_gen(coords, parcel_list, labels, dir_path, ID, parc, atlas, uatlas):
         parcel_list = [index_img(parcel_list_img, i) for i in
                        range(parcel_list_img.shape[-1])]
 
-    [net_parcels_map_nifti, _] = nodemaker.create_parcel_atlas(parcel_list)
+    if any(isinstance(sub, tuple) for sub in labels):
+        label_intensities = [i[1] for i in labels]
+    elif any(isinstance(sub, dict) for sub in labels):
+        label_intensities = None
+    else:
+        label_intensities = labels
+
+    [net_parcels_map_nifti, _] = nodemaker.create_parcel_atlas(parcel_list,
+                                                               label_intensities)
 
     coords = list(tuple(x) for x in coords)
 
diff --git a/pynets/dmri/track.py b/pynets/dmri/track.py
@@ -437,7 +437,7 @@ def track_ensemble(
     ix = 0
     while float(stream_counter) < float(target_samples) and float(ix) < \
         len(all_combs):
-        with Parallel(n_jobs=nthreads, backend='loky', max_nbytes='8000M',
+        with Parallel(n_jobs=nthreads, backend='loky',
                       mmap_mode='r+', temp_folder=cache_dir,
                       verbose=10) as parallel:
             out_streams = parallel(
@@ -641,6 +641,8 @@ def run_tracking(step_curv_combinations, atlas_data_wm_gm_int, recon_path,
 
     del atlas_data
 
+    parcel_vec = list(np.ones(len(parcels)).astype("bool"))
+
     with h5py.File(recon_path_tmp_path, 'r+') as hf:
         mod_fit = hf['reconstruction'][:]
     hf.close()
@@ -745,7 +747,7 @@ def run_tracking(step_curv_combinations, atlas_data_wm_gm_int, recon_path,
                     roi_proximal_streamlines,
                     affine=np.eye(4),
                     rois=parcels,
-                    include=list(np.ones(len(parcels)).astype("bool")),
+                    include=parcel_vec,
                     mode="%s" % ("any" if waymask is not None else
                                  "either_end"),
                     tol=roi_neighborhood_tol,
diff --git a/pynets/fmri/estimation.py b/pynets/fmri/estimation.py
@@ -16,8 +16,8 @@ def get_optimal_cov_estimator(time_series):
     from sklearn.covariance import GraphicalLassoCV
 
     estimator_shrunk = None
-    estimator = GraphicalLassoCV(cv=5)
-    print("\nFinding best estimator...\n")
+    estimator = GraphicalLassoCV(cv=5, assume_centered=True)
+    print("\nSearching for best Lasso estimator...\n")
     try:
         estimator.fit(time_series)
     except BaseException:
@@ -27,8 +27,9 @@ def get_optimal_cov_estimator(time_series):
         while not hasattr(estimator, 'covariance_') and \
             not hasattr(estimator, 'precision_') and ix < 3:
             for tol in [0.1, 0.01, 0.001, 0.0001]:
-                print(tol)
-                estimator = GraphicalLassoCV(cv=5, max_iter=200, tol=tol)
+                print(f"Auto-tuning Tolerance={tol}")
+                estimator = GraphicalLassoCV(cv=5, max_iter=200, tol=tol,
+                                             assume_centered=True)
                 try:
                     estimator.fit(time_series)
                 except BaseException:
@@ -38,49 +39,33 @@ def get_optimal_cov_estimator(time_series):
     if not hasattr(estimator, 'covariance_') and not hasattr(estimator,
                                                              'precision_'):
         print(
-            "Unstable Lasso estimation. Applying shrinkage..."
+            "Unstable Lasso estimation. Applying shrinkage to empirical "
+            "covariance..."
+        )
+        estimator = None
+        from sklearn.covariance import (
+            GraphicalLasso,
+            empirical_covariance,
+            shrunk_covariance,
         )
         try:
-            estimator = None
-            from sklearn.covariance import (
-                GraphicalLasso,
-                empirical_covariance,
-                shrunk_covariance,
-            )
-
-            emp_cov = empirical_covariance(time_series)
-            # Iterate across different levels of alpha
+            emp_cov = empirical_covariance(time_series, assume_centered=True)
             for i in np.arange(0.8, 0.99, 0.01):
+                print(f"Shrinkage={i}:")
                 shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                 alphaRange = 10.0 ** np.arange(-8, 0)
                 for alpha in alphaRange:
+                    print(f"Auto-tuning alpha={alpha}...")
+                    estimator_shrunk = GraphicalLasso(alpha,
+                                                      assume_centered=True)
                     try:
-                        estimator_shrunk = GraphicalLasso(alpha)
                         estimator_shrunk.fit(shrunk_cov)
-                        print(
-                            f"Retrying covariance matrix estimate with"
-                            f" alpha={alpha}"
-                        )
-                        if estimator_shrunk is None:
-                            pass
-                        else:
-                            break
                     except BaseException:
-                        print(
-                            f"Covariance estimation failed with shrinkage"
-                            f" at alpha={alpha}"
-                        )
                         continue
-        except ValueError:
-            estimator = None
-            print(
-                "Covariance estimation failed. Check time-series data "
-                "for errors."
-            )
-    if estimator is None and estimator_shrunk is None:
-        raise RuntimeError("\nERROR: Covariance estimation failed.")
+        except BaseException:
+            return estimator
 
-    if estimator is None:
+    if estimator is None and estimator_shrunk is not None:
         estimator = estimator_shrunk
 
     return estimator
@@ -229,7 +214,6 @@ def get_conn_matrix(
       for Gaussian and related Graphical Models. doi:10.5281/zenodo.830033
 
     """
-    import sys
     from pynets.fmri.estimation import get_optimal_cov_estimator
     from nilearn.connectome import ConnectivityMeasure
 
@@ -241,6 +225,40 @@ def get_conn_matrix(
     conn_matrix = None
     estimator = get_optimal_cov_estimator(time_series)
 
+    def fallback_covariance(time_series):
+        """Estimate connectivity with shrinkage estimators (Ledoit-Wolf,
+        then OAS) after dropping IsolationForest-flagged samples; raises
+        ValueError if both fail. Uses `kind` from the enclosing scope."""
+        from sklearn.ensemble import IsolationForest
+        from sklearn import covariance
+
+        print('Matrix estimation failed with Lasso and shrinkage due to '
+              'ill conditions. Removing potential anomalies from the '
+              'time-series using IsolationForest...')
+        # predict() labels inliers 1 and outliers -1; keep only the inliers.
+        model = IsolationForest(contamination=0.02)
+        inliers = model.fit(time_series).predict(time_series) == 1
+        time_series = time_series[inliers]
+
+        # Catch the public np.linalg.LinAlgError, not the np.linalg.linalg path.
+        unstable = (np.linalg.LinAlgError, FloatingPointError)
+        try:
+            print("Trying Ledoit-Wolf Estimator...")
+            conn_measure = ConnectivityMeasure(
+                cov_estimator=covariance.LedoitWolf(store_precision=True,
+                                                    assume_centered=True),
+                kind=kind)
+            return conn_measure.fit_transform([time_series])[0]
+        except unstable:
+            print("Trying Oracle Approximating Shrinkage Estimator...")
+        conn_measure = ConnectivityMeasure(
+            cov_estimator=covariance.OAS(assume_centered=True), kind=kind)
+        try:
+            return conn_measure.fit_transform([time_series])[0]
+        except unstable as err:
+            raise ValueError('All covariance estimators failed to '
+                             'converge...') from err
+
     if conn_model in nilearn_kinds:
         if conn_model == "corr" or conn_model == "cor" or conn_model == "correlation":
             print("\nComputing correlation matrix...\n")
@@ -259,32 +277,16 @@ def get_conn_matrix(
                 "\nERROR! No connectivity model specified at runtime. Select a"
                 " valid estimator using the -mod flag.")
 
-        try:
-            # Try with the best-fitting Lasso estimator
+        # Try with the best-fitting Lasso estimator
+        if estimator is not None:
             conn_measure = ConnectivityMeasure(cov_estimator=estimator,
                                                kind=kind)
-            conn_matrix = conn_measure.fit_transform([time_series])[0]
-        except BaseException:
-            from sklearn.ensemble import IsolationForest
-
-            # Remove gross outliers
-            model = IsolationForest(contamination=0.02)
-            model.fit(time_series)
-            outlier_mask = model.predict(time_series)
-            outlier_mask[outlier_mask == -1] = 0
-            time_series = time_series[outlier_mask.astype('bool')]
-
-            # Fall back to LedoitWolf
-            print('Matrix estimation failed with Lasso and shrinkage due to '
-                  'ill conditions. Removing potential anomalies from the '
-                  'time-series using IsolationForest and falling back to '
-                  'LedoitWolf...')
             try:
-                conn_measure = ConnectivityMeasure(kind=kind)
                 conn_matrix = conn_measure.fit_transform([time_series])[0]
-            except RuntimeError:
-                print('Matrix estimation failed.')
-                sys.exit(1)
+            except (np.linalg.linalg.LinAlgError, FloatingPointError):
+                fallback_covariance(time_series)
+        else:
+            fallback_covariance(time_series)
     else:
         if conn_model == "QuicGraphicalLasso":
             try:
diff --git a/tests/test_track.py b/tests/test_track.py
@@ -15,6 +15,7 @@
 import indexed_gzip
 import numpy as np
 import logging
+import h5py
 
 logger = logging.getLogger(__name__)
 logger.setLevel(50)
@@ -148,9 +149,13 @@ def test_track_ensemble(directget, target_samples):
     dwi_data = dwi_img.get_fdata()
 
     temp_dir = tempfile.TemporaryDirectory()
-    recon_path = temp_dir.name + '/model_file.npy'
+    recon_path = temp_dir.name + '/model_file.hdf5'
     model, _ = track.reconstruction(conn_model, gtab, dwi_data, wm_in_dwi)
-    np.save(recon_path, model)
+
+    with h5py.File(recon_path, 'w') as hf:
+        hf.create_dataset("reconstruction",
+                          data=model.astype('float32'))
+    hf.close()
 
     streamlines = track.track_ensemble(target_samples, atlas_data_wm_gm_int,
                                        labels_im_file,
@@ -203,8 +208,12 @@ def test_track_ensemble_particle():
 
     model, _ = track.reconstruction(conn_model, gtab, dwi_data, wm_in_dwi)
     temp_dir = tempfile.TemporaryDirectory()
-    recon_path = temp_dir.name + '/model_file.npy'
-    np.save(recon_path, model)
+    recon_path = temp_dir.name + '/model_file.hdf5'
+
+    with h5py.File(recon_path, 'w') as hf:
+        hf.create_dataset("reconstruction",
+                          data=model.astype('float32'))
+    hf.close()
 
     streamlines = track.track_ensemble(target_samples, atlas_data_wm_gm_int,
                                        labels_im_file, recon_path, sphere,