fix(compcor): refactor of CompCor masks

oesteban · oesteban · commit 748d17e785b5 · 2020-06-18T12:09:09.000-07:00
This commit revises the implementation of CompCor masks in an attempt to make it closer to the original proposal and at the same time address some recurring problems of *fMRIPrep*'s tCompCor implementation. Finally, with the more careful resampling of prior knowledge from the anatomical scan, this refactor should also make the aCompCor components more run-to-run repeatable. aCompCor -------- The massaging of CompCor masks is now done in anatomical space where it is more precise, and a careful resampling to BOLD space follows. The implementation deviates from Behzadi et al. Their original implementation thresholded the CSF and the WM partial-volume masks at 0.99 (i.e., 99% of the voxel volume is filled with a particular tissue), and then binary-eroded the result by 2 voxels: > Anatomical data were segmented into gray matter, white matter, > and CSF partial volume maps using the FAST algorithm available > in the FSL software package (Smith et al., 2004). Tissue partial > volume maps were linearly interpolated to the resolution of the > functional data series using AFNI (Cox, 1996). In order to form > white matter ROIs, the white matter partial volume maps were > thresholded at a partial volume fraction of 0.99 and then eroded by > two voxels in each direction to further minimize partial voluming > with gray matter. CSF voxels were determined by first thresholding > the CSF partial volume maps at 0.99 and then applying a three-dimensional > nearest neighbor criteria to minimize multiple tissue > partial voluming. Since CSF regions are typically small compared > to white matter regions mask, erosion was not applied. This particular procedure is not generalizable to BOLD data with different voxel zooms as the mathematical morphology operations will be scaled by those. Also, from reading the excerpt above and the tCompCor description, I (@oesteban) believe that they always operated slice-wise given the large slice-thickness of their functional data. 
Instead, *fMRIPrep*'s implementation deviates from Behzadi's implementation in two aspects: * the masks are prepared in high-resolution, anatomical space and then projected into BOLD space; and, * instead of using binary erosion, a dilated GM map is generated -- thresholding the corresponding PV map at 0.05 (i.e., voxels containing at least 5% of GM tissue) and then subtracting that map from the CSF, WM and CSF+WM (combined) masks. This should be equivalent to eroding the masks, except that the erosion only happens at direct interfaces with GM. When the probseg maps come from FreeSurfer's ``recon-all`` (i.e., they are discrete), binary maps are *transformed* into some sort of partial volume maps by means of a Gaussian smoothing filter with sigma adjusted by the size of the BOLD data. tCompCor -------- In the case of *tCompCor*, this commit removes the heavy erosion of the brain mask because 1) that wasn't part of the original proposal by Behzadi et al., and 2) the erosion was a potential source of errors from numpy complaining that it can't take from an empty axis of an array. > Based on these results, we chose a 2% threshold > (∼20–30 voxels per slice) as a reasonable empirical > threshold that effectively identified voxels with > the highest fractional variance of physiological noise. Although they do the calculation slice-wise, this commit rolls tCompCor back to calculate the 2% threshold on the whole brain mask. Resolves: #2129. References: #2052.
diff --git a/fmriprep/interfaces/confounds.py b/fmriprep/interfaces/confounds.py
@@ -18,12 +18,41 @@
 from nipype.utils.filemanip import fname_presuffix
 from nipype.interfaces.base import (
     traits, TraitedSpec, BaseInterfaceInputSpec, File, Directory, isdefined,
-    SimpleInterface
+    SimpleInterface, InputMultiObject, OutputMultiObject
 )
 
 LOGGER = logging.getLogger('nipype.interface')
 
 
+class _aCompCorMasksInputSpec(BaseInterfaceInputSpec):
+    in_vfs = InputMultiObject(File(exists=True), desc="Input volume fractions.")
+    is_aseg = traits.Bool(False, usedefault=True,
+                          desc="Whether the input volume fractions come from FS' aseg.")
+    bold_zooms = traits.Tuple(traits.Float, traits.Float, traits.Float, mandatory=True,
+                              desc="BOLD series zooms")
+
+
+class _aCompCorMasksOutputSpec(TraitedSpec):
+    out_masks = OutputMultiObject(File(exists=True),
+                                  desc="CSF, WM and combined masks, respectively")
+
+
+class aCompCorMasks(SimpleInterface):
+    """Generate masks in T1w space for aCompCor."""
+
+    input_spec = _aCompCorMasksInputSpec
+    output_spec = _aCompCorMasksOutputSpec
+
+    def _run_interface(self, runtime):
+        from ..utils.confounds import acompcor_masks
+        self._results["out_masks"] = acompcor_masks(
+            self.inputs.in_vfs,
+            self.inputs.is_aseg,
+            self.inputs.bold_zooms,
+        )
+        return runtime
+
+
 class GatherConfoundsInputSpec(BaseInterfaceInputSpec):
     signals = File(exists=True, desc='input signals')
     dvars = File(exists=True, desc='file containing DVARS')
diff --git a/fmriprep/utils/confounds.py b/fmriprep/utils/confounds.py
@@ -0,0 +1,138 @@
+"""Utilities for confounds manipulation."""
+
+
+def mask2vf(in_file, zooms=None, out_file=None):
+    """
+    Convert a binary mask into a volume fraction map.
+
+    The algorithm simply applies a Gaussian filter with the kernel size scaled
+    by the zooms given as argument.
+
+    """
+    import numpy as np
+    import nibabel as nb
+    from scipy.ndimage import gaussian_filter
+
+    img = nb.load(in_file)
+    imgzooms = np.array(img.header.get_zooms()[:3], dtype=float)
+    if zooms is None:
+        zooms = imgzooms
+
+    zooms = np.array(zooms, dtype=float)
+    sigma = 0.5 * (zooms / imgzooms)
+    if len(sigma) > 1:
+        sigma = tuple(sigma)
+
+    data = gaussian_filter(img.get_fdata(dtype=np.float32), sigma=sigma)
+
+    max_data = np.percentile(data[data > 0], 99)
+    data = np.clip(data / max_data, a_min=0, a_max=1)
+
+    if out_file is None:
+        return data
+
+    hdr = img.header.copy()
+    hdr.set_data_dtype(np.float32)
+    nb.Nifti1Image(data.astype(np.float32), img.affine, hdr).to_filename(out_file)
+    return out_file
+
+
+def acompcor_masks(in_files, is_aseg=False, zooms=None):
+    """
+    Generate aCompCor masks.
+
+    This function selects the CSF partial volume map from the input,
+    and generates the WM and combined CSF+WM masks for aCompCor.
+
+    The implementation deviates from Behzadi et al.
+    Their original implementation thresholded the CSF and the WM partial-volume
+    masks at 0.99 (i.e., 99% of the voxel volume is filled with a particular tissue),
+    and then binary-eroded the result by 2 voxels:
+
+    > Anatomical data were segmented into gray matter, white matter,
+    > and CSF partial volume maps using the FAST algorithm available
+    > in the FSL software package (Smith et al., 2004). Tissue partial
+    > volume maps were linearly interpolated to the resolution of the
+    > functional data series using AFNI (Cox, 1996). In order to form
+    > white matter ROIs, the white matter partial volume maps were
+    > thresholded at a partial volume fraction of 0.99 and then eroded by
+    > two voxels in each direction to further minimize partial voluming
+    > with gray matter. CSF voxels were determined by first thresholding
+    > the CSF partial volume maps at 0.99 and then applying a three-dimensional
+    > nearest neighbor criteria to minimize multiple tissue
+    > partial voluming. Since CSF regions are typically small compared
+    > to white matter regions mask, erosion was not applied.
+
+    This particular procedure is not generalizable to BOLD data with different voxel zooms
+    as the mathematical morphology operations will be scaled by those.
+    Also, from reading the excerpt above and the tCompCor description, I (@oesteban)
+    believe that they always operated slice-wise given the large slice-thickness of
+    their functional data.
+
+    Instead, *fMRIPrep*'s implementation deviates from Behzadi's implementation in two
+    aspects:
+
+      * the masks are prepared in high-resolution, anatomical space and then
+        projected into BOLD space; and,
+      * instead of using binary erosion, a dilated GM map is generated -- thresholding
+        the corresponding PV map at 0.05 (i.e., voxels containing at least 5% of GM tissue)
+        and then subtracting that map from the CSF, WM and CSF+WM (combined) masks.
+        This should be equivalent to eroding the masks, except that the erosion
+        only happens at direct interfaces with GM.
+
+    When the probseg maps come from FreeSurfer's ``recon-all`` (i.e., they are
+    discrete), binary maps are *transformed* into some sort of partial volume maps
+    by means of a Gaussian smoothing filter with sigma adjusted by the size of the
+    BOLD data.
+
+    """
+    from pathlib import Path
+    import numpy as np
+    import nibabel as nb
+    from scipy.ndimage import binary_dilation
+    from skimage.morphology import ball
+
+    csf_file = in_files[2]  # BIDS labeling (CSF=2; last of list)
+    # Load PV maps (fast) or segments (recon-all)
+    gm_vf = nb.load(in_files[0])
+    wm_vf = nb.load(in_files[1])
+    csf_vf = nb.load(csf_file)
+
+    # Prepare target zooms
+    imgzooms = np.array(gm_vf.header.get_zooms()[:3], dtype=float)
+    if zooms is None:
+        zooms = imgzooms
+    zooms = np.array(zooms, dtype=float)
+
+    if not is_aseg:
+        gm_data = gm_vf.get_fdata() > 0.05
+        wm_data = wm_vf.get_fdata()
+        csf_data = csf_vf.get_fdata()
+    else:
+        csf_file = mask2vf(
+            csf_file,
+            zooms=zooms,
+            out_file=str(Path("acompcor_csf.nii.gz").absolute()),
+        )
+        csf_data = nb.load(csf_file).get_fdata()
+        wm_data = mask2vf(in_files[1], zooms=zooms)
+
+        # We do not have partial volume maps (recon-all route)
+        gm_data = np.asanyarray(gm_vf.dataobj, np.uint8) > 0
+
+    # Dilate the GM mask
+    gm_data = binary_dilation(gm_data, structure=ball(3))
+
+    # Output filenames
+    wm_file = str(Path("acompcor_wm.nii.gz").absolute())
+    combined_file = str(Path("acompcor_wmcsf.nii.gz").absolute())
+
+    # Prepare WM mask
+    wm_data[gm_data] = 0  # Make sure voxel does not contain GM
+    nb.Nifti1Image(wm_data, gm_vf.affine, gm_vf.header).to_filename(wm_file)
+
+    # Prepare combined CSF+WM mask
+    comb_data = csf_data + wm_data
+    comb_data[gm_data] = 0  # Make sure voxel does not contain GM
+    nb.Nifti1Image(comb_data, gm_vf.affine, gm_vf.header).to_filename(combined_file)
+    return [csf_file, wm_file, combined_file]
diff --git a/fmriprep/workflows/bold/base.py b/fmriprep/workflows/bold/base.py
@@ -347,6 +347,7 @@ def init_func_preproc_wf(bold_file):
     bold_confounds_wf = init_bold_confs_wf(
         mem_gb=mem_gb['largemem'],
         metadata=metadata,
+        freesurfer=freesurfer,
         regressors_all_comps=config.workflow.regressors_all_comps,
         regressors_fd_th=config.workflow.regressors_fd_th,
         regressors_dvars_th=config.workflow.regressors_dvars_th,
diff --git a/fmriprep/workflows/bold/confounds.py b/fmriprep/workflows/bold/confounds.py