diff --git a/examples/preprocessing/plot_dss_array.py b/examples/preprocessing/plot_dss_array.py index 0245529..fbbef0f 100644 --- a/examples/preprocessing/plot_dss_array.py +++ b/examples/preprocessing/plot_dss_array.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- - -"""Denoising source separation applied to a NumPy array""" +"""Denoising source separation applied to a NumPy array.""" # Authors: Daniel McCloy # diff --git a/examples/preprocessing/plot_dss_epochs.py b/examples/preprocessing/plot_dss_epochs.py index b8acdca..90af525 100644 --- a/examples/preprocessing/plot_dss_epochs.py +++ b/examples/preprocessing/plot_dss_epochs.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- - -"""Denoising source separation applied to an Epochs object""" +"""Denoising source separation applied to an Epochs object.""" # Authors: Daniel McCloy # @@ -18,23 +17,18 @@ # file paths data_path = sample.data_path() -raw_fname = op.join(data_path, 'MEG', 'sample', 'sample_audvis_raw.fif') -events_fname = op.join(data_path, 'MEG', 'sample', 'sample_audvis_raw-eve.fif') +raw_fname = op.join(data_path, 'MEG', 'sample', + 'sample_audvis_filt-0-40_raw.fif') # import sample data raw = mne.io.read_raw_fif(raw_fname, preload=True) -events = mne.read_events(events_fname) -# pick channels, filter, epoch -picks = mne.pick_types(raw.info, meg=False, eeg=True, eog=True) -# reject = dict(eeg=180e-6, eog=150e-6) -reject = None -raw.filter(0.3, 30, method='iir', picks=picks) -epochs = mne.Epochs(raw, events, event_id=1, preload=True, picks=picks, - reject=reject) -epochs.pick_types(eeg=True) +picks = mne.pick_types(raw.info, meg=False, eeg=True) +events = mne.find_events(raw) +epochs = mne.Epochs(raw, events, event_id=1, preload=True, + baseline=(None, 0), picks=picks) evoked = epochs.average() # perform DSS -dss_mat, dss_data = dss(epochs, data_thresh=1e-9, bias_thresh=1e-9) +dss_mat, dss_data = dss(epochs) evoked_data_clean = np.dot(dss_mat, evoked.data) evoked_data_clean[4:] = 0. # select 4 components diff --git a/examples/preprocessing/plot_tsdss_sim.py b/examples/preprocessing/plot_tsdss_sim.py new file mode 100644 index 0000000..a25dc47 --- /dev/null +++ b/examples/preprocessing/plot_tsdss_sim.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +"""Denoising source separation applied to an Epochs object.""" + +import numpy as np +from scipy.signal import lfilter +import matplotlib.pyplot as plt + +from mne import EpochsArray, create_info +from mne_sandbox.preprocessing import dss + +rng = np.random.RandomState(0) +sfreq = 1000. +f = 4. # sinusoid freq +n_channels, n_times, n_epochs = 10, 1000, 100 +max_delay = 20 +n_noises = 9 +sinusoid = np.sin(2 * np.pi * f * np.arange(int(round(sfreq / f))) / sfreq) +mixing = rng.randn(n_channels, n_noises) + +data = np.zeros((n_epochs, n_channels, n_times)) +samps = rng.randint(200, 200 + max_delay, n_epochs) +data[np.arange(n_epochs), :, samps] = 1 +data = lfilter(sinusoid, [1], data, axis=-1) +data += np.einsum( + 'ent,cn->ect', rng.randn(n_epochs, n_noises, n_times), mixing) +info = create_info(n_channels, sfreq, 'eeg') + +epochs = EpochsArray(data, info, tmin=-0.2) +evoked = epochs.average() + +# perform DSS +_, dss_data = dss(epochs) + +# perform tsDSS +_, tsdss_data = dss(epochs, max_delay=max_delay) + +# plot +fig, axs = plt.subplots(3, 1, figsize=(7, 10), sharex=True) +plotdata = [data.mean(1).T, dss_data[:, 0].T, tsdss_data[:, 0].T] +titles = ('raw data', + 'first DSS component', + 'first tsDSS component') +for ax, dat, ti in zip(axs, plotdata, titles): + ax.xaxis.set_ticks_position('bottom') + ax.yaxis.set_ticks_position('left') + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.plot(1e3 * evoked.times, dat) + ax.set_title(ti) +ax.set_xlabel('Time (ms)') +plt.tight_layout() +plt.show() diff --git a/mne_sandbox/preprocessing/_dss.py b/mne_sandbox/preprocessing/_dss.py index 417dc21..d50a858 100644 --- a/mne_sandbox/preprocessing/_dss.py +++ b/mne_sandbox/preprocessing/_dss.py @@ -1,18 +1,25 @@ # -*- coding: utf-8 -*- - -"""Denoising source separation""" +"""Denoising source separation.""" # Authors: Daniel McCloy +# Eric Larson # # License: BSD (3-clause) +from copy import deepcopy + import numpy as np -from mne import Epochs, EpochsArray, compute_covariance +from mne import (Epochs, EpochsArray, compute_covariance, create_info, + compute_rank) +from mne.cov import compute_whitener +from mne.decoding.receptive_field import _delay_time_series +from mne.utils import _ensure_int -def dss(data, data_max_components=None, data_thresh=0, - bias_max_components=None, bias_thresh=0, return_data=True): - """Process physiological data with denoising source separation (DSS) +def dss(data, data_max_components=None, data_thresh='auto', + bias_max_components=None, bias_thresh=1e-6, max_delay=0, + rank=None, return_data=True): + """Process physiological data with denoising source separation (DSS). Implementation follows the procedure described in Särelä & Valpola [1]_ and de Cheveigné & Simon [2]_. @@ -24,10 +31,8 @@ def dss(data, data_max_components=None, data_thresh=0, data_max_components : int | None Maximum number of components to keep during PCA decomposition of the data. ``None`` (the default) keeps all suprathreshold components. - data_thresh : float | None - Threshold (relative to the largest component) above which components - will be kept during decomposition of the data. The default keeps all - non-zero values; to keep all values, specify ``thresh=None``. + data_thresh : float | str | None + Threshold to pass to :func:`mne.compute_rank`. bias_max_components : int | None Maximum number of components to keep during PCA decomposition of the bias function. ``None`` (the default) keeps all suprathreshold @@ -37,8 +42,16 @@ def dss(data, data_max_components=None, data_thresh=0, will be discarded during decomposition of the bias function. ``None`` (the default) keeps all non-zero values; to keep all values, pass ``thresh=None`` and ``max_components=None``. + max_delay : int + Maximum delay (in samples) to consider. Zero will use DSS, anything + greater than zero will use tsDSS. + rank : None | dict | 'info' | 'full' + See :func:`mne.compute_rank`. return_data : bool - Whether to return the denoised data along with the denoising matrix. + Whether to return the denoised data along with the denoising matrix, + which can be obtained via:: + + data_dss = np.einsum('ij,hjk->hik', dss_mat, data) Returns ------- @@ -53,27 +66,47 @@ def dss(data, data_max_components=None, data_thresh=0, References ---------- .. [1] Särelä, Jaakko, and Valpola, Harri (2005). Denoising source - separation. Journal of Machine Learning Research 6: 233–72. - + separation. Journal of Machine Learning Research 6: 233–72. .. [2] de Cheveigné, Alain, and Simon, Jonathan Z. (2008). Denoising based - on spatial filtering. Journal of Neuroscience Methods, 171(2): 331-339. + on spatial filtering. Journal of Neuroscience Methods, 171(2): 331-339. """ if isinstance(data, (Epochs, EpochsArray)): - data_cov = compute_covariance(data).data - bias_cov = np.cov(data.average().pick_types(eeg=True, ref_meg=False). - data) - if return_data: - data = data.get_data() + epochs = data + data = epochs.get_data() elif isinstance(data, np.ndarray): if data.ndim != 3: raise ValueError('Data to denoise must have shape ' '(n_trials, n_channels, n_times).') - data_cov = np.sum([np.dot(trial, trial.T) for trial in data], axis=0) - bias_cov = np.cov(data.mean(axis=0)) + info = create_info(data.shape[1], 1000., 'eeg') + epochs = EpochsArray(data, info) else: raise TypeError('Data to denoise must be an instance of mne.Epochs or ' - 'a numpy array.') - dss_mat = _dss(data_cov, bias_cov, data_max_components, data_thresh, + 'ndarray, got type %s' % (type(data),)) + _check_thresh(data_thresh) + rank = compute_rank(epochs, rank=rank, tol=data_thresh) + + # Upsample to virtual channels for tsDSS + max_delay = _ensure_int(max_delay) + if max_delay < 0: + raise ValueError('max_delay must be ≥ 0, got %s' % (max_delay,)) + rank = {key: max_delay * val for key, val in rank.items()} + data = _delay_time_series(data.transpose(2, 0, 1), 0, max_delay, 1.) + data = data.transpose(1, 2, 3, 0) # ep, ch, del, time + data = np.reshape(data, (data.shape[0], -1, data.shape[-1])) + info = epochs.info.copy() + chs = list() + for ch in info['chs']: + for ii in range(max_delay + 1): + this_ch = deepcopy(ch) + this_ch['ch_name'] += '_%06d' % (ii,) + chs.append(this_ch) + info.update(projs=[], chs=chs) + info._update_redundant() + info._check_consistency() + epochs = EpochsArray(data, info) + + # Actually compute DSS transformation + dss_mat = _dss(epochs, rank, data_max_components, bias_max_components, bias_thresh) if return_data: # next line equiv. to: np.array([np.dot(dss_mat, ep) for ep in data]) @@ -83,30 +116,63 @@ def dss(data, data_max_components=None, data_thresh=0, return dss_mat -def _dss(data_cov, bias_cov, data_max_components=None, data_thresh=None, - bias_max_components=None, bias_thresh=None): - """Process physiological data with denoising source separation (DSS) +def _dss(epochs, rank, data_max_components, + bias_max_components, bias_thresh): + """Process physiological data with denoising source separation (DSS). Acts on covariance matrices; allows specification of arbitrary bias functions (as compared to the public ``dss`` function, which forces the bias to be the evoked response). """ - data_eigval, data_eigvec = _pca(data_cov, data_max_components, data_thresh) - W = np.sqrt(1 / data_eigval) # diagonal of whitening matrix + data_cov = compute_covariance(epochs, verbose='error') + bias_epochs = EpochsArray(epochs.average(picks='all').data[np.newaxis], + epochs.info) + bias_cov = compute_covariance(bias_epochs, verbose='error') + + # From Eq. 7 in the de Cheveigné and Simon paper, the dss_mat A: + # A = P @ Q @ R2 @ N2 @ R1 @ N1 + # Where: + # - N1 is the initial normalization (row normalization) + # - R1 the first PCA rotation + # - N2 the second normalization (whitening) + # - R2 the second PCA rotation + # - Q the criterion-based selector + # - P the projection back to sensor space + + # Here we skip N1, assume compute_whitener does a good enough job + # of whitening the data that a diagonal prewhitening is not necessary. + + # First rotation (R1) and second normalization (N2) + # ------------------------------------------------- + # Obtain via compute_whitener in MNE so we can be careful about channel + # types and rank deficiency. + N2_R1 = compute_whitener(data_cov, epochs.info, pca=True, rank=rank, + verbose='error')[0] # ignore avg ref warnings + + # Second rotation (R2) + # -------------------- # bias covariance projected into whitened PCA space of data channels - bias_cov_white = (W * data_eigvec).T.dot(bias_cov).dot(data_eigvec) * W + bias_cov_white = N2_R1 @ bias_cov['data'] @ N2_R1.T # proj. matrix from whitened data space to a space maximizing bias fxn - bias_eigval, bias_eigvec = _pca(bias_cov_white, bias_max_components, - bias_thresh) + _, R2 = _pca(bias_cov_white, bias_max_components, bias_thresh) + R2 = R2.T # proj. matrix from data to bias-maximizing space (DSS space) - dss_mat = (W[np.newaxis, :] * data_eigvec).dot(bias_eigvec) - # normalize DSS dimensions - N = np.sqrt(1 / np.diag(dss_mat.T.dot(data_cov).dot(dss_mat))) - return (N * dss_mat).T + A = R2 @ N2_R1 + # Normalize DSS dimensions + P = np.sqrt(1 / np.diag(A.dot(data_cov['data']).dot(A.T))) + A *= P[:, np.newaxis] + + # Q and P are left to be computed by the user. + return A + + +def _check_thresh(thresh): + if thresh is not None and thresh != 'auto' and not 0 <= thresh <= 1: + raise ValueError('Threshold must be between 0 and 1 (or None).') def _pca(cov, max_components=None, thresh=0): - """Perform PCA decomposition + """Perform PCA decomposition. Parameters ---------- @@ -127,9 +193,7 @@ def _pca(cov, max_components=None, thresh=0): eigvec : array 2-dimensional array of eigenvectors. """ - - if thresh is not None and (thresh > 1 or thresh < 0): - raise ValueError('Threshold must be between 0 and 1 (or None).') + _check_thresh(thresh) eigval, eigvec = np.linalg.eigh(cov) eigval = np.abs(eigval) sort_ix = np.argsort(eigval)[::-1]