47 changes: 40 additions & 7 deletions mne/cov.py
@@ -553,16 +553,17 @@ def make_ad_hoc_cov(info, std=None, *, verbose=None):
return Covariance(data, ch_names, info["bads"], info["projs"], nfree=0)


def _check_n_samples(n_samples, n_chan):
def _check_n_samples(n_samples, n_chan, on_few_samples="warn"):
"""Check to see if there are enough samples for reliable cov calc."""
n_samples_min = 10 * (n_chan + 1) // 2
if n_samples <= 0:
raise ValueError("No samples found to compute the covariance matrix")
if n_samples < n_samples_min:
warn(
msg = (
f"Too few samples (required : {n_samples_min} got : {n_samples}), "
"covariance estimate may be unreliable"
)
_on_missing(on_few_samples, msg, "on_few_samples")


@verbose
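For orientation, a minimal sketch of the dispatch the new argument enables — an illustrative stand-in, not MNE's actual _on_missing helper; the threshold formula is copied from the hunk above:

import warnings


def handle_few_samples(on_few_samples, msg):
    # Illustrative dispatcher: 'raise' -> exception, 'warn' -> warning,
    # 'ignore' -> silently continue; anything else is rejected.
    if on_few_samples == "raise":
        raise ValueError(msg)
    elif on_few_samples == "warn":
        warnings.warn(msg, RuntimeWarning)
    elif on_few_samples != "ignore":
        raise ValueError(f"invalid on_few_samples value: {on_few_samples!r}")


# With 60 channels, the check requires 10 * (60 + 1) // 2 == 305 samples
# before the covariance estimate is considered reliable.
n_chan = 60
n_samples_min = 10 * (n_chan + 1) // 2
assert n_samples_min == 305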
@@ -582,6 +583,7 @@ def compute_raw_covariance(
return_estimators=False,
reject_by_annotation=True,
rank=None,
on_few_samples="warn",
Member
Nowadays we try to add * to functions (and we've been adding them to old functions as we come across them), like

    *,
    on_few_samples="warn",

But really we could probably go with something much farther up... maybe after picks and before method?

verbose=None,
):
"""Estimate noise covariance matrix from a continuous segment of raw data.
@@ -662,6 +664,10 @@ def compute_raw_covariance(

.. versionadded:: 0.18
Support for 'info' mode.
on_few_samples : str
Can be 'warn' (default), 'ignore', or 'raise' to control behavior when
there are too few samples relative to the number of channels, which can
lead to inaccurate
covariance or rank estimates.
Member
Suggested change
covariance or rank estimates.
covariance or rank estimates.
.. versionadded:: 1.11

%(verbose)s

Returns
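A hedged usage sketch of the new keyword as proposed for compute_raw_covariance (the file path is hypothetical; the argument name and accepted values are taken from the diff):

import mne

raw = mne.io.read_raw_fif("sample_raw.fif", preload=True)  # hypothetical file
# Turn the few-samples warning into a hard error for a deliberately short segment:
cov = mne.compute_raw_covariance(raw, tmax=1.0, on_few_samples="raise")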
@@ -736,7 +742,7 @@ def compute_raw_covariance(
mu += raw_segment.sum(axis=1)
data += np.dot(raw_segment, raw_segment.T)
n_samples += raw_segment.shape[1]
_check_n_samples(n_samples, len(picks))
_check_n_samples(n_samples, len(picks), on_few_samples)
data -= mu[:, None] * (mu[None, :] / n_samples)
data /= n_samples - 1.0
logger.info("Number of samples used : %d", n_samples)
@@ -872,6 +878,7 @@ def compute_covariance(
return_estimators=False,
on_mismatch="raise",
rank=None,
on_few_samples="warn",
Member
    *,
    on_few_samples="warn",

Here maybe after projs would make sense?

verbose=None,
):
"""Estimate noise covariance matrix from epochs.
@@ -966,6 +973,10 @@ def compute_covariance(

.. versionadded:: 0.18
Support for 'info' mode.
on_few_samples : str
Can be 'warn' (default), 'ignore', or 'raise' to control behavior when
there are too few samples relative to the number of channels, which can
lead to inaccurate
covariance or rank estimates.
Member
Suggested change
covariance or rank estimates.
covariance or rank estimates.
.. versionadded:: 1.11

%(verbose)s

Returns
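And the epochs-based counterpart, again assuming the keyword lands as shown in this diff (file path hypothetical):

import mne

epochs = mne.read_epochs("sample-epo.fif")  # hypothetical file
# Silence the check when a short pre-stimulus baseline is used on purpose:
noise_cov = mne.compute_covariance(epochs, tmax=0.0, on_few_samples="ignore")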
@@ -1144,7 +1155,7 @@ def _unpack_epochs(epochs):

epochs = np.hstack(epochs)
n_samples_tot = epochs.shape[-1]
_check_n_samples(n_samples_tot, len(picks_meeg))
_check_n_samples(n_samples_tot, len(picks_meeg), on_few_samples)

epochs = epochs.T # sklearn | C-order
cov_data = _compute_covariance_auto(
@@ -1158,6 +1169,7 @@ def _unpack_epochs(epochs):
picks_list=picks_list,
scalings=scalings,
rank=rank,
on_few_samples=on_few_samples,
)

if keep_sample_mean is False:
@@ -1221,7 +1233,7 @@ def _eigvec_subspace(eig, eigvec, mask):

@verbose
def _compute_rank_raw_array(
data, info, rank, scalings, *, log_ch_type=None, verbose=None
data, info, rank, scalings, *, log_ch_type=None, on_few_samples="warn", verbose=None
):
from .io import RawArray

@@ -1231,6 +1243,7 @@ def _compute_rank_raw_array(
scalings,
info,
log_ch_type=log_ch_type,
on_few_samples=on_few_samples,
)


@@ -1249,6 +1262,7 @@ def _compute_covariance_auto(
cov_kind="",
log_ch_type=None,
log_rank=True,
on_few_samples="warn",
):
"""Compute covariance auto mode."""
# rescale to improve numerical stability
@@ -1258,6 +1272,7 @@ def _compute_covariance_auto(
info,
rank=rank,
scalings=scalings,
on_few_samples=on_few_samples,
verbose=_verbose_safe_false(),
)
with _scaled_array(data.T, picks_list, scalings):
@@ -1268,6 +1283,7 @@ def _compute_covariance_auto(
rank,
proj_subspace=True,
do_compute_rank=False,
on_few_samples=on_few_samples,
log_ch_type=log_ch_type,
verbose=None if log_rank else _verbose_safe_false(),
)
@@ -1729,6 +1745,7 @@ def prepare_noise_cov(
rank=None,
scalings=None,
on_rank_mismatch="ignore",
on_few_samples="warn",
verbose=None,
):
"""Prepare noise covariance matrix.
@@ -1751,6 +1768,10 @@ def prepare_noise_cov(

dict(mag=1e12, grad=1e11, eeg=1e5)
%(on_rank_mismatch)s
on_few_samples : str
Member
Only functions that compute a covariance should end up warning about the number of samples. Things like prepare_noise_cov and regularize shouldn't have any new public parameter. Under the hood they should pass on_few_samples="ignore" (or maybe nothing should need to be passed for it to work?)

Can be 'warn' (default), 'ignore', or 'raise' to control behavior when
there are too few samples relative to the number of channels, which can
lead to inaccurate
covariance or rank estimates.
%(verbose)s

Returns
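A minimal sketch of the alternative the review comment above describes — hypothetical, not part of this diff: prepare_noise_cov keeps its existing public signature and hard-codes the silent behavior in its internal _smart_eigh call inside mne/cov.py (the trailing arguments are taken from the hunk below; the leading ones are assumed):

def _prepare_noise_cov_internals(
    noise_cov, info, rank, scalings, projs, ch_names, on_rank_mismatch
):
    # Hypothetical internal call: no new public parameter on prepare_noise_cov;
    # the few-samples check is silenced because the covariance was already
    # computed elsewhere, where any warning would have been raised.
    eig, eigvec, _ = _smart_eigh(
        noise_cov,
        info,
        rank,
        scalings,
        projs,
        ch_names,
        on_rank_mismatch=on_rank_mismatch,
        on_few_samples="ignore",
    )
    return eig, eigvec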
@@ -1792,6 +1813,7 @@ def prepare_noise_cov(
projs,
ch_names,
on_rank_mismatch=on_rank_mismatch,
on_few_samples=on_few_samples,
)
noise_cov.update(eig=eig, eigvec=eigvec)
return noise_cov
@@ -1808,6 +1830,7 @@ def _smart_eigh(
proj_subspace=False,
do_compute_rank=True,
on_rank_mismatch="ignore",
on_few_samples="warn",
*,
log_ch_type=None,
verbose=None,
@@ -1838,6 +1861,7 @@ def _smart_eigh(
scalings,
info,
on_rank_mismatch=on_rank_mismatch,
on_few_samples=on_few_samples,
log_ch_type=log_ch_type,
)
assert C.ndim == 2 and C.shape[0] == C.shape[1]
@@ -1916,6 +1940,7 @@ def regularize(
dbs=0.1,
rank=None,
scalings=None,
on_few_samples="warn",
verbose=None,
):
"""Regularize noise covariance matrix.
@@ -1978,6 +2003,10 @@ def regularize(
See :func:`mne.compute_covariance`.

.. versionadded:: 0.17
on_few_samples : str
Can be 'warn' (default), 'ignore', or 'raise' to control behavior when
there are too few samples relative to the number of channels, which can
lead to inaccurate
covariance or rank estimates.
%(verbose)s

Returns
@@ -2032,7 +2061,7 @@ def regularize(
else:
regs.update(mag=mag, grad=grad)
if rank != "full":
rank = _compute_rank(cov, rank, scalings, info)
rank = _compute_rank(cov, rank, scalings, info, on_few_samples=on_few_samples)

info_ch_names = info["ch_names"]
ch_names_by_type = dict()
@@ -2092,7 +2121,9 @@ def regularize(
this_info = pick_info(info, this_picks)
# Here we could use proj_subspace=True, but this should not matter
# since this is already in a loop over channel types
_, eigvec, mask = _smart_eigh(this_C, this_info, rank)
_, eigvec, mask = _smart_eigh(
this_C, this_info, rank, on_few_samples=on_few_samples
)
U = eigvec[mask].T
this_C = np.dot(U.T, np.dot(this_C, U))

@@ -2119,6 +2150,7 @@ def _regularized_covariance(
log_ch_type=None,
log_rank=None,
cov_kind="",
on_few_samples="warn",
verbose=None,
):
"""Compute a regularized covariance from data using sklearn.
@@ -2166,6 +2198,7 @@ def _regularized_covariance(
cov_kind=cov_kind,
log_ch_type=log_ch_type,
log_rank=log_rank,
on_few_samples=on_few_samples,
)[reg]["data"]
return cov

19 changes: 17 additions & 2 deletions mne/decoding/_covs_ged.py
@@ -29,6 +29,7 @@ def _concat_cov(x_class, *, cov_kind, log_rank, reg, cov_method_params, info, ra
cov_kind=cov_kind,
log_rank=log_rank,
log_ch_type="data",
on_few_samples="ignore",
)

return cov, n_channels # the weight here is just the number of channels
@@ -88,6 +89,7 @@ def _csp_estimate(X, y, reg, cov_method_params, cov_est, info, rank, norm_trace)
rank=rank,
scalings=None,
log_ch_type="data",
on_few_samples="ignore",
)

covs = []
@@ -130,7 +132,12 @@ def _xdawn_estimate(
# Retrieve or compute whitening covariance
if R is None:
R = _regularized_covariance(
np.hstack(X), reg, cov_method_params, info, rank=rank
np.hstack(X),
reg,
cov_method_params,
info,
rank=rank,
on_few_samples="ignore",
)
elif isinstance(R, Covariance):
R = R.data
@@ -146,7 +153,9 @@ def _xdawn_estimate(
for evo, toeplitz in zip(evokeds, toeplitzs):
# Estimate covariance matrix of the prototype response
evo = np.dot(evo, toeplitz)
evo_cov = _regularized_covariance(evo, reg, cov_method_params, info, rank=rank)
evo_cov = _regularized_covariance(
evo, reg, cov_method_params, info, rank=rank, on_few_samples="ignore"
)
covs.append(evo_cov)

covs.append(R)
Expand All @@ -158,6 +167,7 @@ def _xdawn_estimate(
rank=rank,
scalings=None,
log_ch_type="data",
on_few_samples="ignore",
)
return covs, C_ref, info, rank, dict()

@@ -197,13 +207,15 @@ def _ssd_estimate(
method_params=cov_method_params,
rank="full",
info=picked_info,
on_few_samples="ignore",
)
R = _regularized_covariance(
X_noise,
reg=reg,
method_params=cov_method_params,
rank="full",
info=picked_info,
on_few_samples="ignore",
)
covs = [S, R]
C_ref = S
@@ -223,6 +235,7 @@ def _ssd_estimate(
rank,
_handle_default("scalings_cov_rank", None),
info,
on_few_samples="ignore",
).values()
)[0]
all_ranks.append(r)
@@ -266,6 +279,7 @@ def _spoc_estimate(X, y, reg, cov_method_params, info, rank):
rank=rank,
log_ch_type="data",
log_rank=ii == 0,
on_few_samples="ignore",
)

S = np.mean(covs * target[:, np.newaxis, np.newaxis], axis=0)
@@ -280,5 +294,6 @@ def _spoc_estimate(X, y, reg, cov_method_params, info, rank):
rank=rank,
scalings=None,
log_ch_type="data",
on_few_samples="ignore",
)
return covs, C_ref, info, rank, dict()
2 changes: 1 addition & 1 deletion mne/decoding/tests/test_csp.py
@@ -431,7 +431,7 @@ def test_spoc():
# check y
pytest.raises(ValueError, spoc.fit, X, y * 0)

# Check that doesn't take CSP-spcific input
# Check that doesn't take CSP-specific input
pytest.raises(TypeError, SPoC, cov_est="epoch")

# Check mixing matrix on simulated data
8 changes: 6 additions & 2 deletions mne/decoding/tests/test_ged.py
@@ -89,13 +89,17 @@ def _mock_cov_callable(X, y, cov_method_params=None, compute_C_ref=True):
for ci, this_class in enumerate(classes):
class_data = X[y == this_class]
class_data = class_data.transpose(1, 0, 2).reshape(n_channels, -1)
cov = _regularized_covariance(class_data, **cov_method_params)
cov = _regularized_covariance(
class_data, on_few_samples="ignore", **cov_method_params
)
covs.append(cov)
sample_weights.append(class_data.shape[0])

ref_data = X.transpose(1, 0, 2).reshape(n_channels, -1)
if compute_C_ref:
C_ref = _regularized_covariance(ref_data, **cov_method_params)
C_ref = _regularized_covariance(
ref_data, on_few_samples="ignore", **cov_method_params
)
else:
C_ref = None
info = _mock_info(n_channels)