Skip to content

Commit e9e482f

Browse files
Merge develop into unify-split-logic
2 parents 68b88a8 + 063fc23 commit e9e482f

File tree

6 files changed

+259
-9
lines changed

6 files changed

+259
-9
lines changed

docs/source/whats_new.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,14 @@ API changes
3939
- When CodeCarbon is installed, MOABB HDF5 results have an additional column `codecarbon_task_name`. If CodeCarbon is configured to save to file, its own tabular results have a column `task_name`. These columns are unique UUID4s. Related rows can be joined to see detailed costs and benefits of predictive performance and computing profiling metrics (:gh:`866` by `Ethan Davis`_).
4040
- Isolated model fitting, duration tracking, and CodeCarbon compute profiling tracking. New and consistent ordering of duration and CodeCarbon tracking across all evaluations: (Higher priority, closest to model fitting) required duration tracking, (lower priority, second closest to model fitting) optional CodeCarbon tracking (:gh:`866` by `Ethan Davis`_).
4141
- Replaced unreliable wall clock duration tracking (Python's `time.time()`) in favor of performance counter duration tracking (Python's `time.perf_counter()`) (:gh:`866` by `Ethan Davis`_).
42+
- Enable choice of online or offline CodeCarbon through the parameterization of `codecarbon_config` when instantiating a :class:`moabb.evaluations.base.BaseEvaluation` child class (:gh:`956` by `Ethan Davis`_).
4243
- Renamed stimulus channel from ``stim`` to ``STI`` in BNCI motor imagery and error-related potential datasets for clarity and BIDS compliance (by `Bruno Aristimunha`_).
4344
- Added four new BNCI P300/ERP dataset classes: :class:`moabb.datasets.BNCI2015_009` (AMUSE), :class:`moabb.datasets.BNCI2015_010` (RSVP), :class:`moabb.datasets.BNCI2015_012` (PASS2D), and :class:`moabb.datasets.BNCI2015_013` (ErrP) (by `Bruno Aristimunha`_).
4445

4546
Requirements
4647
~~~~~~~~~~~~
47-
- Requires CodeCarbon environment variables or a configuration file to be defined in the home directory or the current working directory (:gh:`866` by `Ethan Davis`_).
48+
- Allows CodeCarbon environment variables or a configuration file to be defined in the home directory or the current working directory (:gh:`866` by `Ethan Davis`_).
49+
- Added ``filelock`` as a core dependency to fix missing import errors in utils (:gh:`959` by `Mateusz Naklicki`_).
4850

4951
Bugs
5052
~~~~
@@ -56,6 +58,7 @@ Bugs
5658
- Fixing option to pickle model (:gh:`870` by `Ethan Davis`_)
5759
- Normalize Zenodo download paths and add a custom user-agent to improve download robustness (:gh:`946` by `Bruno Aristimunha`_)
5860
- Use the BNCI mirror host to avoid download timeouts (:gh:`946` by `Bruno Aristimunha`_)
61+
- Prevent Python mutable default argument when defining CodeCarbon configurations (:gh:`956` by `Ethan Davis`_).
5962
- Fix copytree FileExistsError in BrainInvaders2013a download by adding dirs_exist_ok=True (by `Bruno Aristimunha`_)
6063
- Ensure optional additional scoring columns in evaluation results (:gh:`957` by `Ethan Davis`_)
6164

@@ -709,3 +712,4 @@ API changes
709712
.. _Romani Michele: https://github.com/BRomans
710713
.. _Lionel Kusch: https://github.com/lionelkusch
711714
.. _Victor Martinez-Cagigal: https://github.com/vicmarcag
715+
.. _Mateusz Naklicki: https://github.com/luluu9

moabb/evaluations/base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from moabb.analysis import Results
1010
from moabb.datasets.base import BaseDataset
1111
from moabb.evaluations.utils import (
12+
Emissions,
1213
_convert_sklearn_params_to_optuna,
1314
_create_scorer,
1415
_DictScorer,
@@ -113,7 +114,7 @@ def __init__(
113114
optuna=False,
114115
time_out=60 * 15,
115116
verbose=None,
116-
codecarbon_config=dict(save_to_file=False, log_level="error"),
117+
codecarbon_config=None,
117118
):
118119
self.random_state = random_state
119120
self.n_jobs = n_jobs
@@ -128,7 +129,7 @@ def __init__(
128129
self.optuna = optuna
129130
self.time_out = time_out
130131
self.verbose = verbose
131-
self.codecarbon_config = codecarbon_config
132+
self.emissions = Emissions(codecarbon_config=codecarbon_config)
132133

133134
self.additional_columns = additional_columns
134135
if additional_columns is None:

moabb/evaluations/evaluations.py

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def _pipeline_requires_epochs(pipeline):
4747

4848

4949
try:
50-
from codecarbon import EmissionsTracker
50+
from codecarbon import EmissionsTracker # noqa
5151

5252
_carbonfootprint = True
5353
except ImportError:
@@ -265,7 +265,8 @@ def _evaluate(
265265
emissions = np.nan
266266
task_name = ""
267267
if _carbonfootprint:
268-
tracker = EmissionsTracker(**self.codecarbon_config)
268+
# Initialise CodeCarbon per cross-validation
269+
tracker = self.emissions.create_tracker()
269270
tracker.start()
270271

271272
# Create scorer once before CV loop
@@ -366,11 +367,11 @@ def _evaluate(
366367
X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
367368
)
368369
res = {
369-
"time": duration / self.n_splits,
370+
"time": duration / self.n_splits, # 5 fold CV
370371
"dataset": dataset,
371372
"subject": subject,
372373
"session": session,
373-
"n_samples": len(y_cv),
374+
"n_samples": len(y_cv), # not training sample
374375
"n_channels": nchan,
375376
"pipeline": name,
376377
}
@@ -384,6 +385,61 @@ def _evaluate(
384385

385386
yield res
386387

388+
def get_data_size_subsets(self, y):
    """Build nested index subsets of ``y`` according to ``self.data_size``.

    ``self.data_size`` is a dict with a ``"policy"`` key (``"ratio"`` or
    ``"per_class"``) and a ``"value"`` key listing the subset sizes.
    Returns a list of index arrays, one per requested size.
    """
    if self.data_size is None:
        raise ValueError(
            "Cannot create data subsets without valid policy for data_size."
        )
    policy = self.data_size["policy"]
    if policy == "ratio":
        ratios = np.array(self.data_size["value"])
        if np.any(ratios < 0) or np.any(ratios > 1):
            raise ValueError("Data subset ratios must be in range [0, 1]")
        # Round up so a non-zero ratio always selects at least one sample.
        counts = np.ceil(ratios * len(y)).astype(int)
        return [np.array(range(c)) for c in counts]
    if policy == "per_class":
        # Map each class label to the positions where it occurs.
        class_indices = {cl: np.where(y == cl)[0] for cl in np.unique(y)}
        n_smallest_class = min(
            (len(ix) for ix in class_indices.values()), default=np.inf
        )
        subsets = []
        for ds in self.data_size["value"]:
            if ds > n_smallest_class:
                raise ValueError(
                    f"Smallest class has {n_smallest_class} samples. "
                    f"Desired samples per class {ds} is too large."
                )
            # Take the first `ds` occurrences of every class.
            subsets.append(
                np.concatenate(
                    [class_indices[cl][:ds] for cl in class_indices]
                )
            )
        return subsets
    raise ValueError(f"Unknown policy {self.data_size['policy']}")
423+
424+
def score_explicit(self, res, clf, X_train, y_train, X_test, y_test):
    """Fit model and update result dict with scores and duration.

    Parameters
    ----------
    res : dict
        Partially filled result row. Mutated in place: scoring columns are
        added via ``_score_and_update`` and ``"time"`` is always set.
    clf : estimator
        Unfitted sklearn-style pipeline/classifier.
    X_train, y_train
        Training data and labels.
    X_test, y_test
        Held-out data and labels.

    Notes
    -----
    When ``self.error_score != "raise"``, a ``ValueError`` raised during
    fitting or scoring is swallowed and ``res["score"]`` is set to
    ``self.error_score`` instead.
    """
    if not self.mne_labels:
        # convert labels if array, keep them if epochs and mne_labels is set
        le = LabelEncoder()
        y_train = le.fit_transform(y_train)
        # NOTE(review): transform assumes y_test has no labels unseen in
        # y_train — LabelEncoder raises otherwise; confirm with callers.
        y_test = le.transform(y_test)
    # Monotonic performance counter: duration covers fit, fit validation,
    # and scoring, and is recorded on both the success and error paths.
    t_start = perf_counter()
    try:
        model = clf.fit(X_train, y_train)
        _ensure_fitted(model)
        scorer = _create_scorer(model, self.paradigm.scoring)
        _score_and_update(res, scorer, model, X_test, y_test)
    except ValueError as e:
        if self.error_score == "raise":
            raise e
        # Record the sentinel score instead of failing the whole evaluation.
        res["score"] = self.error_score
    res["time"] = perf_counter() - t_start
442+
387443
def evaluate(
388444
self, dataset, pipelines, param_grid, process_pipeline, postprocess_pipeline=None
389445
):
@@ -497,7 +553,7 @@ def evaluate(
497553

498554
if _carbonfootprint:
499555
# Initialise CodeCarbon per cross-validation
500-
tracker = EmissionsTracker(**self.codecarbon_config)
556+
tracker = self.emissions.create_tracker()
501557
tracker.start()
502558

503559
# Create scorer once before CV loop
@@ -684,7 +740,7 @@ def evaluate(
684740

685741
if _carbonfootprint:
686742
# Initialise CodeCarbon per cross-validation
687-
tracker = EmissionsTracker(**self.codecarbon_config)
743+
tracker = self.emissions.create_tracker()
688744
tracker.start()
689745

690746
# Progressbar at subject level

moabb/evaluations/utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@
2525
optuna_available = False
2626

2727

28+
try:
29+
from codecarbon import EmissionsTracker, OfflineEmissionsTracker
30+
31+
_carbonfootprint = True
32+
except ImportError:
33+
_carbonfootprint = False
34+
35+
2836
def _ensure_fitted(estimator):
2937
"""Ensure an estimator is properly marked as fitted for sklearn 1.8+.
3038
@@ -462,3 +470,34 @@ def _score_and_update(res, scorer, model, X, y_true):
462470
"""
463471
score = scorer(model, X, y_true)
464472
return _update_result_with_scores(res, score)
473+
474+
475+
class Emissions:
    """Factory for CodeCarbon emissions trackers.

    Encapsulates the choice between CodeCarbon's online
    ``EmissionsTracker`` and ``OfflineEmissionsTracker`` based on the
    supplied configuration.

    Parameters
    ----------
    codecarbon_config : dict | None
        Keyword arguments forwarded to the CodeCarbon tracker
        constructor. When ``None``, a quiet in-memory default is used
        (``save_to_file=False, log_level="error"``). If any offline-only
        parameter is present, the offline tracker is selected.
    """

    # Offline mode parameters are a superset of online mode parameters.
    # Hardcoded check avoids object reflection for security and compatibility.
    # For more information see CodeCarbon documentation:
    # https://mlco2.github.io/codecarbon/parameters.html#specific-parameters-for-offline-mode
    _OFFLINE_PARAMS = frozenset(
        (
            "country_iso_code",
            "region",
            "cloud_provider",
            "cloud_region",
            "country_2letter_iso_code",
        )
    )

    def __init__(self, codecarbon_config=None):
        if codecarbon_config is None:
            # Default CodeCarbon configuration: no file output, errors only.
            codecarbon_config = dict(save_to_file=False, log_level="error")
            offline = False
        else:
            offline = any(key in codecarbon_config for key in self._OFFLINE_PARAMS)
        self.codecarbon_config = codecarbon_config
        self.codecarbon_offline = offline

    def create_tracker(self):
        """Instantiate a new CodeCarbon tracker from the stored config.

        Returns
        -------
        EmissionsTracker | OfflineEmissionsTracker
            Offline tracker when an offline-only parameter was configured,
            online tracker otherwise.
        """
        if self.codecarbon_offline:
            return OfflineEmissionsTracker(**self.codecarbon_config)
        return EmissionsTracker(**self.codecarbon_config)

moabb/tests/test_emissions.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Tests for the Emissions class in evaluations/utils.py."""
2+
3+
from unittest.mock import MagicMock, patch
4+
5+
import pytest
6+
7+
from moabb.evaluations.utils import Emissions
8+
9+
10+
try:
11+
from codecarbon import EmissionsTracker, OfflineEmissionsTracker # noqa
12+
13+
CODECARBON_AVAILABLE = True
14+
except ImportError:
15+
CODECARBON_AVAILABLE = False
16+
17+
18+
def test_default_config():
    """A None config falls back to quiet online-mode defaults."""
    em = Emissions()
    expected = {"save_to_file": False, "log_level": "error"}
    assert em.codecarbon_config == expected
    assert em.codecarbon_offline is False
26+
27+
28+
def test_custom_config_online_mode():
    """A config with no offline-only keys keeps the tracker in online mode."""
    config = dict(save_to_file=True, log_level="info", project_name="test_project")
    em = Emissions(codecarbon_config=config)
    # Config is stored as given; no offline parameter present -> online.
    assert em.codecarbon_config == config
    assert em.codecarbon_offline is False
41+
42+
43+
@pytest.mark.parametrize(
    "offline_param,param_value",
    [
        ("country_iso_code", "USA"),
        ("region", "us-west-1"),
        ("cloud_provider", "aws"),
        ("cloud_region", "us-west-1"),
        ("country_2letter_iso_code", "US"),
    ],
)
def test_custom_config_offline_mode(offline_param, param_value):
    """Each offline-only parameter alone switches the tracker to offline mode."""
    config = {
        "save_to_file": False,
        "log_level": "error",
        offline_param: param_value,
    }
    em = Emissions(codecarbon_config=config)
    assert em.codecarbon_config == config
    assert em.codecarbon_offline is True
66+
67+
68+
def test_custom_config_offline_mode_multiple_params():
    """Several offline parameters together still select offline mode."""
    config = dict(
        save_to_file=True,
        log_level="info",
        country_iso_code="USA",
        region="california",
        cloud_provider="aws",
    )
    em = Emissions(codecarbon_config=config)
    assert em.codecarbon_config == config
    assert em.codecarbon_offline is True
81+
82+
83+
@pytest.mark.skipif(not CODECARBON_AVAILABLE, reason="codecarbon not installed")
@patch("moabb.evaluations.utils.EmissionsTracker")
def test_create_tracker_default_config(mock_emissions_tracker):
    """With the default config, create_tracker builds an online EmissionsTracker."""
    sentinel = MagicMock()
    mock_emissions_tracker.return_value = sentinel
    tracker = Emissions().create_tracker()
    # The default config must be forwarded verbatim to the constructor.
    mock_emissions_tracker.assert_called_once_with(save_to_file=False, log_level="error")
    assert tracker == sentinel
96+
97+
98+
@pytest.mark.skipif(not CODECARBON_AVAILABLE, reason="codecarbon not installed")
@patch("moabb.evaluations.utils.EmissionsTracker")
def test_create_tracker_online_mode(mock_emissions_tracker):
    """An online-only custom config routes through EmissionsTracker."""
    sentinel = MagicMock()
    mock_emissions_tracker.return_value = sentinel
    em = Emissions(codecarbon_config={"save_to_file": True, "log_level": "info"})
    tracker = em.create_tracker()
    # The custom config must be forwarded verbatim to the constructor.
    mock_emissions_tracker.assert_called_once_with(save_to_file=True, log_level="info")
    assert tracker == sentinel
112+
113+
114+
@pytest.mark.skipif(not CODECARBON_AVAILABLE, reason="codecarbon not installed")
@patch("moabb.evaluations.utils.OfflineEmissionsTracker")
def test_create_tracker_offline_mode(mock_offline_emissions_tracker):
    """A config with an offline parameter routes through OfflineEmissionsTracker."""
    sentinel = MagicMock()
    mock_offline_emissions_tracker.return_value = sentinel
    em = Emissions(
        codecarbon_config={
            "save_to_file": False,
            "log_level": "error",
            "country_iso_code": "USA",
        }
    )
    tracker = em.create_tracker()
    # Every key, including the offline one, is forwarded to the constructor.
    mock_offline_emissions_tracker.assert_called_once_with(
        save_to_file=False, log_level="error", country_iso_code="USA"
    )
    assert tracker == sentinel
134+
135+
136+
@pytest.mark.parametrize(
    "config,expected_offline",
    [
        (None, False),
        ({}, False),
        ({"save_to_file": False}, False),
        ({"country_iso_code": "USA"}, True),
    ],
)
def test_codecarbon_offline_attribute_always_exists(config, expected_offline):
    """codecarbon_offline is always set and reflects the config's mode."""
    em = Emissions(codecarbon_config=config)
    assert hasattr(em, "codecarbon_offline")
    assert em.codecarbon_offline == expected_offline

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ dependencies = [
4949
"memory-profiler>=0.61.0",
5050
"edflib-python>=1.0.6",
5151
"edfio>=0.4.2",
52+
"filelock>=3.18.0",
5253
"pytest>=8.3.5",
5354
"mne-bids>=0.16",
5455
"scikit-learn>=1.6",

0 commit comments

Comments
 (0)