18 | 18 | from moabb.datasets.fake import FakeDataset |
19 | 19 | from moabb.evaluations import evaluations as ev |
20 | 20 | from moabb.evaluations.base import optuna_available |
| 21 | +from moabb.evaluations.splitters import LearningCurveSplitter |
21 | 22 | from moabb.evaluations.utils import _create_save_path as create_save_path |
22 | 23 | from moabb.evaluations.utils import _save_model_cv as save_model_cv |
23 | 24 | from moabb.paradigms.motor_imagery import FakeImageryParadigm |
@@ -228,110 +229,112 @@ def test_postprocess_pipeline(self): |
228 | 229 |
229 | 230 |
230 | 231 | class TestWithinSessLearningCurve: |
231 | | - """Some tests for the learning curve evaluation. |
| 232 | + """Tests for the learning curve evaluation using LearningCurveSplitter.""" |
232 | 233 |
233 | | - TODO if we ever extend dataset metadata, e.g. including y for |
234 | | - example, we could get rid of a lot of issues regarding valid inputs |
235 | | - for policy per_class as this could be determined at Evaluation |
236 | | - initialization instead of during running the evaluation |
237 | | - """ |
238 | | - |
239 | | - @pytest.mark.skip(reason="This test is not working") |
240 | 234 | def test_correct_results_integrity(self): |
| 235 | + """Test that learning curve results have correct columns.""" |
241 | 236 | learning_curve_eval = ev.WithinSessionEvaluation( |
242 | 237 | paradigm=FakeImageryParadigm(), |
243 | 238 | datasets=[dataset], |
244 | | - data_size={"policy": "ratio", "value": np.array([0.2, 0.5])}, |
245 | | - n_perms=np.array([2, 2]), |
| 239 | + cv_class=LearningCurveSplitter, |
| 240 | + cv_params={ |
| 241 | + "data_size": {"policy": "ratio", "value": np.array([0.2, 0.5])}, |
| 242 | + "n_perms": np.array([2, 2]), |
| 243 | + "test_size": 0.2, |
| 244 | + }, |
| 245 | + overwrite=True, # Ensure fresh results, not cached |
246 | 246 | ) |
247 | 247 | process_pipeline = learning_curve_eval.paradigm.make_process_pipelines(dataset)[0] |
248 | | - results = [ |
249 | | - r |
250 | | - for r in learning_curve_eval.evaluate( |
| 248 | + results = list( |
| 249 | + learning_curve_eval.evaluate( |
251 | 250 | dataset, pipelines, param_grid=None, process_pipeline=process_pipeline |
252 | 251 | ) |
253 | | - ] |
| 252 | + ) |
| 253 | + assert len(results) > 0 |
254 | 254 | keys = results[0].keys() |
255 | | - assert len(keys) == 10 # 8 + 2 new for learning curve |
256 | 255 | assert "permutation" in keys |
257 | 256 | assert "data_size" in keys |
258 | 257 |
259 | 258 | def test_all_policies_work(self): |
260 | | - kwargs = dict(paradigm=FakeImageryParadigm(), datasets=[dataset], n_perms=[2, 2]) |
261 | | - # The next two should work without issue |
| 259 | + """Test that both ratio and per_class policies work.""" |
| 260 | + # Ratio policy should work |
262 | 261 | ev.WithinSessionEvaluation( |
263 | | - data_size={"policy": "per_class", "value": [5, 10]}, **kwargs |
| 262 | + paradigm=FakeImageryParadigm(), |
| 263 | + datasets=[dataset], |
| 264 | + cv_class=LearningCurveSplitter, |
| 265 | + cv_params={ |
| 266 | + "data_size": {"policy": "ratio", "value": [0.2, 0.5]}, |
| 267 | + "n_perms": [2, 2], |
| 268 | + "test_size": 0.2, |
| 269 | + }, |
264 | 270 | ) |
| 271 | + |
| 272 | + # Per class policy should work |
265 | 273 | ev.WithinSessionEvaluation( |
266 | | - data_size={"policy": "ratio", "value": [0.2, 0.5]}, **kwargs |
| 274 | + paradigm=FakeImageryParadigm(), |
| 275 | + datasets=[dataset], |
| 276 | + cv_class=LearningCurveSplitter, |
| 277 | + cv_params={ |
| 278 | + "data_size": {"policy": "per_class", "value": [5, 10]}, |
| 279 | + "n_perms": [2, 2], |
| 280 | + "test_size": 0.2, |
| 281 | + }, |
267 | 282 | ) |
| 283 | + |
| 284 | + # Invalid policy should raise (tested at splitter level since validation is lazy) |
268 | 285 | with pytest.raises(ValueError): |
269 | | - ev.WithinSessionEvaluation( |
| 286 | + LearningCurveSplitter( |
270 | 287 | data_size={"policy": "does_not_exist", "value": [0.2, 0.5]}, |
271 | | - **kwargs, |
| 288 | + n_perms=[2, 2], |
| 289 | + test_size=0.2, |
272 | 290 | ) |
273 | 291 |
274 | | - @pytest.mark.skip(reason="This test is not working") |
275 | | - def test_data_sanity(self): |
276 | | - # need this helper to iterate over the generator |
277 | | - def run_evaluation(eval, dataset, pipelines): |
278 | | - process_pipeline = eval.paradigm.make_process_pipelines(dataset)[0] |
279 | | - list( |
280 | | - eval.evaluate( |
281 | | - dataset, pipelines, param_grid=None, process_pipeline=process_pipeline |
282 | | - ) |
283 | | - ) |
| 292 | + def test_datasize_parameters(self): |
| 293 | + """Test that data_size parameter validation works correctly.""" |
| 294 | + # Test validation at LearningCurveSplitter level (validation is lazy) |
284 | 295 |
285 | | - # E.g. if number of samples too high -> expect error |
286 | | - kwargs = dict(paradigm=FakeImageryParadigm(), datasets=[dataset], n_perms=[2, 2]) |
287 | | - should_work = ev.WithinSessionEvaluation( |
288 | | - data_size={"policy": "per_class", "value": [5, 10]}, **kwargs |
289 | | - ) |
290 | | - too_many_samples = ev.WithinSessionEvaluation( |
291 | | - data_size={"policy": "per_class", "value": [5, 100000]}, **kwargs |
292 | | - ) |
293 | | - # This one should run |
294 | | - run_evaluation(should_work, dataset, pipelines) |
| 296 | + # Decreasing data_size should fail |
295 | 297 | with pytest.raises(ValueError): |
296 | | - run_evaluation(too_many_samples, dataset, pipelines) |
297 | | - |
298 | | - def test_eval_grid_search(self): |
299 | | - pass |
| 298 | + LearningCurveSplitter( |
| 299 | + data_size={"policy": "ratio", "value": [0.5, 0.2]}, |
| 300 | + n_perms=[2, 1], |
| 301 | + test_size=0.2, |
| 302 | + ) |
300 | 303 |
301 | | - def test_datasize_parameters(self): |
302 | | - # Fail if not values are not correctly ordered |
303 | | - kwargs = dict(paradigm=FakeImageryParadigm(), datasets=[dataset]) |
304 | | - decreasing_datasize = dict( |
305 | | - data_size={"policy": "per_class", "value": [5, 4]}, n_perms=[2, 1], **kwargs |
306 | | - ) |
307 | | - constant_datasize = dict( |
308 | | - data_size={"policy": "per_class", "value": [5, 5]}, n_perms=[2, 3], **kwargs |
309 | | - ) |
310 | | - increasing_perms = dict( |
311 | | - data_size={"policy": "per_class", "value": [3, 4]}, n_perms=[2, 3], **kwargs |
312 | | - ) |
| 304 | + # Constant data_size should fail |
313 | 305 | with pytest.raises(ValueError): |
314 | | - ev.WithinSessionEvaluation(**decreasing_datasize) |
315 | | - with pytest.raises(ValueError): |
316 | | - ev.WithinSessionEvaluation(**constant_datasize) |
| 306 | + LearningCurveSplitter( |
| 307 | + data_size={"policy": "ratio", "value": [0.5, 0.5]}, |
| 308 | + n_perms=[2, 2], |
| 309 | + test_size=0.2, |
| 310 | + ) |
| 311 | + |
| 312 | + # Increasing n_perms should fail |
317 | 313 | with pytest.raises(ValueError): |
318 | | - ev.WithinSessionEvaluation(**increasing_perms) |
| 314 | + LearningCurveSplitter( |
| 315 | + data_size={"policy": "ratio", "value": [0.2, 0.5]}, |
| 316 | + n_perms=[2, 3], |
| 317 | + test_size=0.2, |
| 318 | + ) |
319 | 319 |
320 | 320 | def test_postprocess_pipeline(self): |
| 321 | + """Test that postprocess_pipeline works with learning curve evaluation.""" |
321 | 322 | learning_curve_eval = ev.WithinSessionEvaluation( |
322 | 323 | paradigm=FakeImageryParadigm(), |
323 | 324 | datasets=[dataset], |
324 | | - data_size={"policy": "ratio", "value": np.array([0.2, 0.5])}, |
325 | | - n_perms=np.array([2, 2]), |
| 325 | + cv_class=LearningCurveSplitter, |
| 326 | + cv_params={ |
| 327 | + "data_size": {"policy": "ratio", "value": np.array([0.2, 0.5])}, |
| 328 | + "n_perms": np.array([2, 2]), |
| 329 | + "test_size": 0.2, |
| 330 | + }, |
326 | 331 | ) |
327 | 332 |
328 | 333 | cov = Covariances("oas") |
329 | 334 | pipelines0 = { |
330 | 335 | "CovCspLda": make_pipeline( |
331 | 336 | cov, |
332 | | - CSP( |
333 | | - 8, |
334 | | - ), |
| 337 | + CSP(8), |
335 | 338 | LDA(), |
336 | 339 | ) |
337 | 340 | } |
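
For context, the configuration pattern these updated tests exercise looks roughly like the sketch below. It is illustrative only: the cv_class/cv_params interface, the LearningCurveSplitter arguments (data_size, n_perms, test_size), the overwrite flag, and the evaluate(...) call are taken from the diff above, while the FakeDataset arguments and the pyriemann/sklearn imports are assumptions standing in for the test module's fixtures, not a definitive recipe.

# Rough usage sketch for the pattern exercised by these tests.
# Assumptions are noted inline; imports mirror those visible at the top of
# the test module, plus the pyriemann/sklearn pieces used in the pipeline.
import numpy as np
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb.datasets.fake import FakeDataset
from moabb.evaluations import evaluations as ev
from moabb.evaluations.splitters import LearningCurveSplitter
from moabb.paradigms.motor_imagery import FakeImageryParadigm

# Placeholder dataset and pipeline dict, standing in for the module-level
# fixtures used by the tests (exact FakeDataset arguments may differ).
dataset = FakeDataset(paradigm="imagery")
pipelines = {"CovCspLda": make_pipeline(Covariances("oas"), CSP(8), LDA())}

# New-style configuration: learning-curve behaviour is supplied through
# cv_class/cv_params instead of the old data_size/n_perms keyword arguments.
evaluation = ev.WithinSessionEvaluation(
    paradigm=FakeImageryParadigm(),
    datasets=[dataset],
    cv_class=LearningCurveSplitter,
    cv_params={
        "data_size": {"policy": "ratio", "value": np.array([0.2, 0.5])},
        "n_perms": np.array([2, 2]),
        "test_size": 0.2,
    },
    overwrite=True,  # recompute rather than reuse cached results
)

# Same evaluation entry point as in the tests above; each yielded result
# carries the extra "data_size" and "permutation" keys.
process_pipeline = evaluation.paradigm.make_process_pipelines(dataset)[0]
results = list(
    evaluation.evaluate(
        dataset, pipelines, param_grid=None, process_pipeline=process_pipeline
    )
)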