Skip to content

Commit 68b88a8

Browse files
Update learning curve examples to use LearningCurveSplitter API
Update all learning curve examples to use the new LearningCurveSplitter as cv_class for WithinSessionEvaluation:
- examples/learning_curve/plot_learning_curve_p300.py
- examples/learning_curve/plot_learning_curve_motor_imagery.py
- examples/learning_curve/noplot_learning_curve_p300_external.py
- examples/external/learning_curve_p300_external.py
- examples/external/noplot_learning_curve_p300_external.py

The new API uses cv_class=LearningCurveSplitter with cv_params containing:
- data_size: dict with 'policy' and 'value' keys
- n_perms: array of permutations per data size
- test_size: fraction for test set

This replaces the old data_size and n_perms parameters that were passed directly to WithinSessionEvaluation.
1 parent 7d38940 commit 68b88a8

File tree

5 files changed

+145
-60
lines changed

5 files changed

+145
-60
lines changed

examples/external/learning_curve_p300_external.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,11 @@
1515
- Time-Decoupled Linear Discriminant Analysis
1616
1717
We will use the P300 paradigm, which uses the AUC as metric.
18+
19+
The learning curve shows how model performance changes with different
20+
amounts of training data. We use LearningCurveSplitter which creates
21+
train/test splits where the test set is fixed for each permutation while
22+
the training set is subsampled to different sizes.
1823
"""
1924

2025
# Authors: Jan Sosulski
@@ -36,6 +41,7 @@
3641
import moabb
3742
from moabb.datasets import BNCI2014_009
3843
from moabb.evaluations import WithinSessionEvaluation
44+
from moabb.evaluations.splitters import LearningCurveSplitter
3945
from moabb.paradigms import P300
4046

4147

@@ -95,30 +101,39 @@
95101
# ----------
96102
#
97103
# We define the paradigm (P300) and use the BNCI 2014-009 dataset for it.
98-
# The evaluation will return a dataframe containing AUCs for each permutation
99-
# and dataset size.
104+
# The evaluation will return a DataFrame containing AUCs for each permutation
105+
# and data size.
106+
#
107+
# LearningCurveSplitter creates train/test splits where:
108+
# - The test set is fixed for each permutation (using StratifiedShuffleSplit)
109+
# - The training set is subsampled according to the data_size policy
110+
# - Multiple permutations are run for each data size
100111

101112
paradigm = P300(resample=processing_sampling_rate)
102113
dataset = BNCI2014_009()
103114
# Remove the slicing of the subject list to evaluate multiple subjects
104115
dataset.subject_list = dataset.subject_list[0:1]
105116
datasets = [dataset]
106117
overwrite = True # set to True if we want to overwrite cached results
107-
data_size = dict(policy="ratio", value=np.geomspace(0.02, 1, 6))
108-
# When the training data is sparse, perform more permutations than when we have
109-
# a lot of data
118+
119+
# Define learning curve parameters
120+
data_size = {"policy": "ratio", "value": np.geomspace(0.02, 1, 4)}
121+
# When the training data is sparse, perform more permutations than when we have a lot of data
110122
n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int)
111-
print(n_perms)
112-
# Guarantee reproducibility
113-
np.random.seed(7536298)
123+
114124
evaluation = WithinSessionEvaluation(
115125
paradigm=paradigm,
116126
datasets=datasets,
117-
data_size=data_size,
118-
n_perms=n_perms,
127+
cv_class=LearningCurveSplitter,
128+
cv_params={
129+
"data_size": data_size,
130+
"n_perms": n_perms,
131+
"test_size": 0.2,
132+
},
119133
suffix="examples_lr",
120134
overwrite=overwrite,
121135
return_epochs=True,
136+
random_state=7536298, # For reproducibility
122137
)
123138

124139
results = evaluation.process(pipelines)
@@ -127,7 +142,8 @@
127142
# Plot Results
128143
# ------------
129144
#
130-
# Here we plot the results.
145+
# Here we plot the results. The 'data_size' column contains the training set
146+
# size for each fold.
131147

132148
fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])
133149

@@ -141,8 +157,8 @@
141157
sns.pointplot(data=r, x="data_size", y="score", hue="pipeline", ax=ax, palette="Set1")
142158

143159
errbar_meaning = "subjects" if n_subs > 1 else "permutations"
144-
title_str = f"Errorbar shows Mean-CI across {errbar_meaning}"
145-
ax.set_xlabel("Amount of training samples")
160+
title_str = f"Learning Curve (errorbar: Mean-CI across {errbar_meaning})"
161+
ax.set_xlabel("Number of training samples")
146162
ax.set_ylabel("ROC AUC")
147163
ax.set_title(title_str)
148164
fig.tight_layout()

examples/external/noplot_learning_curve_p300_external.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,11 @@
1515
- Time-Decoupled Linear Discriminant Analysis
1616
1717
We will use the P300 paradigm, which uses the AUC as metric.
18+
19+
The learning curve shows how model performance changes with different
20+
amounts of training data. We use LearningCurveSplitter which creates
21+
train/test splits where the test set is fixed for each permutation while
22+
the training set is subsampled to different sizes.
1823
"""
1924

2025
# Authors: Jan Sosulski
@@ -36,6 +41,7 @@
3641
import moabb
3742
from moabb.datasets import BNCI2014_009
3843
from moabb.evaluations import WithinSessionEvaluation
44+
from moabb.evaluations.splitters import LearningCurveSplitter
3945
from moabb.paradigms import P300
4046

4147

@@ -96,29 +102,38 @@
96102
# ----------
97103
#
98104
# We define the paradigm (P300) and use the BNCI 2014-009 dataset for it.
99-
# The evaluation will return a dataframe containing AUCs for each permutation
100-
# and dataset size.
105+
# The evaluation will return a DataFrame containing AUCs for each permutation
106+
# and data size.
107+
#
108+
# LearningCurveSplitter creates train/test splits where:
109+
# - The test set is fixed for each permutation (using StratifiedShuffleSplit)
110+
# - The training set is subsampled according to the data_size policy
111+
# - Multiple permutations are run for each data size
101112

102113
paradigm = P300(resample=processing_sampling_rate)
103114
dataset = BNCI2014_009()
104115
# Remove the slicing of the subject list to evaluate multiple subjects
105116
dataset.subject_list = dataset.subject_list[0:1]
106117
datasets = [dataset]
107118
overwrite = True # set to True if we want to overwrite cached results
108-
data_size = dict(policy="ratio", value=np.geomspace(0.02, 1, 6))
109-
# When the training data is sparse, perform more permutations than when we have
110-
# a lot of data
119+
120+
# Define learning curve parameters
121+
data_size = {"policy": "ratio", "value": np.geomspace(0.02, 1, 4)}
122+
# When the training data is sparse, perform more permutations than when we have a lot of data
111123
n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int)
112-
print(n_perms)
113-
# Guarantee reproducibility
114-
np.random.seed(7536298)
124+
115125
evaluation = WithinSessionEvaluation(
116126
paradigm=paradigm,
117127
datasets=datasets,
118-
data_size=data_size,
119-
n_perms=n_perms,
128+
cv_class=LearningCurveSplitter,
129+
cv_params={
130+
"data_size": data_size,
131+
"n_perms": n_perms,
132+
"test_size": 0.2,
133+
},
120134
suffix="examples_lr",
121135
overwrite=overwrite,
136+
random_state=7536298, # For reproducibility
122137
)
123138

124139
results = evaluation.process(pipelines)
@@ -127,7 +142,8 @@
127142
# Plot Results
128143
# ------------
129144
#
130-
# Here we plot the results.
145+
# Here we plot the results. The 'data_size' column contains the training set
146+
# size for each fold.
131147

132148
fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])
133149

@@ -141,8 +157,8 @@
141157
sns.pointplot(data=r, x="data_size", y="score", hue="pipeline", ax=ax, palette="Set1")
142158

143159
errbar_meaning = "subjects" if n_subs > 1 else "permutations"
144-
title_str = f"Errorbar shows Mean-CI across {errbar_meaning}"
145-
ax.set_xlabel("Amount of training samples")
160+
title_str = f"Learning Curve (errorbar: Mean-CI across {errbar_meaning})"
161+
ax.set_xlabel("Number of training samples")
146162
ax.set_ylabel("ROC AUC")
147163
ax.set_title(title_str)
148164
fig.tight_layout()

examples/learning_curve/noplot_learning_curve_p300_external.py

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,11 @@
1515
- Time-Decoupled Linear Discriminant Analysis
1616
1717
We will use the P300 paradigm, which uses the AUC as metric.
18+
19+
The learning curve shows how model performance changes with different
20+
amounts of training data. We use LearningCurveSplitter which creates
21+
train/test splits where the test set is fixed for each permutation while
22+
the training set is subsampled to different sizes.
1823
"""
1924

2025
# Authors: Jan Sosulski
@@ -36,6 +41,7 @@
3641
import moabb
3742
from moabb.datasets import BNCI2014_009
3843
from moabb.evaluations import WithinSessionEvaluation
44+
from moabb.evaluations.splitters import LearningCurveSplitter
3945
from moabb.paradigms import P300
4046

4147

@@ -96,29 +102,38 @@
96102
# ----------
97103
#
98104
# We define the paradigm (P300) and use the BNCI 2014-009 dataset for it.
99-
# The evaluation will return a dataframe containing AUCs for each permutation
100-
# and dataset size.
105+
# The evaluation will return a DataFrame containing AUCs for each permutation
106+
# and data size.
107+
#
108+
# LearningCurveSplitter creates train/test splits where:
109+
# - The test set is fixed for each permutation (using StratifiedShuffleSplit)
110+
# - The training set is subsampled according to the data_size policy
111+
# - Multiple permutations are run for each data size
101112

102113
paradigm = P300(resample=processing_sampling_rate)
103114
dataset = BNCI2014_009()
104115
# Remove the slicing of the subject list to evaluate multiple subjects
105116
dataset.subject_list = dataset.subject_list[0:1]
106117
datasets = [dataset]
107118
overwrite = True # set to True if we want to overwrite cached results
108-
data_size = dict(policy="ratio", value=np.geomspace(0.02, 1, 6))
109-
# When the training data is sparse, perform more permutations than when we have
110-
# a lot of data
119+
120+
# Define learning curve parameters
121+
data_size = {"policy": "ratio", "value": np.geomspace(0.02, 1, 4)}
122+
# When the training data is sparse, perform more permutations than when we have a lot of data
111123
n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int)
112-
print(n_perms)
113-
# Guarantee reproducibility
114-
np.random.seed(7536298)
124+
115125
evaluation = WithinSessionEvaluation(
116126
paradigm=paradigm,
117127
datasets=datasets,
118-
data_size=data_size,
119-
n_perms=n_perms,
128+
cv_class=LearningCurveSplitter,
129+
cv_params={
130+
"data_size": data_size,
131+
"n_perms": n_perms,
132+
"test_size": 0.2,
133+
},
120134
suffix="examples_lr",
121135
overwrite=overwrite,
136+
random_state=7536298, # For reproducibility
122137
)
123138

124139
results = evaluation.process(pipelines)
@@ -127,7 +142,8 @@
127142
# Plot Results
128143
# ------------
129144
#
130-
# Here we plot the results.
145+
# Here we plot the results. The 'data_size' column contains the training set
146+
# size for each fold.
131147

132148
fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])
133149

@@ -141,8 +157,8 @@
141157
sns.pointplot(data=r, x="data_size", y="score", hue="pipeline", ax=ax, palette="Set1")
142158

143159
errbar_meaning = "subjects" if n_subs > 1 else "permutations"
144-
title_str = f"Errorbar shows Mean-CI across {errbar_meaning}"
145-
ax.set_xlabel("Amount of training samples")
160+
title_str = f"Learning Curve (errorbar: Mean-CI across {errbar_meaning})"
161+
ax.set_xlabel("Number of training samples")
146162
ax.set_ylabel("ROC AUC")
147163
ax.set_title(title_str)
148164
fig.tight_layout()

examples/learning_curve/plot_learning_curve_motor_imagery.py

Lines changed: 30 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
================================================
55
66
This example shows how to perform a within session motor imagery analysis on the
7-
very popular dataset 2a from the BCI competition IV.
7+
very popular dataset 2a from the BCI competition IV while creating learning curves.
88
99
We will compare two pipelines :
1010
@@ -13,6 +13,11 @@
1313
1414
We will use the LeftRightImagery paradigm. This will restrict the analysis
1515
to two classes (left- vs right-hand) and use AUC as metric.
16+
17+
The learning curve shows how model performance changes with different
18+
amounts of training data. We use LearningCurveSplitter which creates
19+
train/test splits where the test set is fixed for each permutation while
20+
the training set is subsampled to different sizes.
1621
"""
1722

1823
# Original author: Alexandre Barachant <alexandre.barachant@gmail.com>
@@ -33,6 +38,7 @@
3338
import moabb
3439
from moabb.datasets import BNCI2014_001
3540
from moabb.evaluations import WithinSessionEvaluation
41+
from moabb.evaluations.splitters import LearningCurveSplitter
3642
from moabb.paradigms import LeftRightImagery
3743

3844

@@ -65,8 +71,13 @@
6571
# ----------
6672
#
6773
# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014_001).
68-
# The evaluation will return a DataFrame containing a single AUC score for
69-
# each subject / session of the dataset, and for each pipeline.
74+
# The evaluation will return a DataFrame containing AUCs for each permutation
75+
# and data size.
76+
#
77+
# LearningCurveSplitter creates train/test splits where:
78+
# - The test set is fixed for each permutation (using StratifiedShuffleSplit)
79+
# - The training set is subsampled according to the data_size policy
80+
# - Multiple permutations are run for each data size
7081
#
7182
# Results are saved into the database, so that if you add a new pipeline, it
7283
# will not run again the evaluation unless a parameter has changed. Results can
@@ -77,17 +88,24 @@
7788
dataset.subject_list = dataset.subject_list[:1]
7889
datasets = [dataset]
7990
overwrite = True # set to True if we want to overwrite cached results
80-
# Evaluate for a specific number of training samples per class
81-
data_size = dict(policy="per_class", value=np.array([5, 10, 30, 50]))
91+
92+
# Define learning curve parameters
93+
data_size = {"policy": "ratio", "value": np.geomspace(0.1, 1, 5)}
8294
# When the training data is sparse, perform more permutations than when we have a lot of data
83-
n_perms = np.floor(np.geomspace(20, 2, len(data_size["value"]))).astype(int)
95+
n_perms = np.floor(np.geomspace(10, 2, len(data_size["value"]))).astype(int)
96+
8497
evaluation = WithinSessionEvaluation(
8598
paradigm=paradigm,
8699
datasets=datasets,
100+
cv_class=LearningCurveSplitter,
101+
cv_params={
102+
"data_size": data_size,
103+
"n_perms": n_perms,
104+
"test_size": 0.2,
105+
},
87106
suffix="examples",
88107
overwrite=overwrite,
89-
data_size=data_size,
90-
n_perms=n_perms,
108+
random_state=42, # For reproducibility
91109
)
92110

93111
results = evaluation.process(pipelines)
@@ -99,7 +117,8 @@
99117
# ------------
100118
#
101119
# We plot the accuracy as a function of the number of training samples, for
102-
# each pipeline
120+
# each pipeline. The 'data_size' column contains the training set size for
121+
# each fold.
103122

104123
fig, ax = plt.subplots(facecolor="white", figsize=[8, 4])
105124

@@ -113,8 +132,8 @@
113132
sns.pointplot(data=r, x="data_size", y="score", hue="pipeline", ax=ax, palette="Set1")
114133

115134
errbar_meaning = "subjects" if n_subs > 1 else "permutations"
116-
title_str = f"Errorbar shows Mean-CI across {errbar_meaning}"
117-
ax.set_xlabel("Amount of training samples")
135+
title_str = f"Learning Curve (errorbar: Mean-CI across {errbar_meaning})"
136+
ax.set_xlabel("Number of training samples")
118137
ax.set_ylabel("ROC AUC")
119138
ax.set_title(title_str)
120139
fig.tight_layout()

0 commit comments

Comments (0)