deeppavlov
diff --git a/‎autointent/context/data_handler/__init__.py‎
Lines changed: 2 additions & 2 deletions b/‎autointent/context/data_handler/__init__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎autointent/context/data_handler/_data_handler.py‎
Lines changed: 132 additions & 42 deletions b/‎autointent/context/data_handler/_data_handler.py‎
Lines changed: 132 additions & 42 deletions
diff --git a/‎autointent/context/data_handler/_dataset.py‎
Lines changed: 2 additions & 1 deletion b/‎autointent/context/data_handler/_dataset.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎autointent/context/data_handler/_stratification.py‎
Lines changed: 18 additions & 7 deletions b/‎autointent/context/data_handler/_stratification.py‎
Lines changed: 18 additions & 7 deletions
diff --git a/‎autointent/context/data_handler/_validation.py‎
Lines changed: 1 addition & 2 deletions b/‎autointent/context/data_handler/_validation.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎autointent/context/optimization_info/_data_models.py‎
Lines changed: 6 additions & 1 deletion b/‎autointent/context/optimization_info/_data_models.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎autointent/context/optimization_info/_optimization_info.py‎
Lines changed: 26 additions & 3 deletions b/‎autointent/context/optimization_info/_optimization_info.py‎
Lines changed: 26 additions & 3 deletions
@@ -1,5 +1,5 @@
 from ._data_handler import DataHandler
-from ._dataset import Dataset
+from ._dataset import Dataset, Split
 from ._schemas import Intent, Sample, Tag
 
-__all__ = ["DataHandler", "Dataset", "Intent", "Sample", "Tag"]
+__all__ = ["DataHandler", "Dataset", "Intent", "Sample", "Split", "Tag"]
@@ -48,20 +48,9 @@ def __init__(
         if self.dataset.multilabel:
             self.dataset = self.dataset.encode_labels()
 
-        if Split.TEST not in self.dataset:
-            logger.info("Splitting dataset into train and test splits")
-            self.dataset = split_dataset(self.dataset, random_seed=random_seed)
+        self.n_classes = self.dataset.n_classes
 
-        for split in self.dataset:
-            if split == Split.OOS:
-                continue
-            n_classes_split = self.dataset.get_n_classes(split)
-            if n_classes_split != self.n_classes:
-                message = (
-                    f"Number of classes in split '{split}' doesn't match initial number of classes "
-                    f"({n_classes_split} != {self.n_classes})"
-                )
-                raise ValueError(message)
+        self._split(random_seed)
 
         self.regexp_patterns = [
             RegexPatterns(
@@ -86,60 +75,104 @@ def multilabel(self) -> bool:
         """
         return self.dataset.multilabel
 
-    @property
-    def n_classes(self) -> int:
+    def train_utterances(self, idx: int | None = None) -> list[str]:
+        """
+        Retrieve training utterances from the dataset.
+
+        With ``idx`` set, reads the split named ``f"{Split.TRAIN}_{idx}"``;
+        with ``idx=None``, reads ``Split.TRAIN`` itself. NOTE(review): ``_split``
+        pops ``Split.TRAIN`` after halving it, so ``idx=None`` may fail - confirm.
+
+        :param idx: Optional index for a specific training split.
+        :return: List of training utterances.
         """
-        Get the number of classes in the dataset.
+        split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN
+        return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-        :return: Number of classes.
+    def train_labels(self, idx: int | None = None) -> list[LabelType]:
         """
-        return self.dataset.n_classes
+        Retrieve training labels from the dataset.
 
-    @property
-    def train_utterances(self) -> list[str]:
+        With ``idx`` set, reads the split named ``f"{Split.TRAIN}_{idx}"``;
+        with ``idx=None``, reads ``Split.TRAIN`` itself. NOTE(review): ``_split``
+        pops ``Split.TRAIN`` after halving it, so ``idx=None`` may fail - confirm.
+
+        :param idx: Optional index for a specific training split.
+        :return: List of training labels.
         """
-        Get the training utterances.
+        split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN
+        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
 
-        :return: List of training utterances.
+    def validation_utterances(self, idx: int | None = None) -> list[str]:
         """
-        return cast(list[str], self.dataset[Split.TRAIN][self.dataset.utterance_feature])
+        Retrieve validation utterances from the dataset.
 
-    @property
-    def train_labels(self) -> list[LabelType]:
+        With ``idx`` set, reads ``f"{Split.VALIDATION}_{idx}"``; with ``idx=None``,
+        reads ``Split.VALIDATION``. NOTE(review): ``_split`` only creates the indexed
+        validation splits, so ``idx=None`` needs a pre-existing split - confirm.
+
+        :param idx: Optional index for a specific validation split.
+        :return: List of validation utterances.
         """
-        Get the training labels.
+        split = f"{Split.VALIDATION}_{idx}" if idx is not None else Split.VALIDATION
+        return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-        :return: List of training labels.
+    def validation_labels(self, idx: int | None = None) -> list[LabelType]:
         """
-        return cast(list[LabelType], self.dataset[Split.TRAIN][self.dataset.label_feature])
+        Retrieve validation labels from the dataset.
 
-    @property
-    def test_utterances(self) -> list[str]:
+        With ``idx`` set, reads ``f"{Split.VALIDATION}_{idx}"``; with ``idx=None``,
+        reads ``Split.VALIDATION``. NOTE(review): ``_split`` only creates the indexed
+        validation splits, so ``idx=None`` needs a pre-existing split - confirm.
+
+        :param idx: Optional index for a specific validation split.
+        :return: List of validation labels.
+        """
+        split = f"{Split.VALIDATION}_{idx}" if idx is not None else Split.VALIDATION
+        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
+
+    def test_utterances(self, idx: int | None = None) -> list[str]:
         """
-        Get the test utterances.
+        Retrieve test utterances from the dataset.
 
+        With ``idx`` set, reads the split named ``f"{Split.TEST}_{idx}"``;
+        with ``idx=None``, reads ``Split.TEST``. NOTE(review): ``_split`` creates
+        no indexed test splits, so a non-None ``idx`` relies on the input - confirm.
+
+        :param idx: Optional index for a specific test split.
         :return: List of test utterances.
         """
-        return cast(list[str], self.dataset[Split.TEST][self.dataset.utterance_feature])
+        split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST
+        return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-    @property
-    def test_labels(self) -> list[LabelType]:
+    def test_labels(self, idx: int | None = None) -> list[LabelType]:
         """
-        Get the test labels.
+        Retrieve test labels from the dataset.
 
+        With ``idx`` set, reads the split named ``f"{Split.TEST}_{idx}"``;
+        with ``idx=None``, reads ``Split.TEST``. NOTE(review): ``_split`` creates
+        no indexed test splits, so a non-None ``idx`` relies on the input - confirm.
+
+        :param idx: Optional index for a specific test split.
         :return: List of test labels.
         """
-        return cast(list[LabelType], self.dataset[Split.TEST][self.dataset.label_feature])
+        split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST
+        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
 
-    @property
-    def oos_utterances(self) -> list[str]:
+    def oos_utterances(self, idx: int | None = None) -> list[str]:
         """
-        Get the out-of-scope utterances.
+        Retrieve out-of-scope (OOS) utterances from the dataset.
 
-        :return: List of out-of-scope utterances if available, otherwise an empty list.
+        When any split name starts with ``Split.OOS``, reads ``f"{Split.OOS}_{idx}"``
+        (or ``Split.OOS`` for ``idx=None``); otherwise returns an empty list.
+        NOTE(review): ``_split`` pops ``Split.OOS``, so ``idx=None`` may fail - confirm.
+
+        :param idx: Optional index for a specific OOS split.
+        :return: List of out-of-scope utterances, or an empty list if unavailable.
         """
         if self.has_oos_samples():
-            return cast(list[str], self.dataset[Split.OOS][self.dataset.utterance_feature])
+            split = f"{Split.OOS}_{idx}" if idx is not None else Split.OOS
+            return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
         return []
 
     def has_oos_samples(self) -> bool:
@@ -148,7 +181,7 @@ def has_oos_samples(self) -> bool:
 
         :return: True if there are out-of-scope samples.
         """
-        return Split.OOS in self.dataset
+        return any(split.startswith(Split.OOS) for split in self.dataset)
 
     def dump(self) -> dict[str, list[dict[str, Any]]]:
         """
@@ -157,3 +190,60 @@ def dump(self) -> dict[str, list[dict[str, Any]]]:
         :return: Dataset dump.
         """
         return self.dataset.dump()
+
+    def _split(self, random_seed: int) -> None:  # builds the test split and the indexed train / validation / OOS splits in place
+        if Split.TEST not in self.dataset:  # no test split supplied: carve one out of train (test_size=0.2)
+            self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset(
+                self.dataset,
+                split=Split.TRAIN,
+                test_size=0.2,
+                random_seed=random_seed,
+            )
+
+        self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TRAIN}_1"] = split_dataset(  # halve train (test_size=0.5)
+            self.dataset,
+            split=Split.TRAIN,
+            test_size=0.5,
+            random_seed=random_seed,
+        )
+        self.dataset.pop(Split.TRAIN)  # only the indexed train splits remain from here on
+
+        for idx in range(2):  # each train half gives up test_size=0.2 of itself as the matching validation split
+            self.dataset[f"{Split.TRAIN}_{idx}"], self.dataset[f"{Split.VALIDATION}_{idx}"] = split_dataset(
+                self.dataset,
+                split=f"{Split.TRAIN}_{idx}",
+                test_size=0.2,
+                random_seed=random_seed,
+            )
+
+        if self.has_oos_samples():  # NOTE(review): check is prefix-based, but Split.OOS itself is indexed below - confirm
+            self.dataset[f"{Split.OOS}_0"], self.dataset[f"{Split.OOS}_1"] = (  # first cut, test_size=0.2
+                self.dataset[Split.OOS]
+                .train_test_split(
+                    test_size=0.2,
+                    shuffle=True,
+                    seed=random_seed,
+                )
+                .values()
+            )
+            self.dataset[f"{Split.OOS}_1"], self.dataset[f"{Split.OOS}_2"] = (  # second cut halves the first cut's second part
+                self.dataset[f"{Split.OOS}_1"]
+                .train_test_split(
+                    test_size=0.5,
+                    shuffle=True,
+                    seed=random_seed,
+                )
+                .values()
+            )
+            self.dataset.pop(Split.OOS)  # the unindexed OOS split is dropped once the three parts exist
+
+        for split in self.dataset:  # every non-OOS split must report the same class count as self.n_classes
+            if split.startswith(Split.OOS):
+                continue
+            n_classes_split = self.dataset.get_n_classes(split)
+            if n_classes_split != self.n_classes:
+                message = (
+                    f"Number of classes in split '{split}' doesn't match initial number of classes "
+                    f"({n_classes_split} != {self.n_classes})"
+                )
+                raise ValueError(message)
@@ -81,7 +81,8 @@ def multilabel(self) -> bool:
 
         :return: True if the dataset is multilabel, False otherwise.
         """
-        return isinstance(self[Split.TRAIN].features[self.label_feature], Sequence)
+        split = Split.TRAIN if Split.TRAIN in self else f"{Split.TRAIN}_0"
+        return isinstance(self[split].features[self.label_feature], Sequence)
 
     @cached_property
     def n_classes(self) -> int:
 
@@ -12,7 +12,7 @@
 from sklearn.model_selection import train_test_split
 from skmultilearn.model_selection import IterativeStratification
 
-from ._dataset import Dataset, Split
+from ._dataset import Dataset
 
 
 class StratifiedSplitter:
@@ -44,7 +44,7 @@ def __init__(
         self.random_seed = random_seed
         self.shuffle = shuffle
 
-    def __call__(self, dataset: HFDataset, multilabel: bool) -> tuple[Dataset, Dataset]:
+    def __call__(self, dataset: HFDataset, multilabel: bool) -> tuple[HFDataset, HFDataset]:
         """
         Split the dataset into training and testing subsets.
 
@@ -73,21 +73,32 @@ def _split_multilabel(self, dataset: HFDataset) -> Sequence[npt.NDArray[np.int_]
         return next(splitter.split(np.arange(len(dataset)), np.array(dataset[self.label_feature])))
 
 
-def split_dataset(dataset: Dataset, random_seed: int) -> Dataset:
+def split_dataset(
+    dataset: Dataset,
+    split: str,
+    test_size: float,
+    random_seed: int,
+) -> tuple[HFDataset, HFDataset]:
     """
     Split a Dataset object into training and testing subsets.
 
     This function uses the StratifiedSplitter to perform stratified splitting
     while preserving the distribution of labels.
 
     :param dataset: The dataset to be split, which must include training data.
+    :param split: Name of the split inside ``dataset`` that is handed to the
+        splitter, e.g., "train" or another split within the dataset.
+    :param test_size: Proportion of the dataset to include in the test split.
+        Should be a float value between 0.0 and 1.0, where 0.0
+        means no data will be assigned to the test set, and 1.0
+        means all data will be assigned to the test set. For example,
+        a value of 0.2 indicates 20% of the data will be used for testing.
     :param random_seed: Seed for random number generation to ensure reproducibility.
-    :return: The input dataset with training and testing splits.
+    :return: The splitter's ``(train_part, test_part)`` pair for the selected split.
     """
     splitter = StratifiedSplitter(
-        test_size=0.25,
+        test_size=test_size,
         label_feature=dataset.label_feature,
         random_seed=random_seed,
     )
-    dataset[Split.TRAIN], dataset[Split.TEST] = splitter(dataset[Split.TRAIN], dataset.multilabel)
-    return dataset
+    return splitter(dataset[split], dataset.multilabel)
@@ -17,7 +17,6 @@ class DatasetReader(BaseModel):
     """
 
     train: list[Sample]
-    validation: list[Sample] = []
     test: list[Sample] = []
     intents: list[Intent] = []
 
@@ -30,7 +29,7 @@ def validate_dataset(self) -> Self:
         :return: The validated DatasetReader instance.
         """
         self._validate_intents()
-        for split in [self.train, self.validation, self.test]:
+        for split in [self.train, self.test]:
             self._validate_split(split)
         return self
 
 
@@ -39,8 +39,13 @@ class ScorerArtifact(Artifact):
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
+    train_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for train utterances")
+    validation_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for validation utterances")
     test_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for test utterances")
-    oos_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for out-of-scope utterances")
+    oos_scores: dict[str, NDArray[np.float64]] | None = Field(
+        None,
+        description="Scorer outputs for out-of-scope utterances",
+    )
 
 
 class PredictorArtifact(Artifact):
 
@@ -5,7 +5,7 @@
 """
 
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 from numpy.typing import NDArray
@@ -147,6 +147,24 @@ def get_best_embedder(self) -> str:
         best_retriever_artifact: RetrieverArtifact = self._get_best_artifact(node_type=NodeType.retrieval)  # type: ignore[assignment]
         return best_retriever_artifact.embedder_name
 
+    def get_best_train_scores(self) -> NDArray[np.float64] | None:
+        """
+        Retrieve the train scores from the best scorer node.
+
+        :return: The artifact's ``train_scores`` array, or ``None`` if it has none.
+        """
+        best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring)  # type: ignore[assignment]
+        return best_scorer_artifact.train_scores
+
+    def get_best_validation_scores(self) -> NDArray[np.float64] | None:
+        """
+        Retrieve the validation scores from the best scorer node.
+
+        :return: The artifact's ``validation_scores`` array, or ``None`` if it has none.
+        """
+        best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring)  # type: ignore[assignment]
+        return best_scorer_artifact.validation_scores
+
     def get_best_test_scores(self) -> NDArray[np.float64] | None:
         """
         Retrieve the test scores from the best scorer node.
@@ -156,13 +174,18 @@ def get_best_test_scores(self) -> NDArray[np.float64] | None:
         best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring)  # type: ignore[assignment]
         return best_scorer_artifact.test_scores
 
-    def get_best_oos_scores(self) -> NDArray[np.float64] | None:
+    def get_best_oos_scores(self, split: Literal["train", "validation", "test"]) -> NDArray[np.float64] | None:
         """
         Retrieve the out-of-scope scores from the best scorer node.
 
-        :return: Out-of-scope scores as a numpy array.
+        :param split: The data split for which to retrieve the OOS scores.
+            Must be one of "train", "validation", or "test".
+        :return: The ``oos_scores[split]`` entry of the best scorer artifact,
+            or `None` if the artifact holds no OOS scores at all.
         """
         best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring)  # type: ignore[assignment]
+        if best_scorer_artifact.oos_scores is not None:
+            return best_scorer_artifact.oos_scores[split]  # plain indexing: a missing key is not turned into None
         return best_scorer_artifact.oos_scores
 
     def dump_evaluation_results(self) -> dict[str, dict[str, list[float]]]: