deeppavlov
diff --git a/autointent/_dataset/_dataset.py b/autointent/_dataset/_dataset.py
Lines changed: 4 additions & 4 deletions
diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py
Lines changed: 12 additions & 6 deletions
diff --git a/autointent/_pipeline/_schemas.py b/autointent/_pipeline/_schemas.py
Lines changed: 5 additions & 5 deletions
diff --git a/autointent/_ranker.py b/autointent/_ranker.py
Lines changed: 3 additions & 3 deletions
diff --git a/autointent/_vector_index.py b/autointent/_vector_index.py
Lines changed: 6 additions & 6 deletions
diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py
Lines changed: 7 additions & 7 deletions
diff --git a/autointent/context/optimization_info/_data_models.py b/autointent/context/optimization_info/_data_models.py
Lines changed: 2 additions & 2 deletions
diff --git a/autointent/custom_types.py b/autointent/custom_types.py
Lines changed: 10 additions & 2 deletions
diff --git a/autointent/metrics/decision.py b/autointent/metrics/decision.py
Lines changed: 10 additions & 14 deletions
diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py
Lines changed: 3 additions & 1 deletion
@@ -9,20 +9,20 @@
 from datasets import Dataset as HFDataset
 from datasets import Sequence, get_dataset_config_names, load_dataset
 
-from autointent.custom_types import LabelType, Split
+from autointent.custom_types import LabelWithOOS, Split
 from autointent.schemas import Intent, Tag
 
 
 class Sample(TypedDict):
     """
     Typed dictionary representing a dataset sample.
 
-    :param str utterance: The text of the utterance.
-    :param LabelType | None label: The label associated with the utterance, or None if out-of-scope.
+    :param utterance: The text of the utterance.
+    :param label: The label associated with the utterance, or None if out-of-scope.
     """
 
     utterance: str
-    label: LabelType | None
+    label: LabelWithOOS
 
 
 class Dataset(dict[str, HFDataset]):
 
@@ -3,20 +3,23 @@
 import json
 import logging
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import yaml
 
 from autointent import Context, Dataset
 from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig
-from autointent.custom_types import LabelType, NodeType
+from autointent.custom_types import ListOfGenericLabels, NodeType
 from autointent.metrics import PREDICTION_METRICS_MULTILABEL
 from autointent.nodes import InferenceNode, NodeOptimizer
 from autointent.utils import load_default_search_space, load_search_space
 
 from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput
 
+if TYPE_CHECKING:
+    from autointent.modules.abc import DecisionModule, ScoringModule
+
 
 class Pipeline:
     """Pipeline optimizer class."""
@@ -184,7 +187,7 @@ def load(cls, path: str | Path) -> "Pipeline":
             inference_dict_config = yaml.safe_load(file)
         return cls.from_dict_config(inference_dict_config["nodes_configs"])
 
-    def predict(self, utterances: list[str]) -> list[LabelType | None]:
+    def predict(self, utterances: list[str]) -> ListOfGenericLabels:
         """
         Predict the labels for the utterances.
 
@@ -195,8 +198,11 @@ def predict(self, utterances: list[str]) -> list[LabelType | None]:
             msg = "Pipeline in optimization mode cannot perform inference"
             raise RuntimeError(msg)
 
-        scores = self.nodes[NodeType.scoring].module.predict(utterances)  # type: ignore[union-attr]
-        return self.nodes[NodeType.decision].module.predict(scores)  # type: ignore[union-attr]
+        scoring_module: ScoringModule = self.nodes[NodeType.scoring].module  # type: ignore[assignment,union-attr]
+        decision_module: DecisionModule = self.nodes[NodeType.decision].module  # type: ignore[assignment,union-attr]
+
+        scores = scoring_module.predict(utterances)
+        return decision_module.predict(scores)
 
     def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput:
         """
@@ -210,7 +216,7 @@ def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutpu
             raise RuntimeError(msg)
 
         scores, scores_metadata = self.nodes[NodeType.scoring].module.predict_with_metadata(utterances)  # type: ignore[union-attr]
-        predictions = self.nodes[NodeType.decision].module.predict(scores)  # type: ignore[union-attr]
+        predictions = self.nodes[NodeType.decision].module.predict(scores)  # type: ignore[union-attr,arg-type]
         regexp_predictions, regexp_predictions_metadata = None, None
         if NodeType.regexp in self.nodes:
             regexp_predictions, regexp_predictions_metadata = self.nodes[NodeType.regexp].module.predict_with_metadata(  # type: ignore[union-attr]
 
@@ -2,15 +2,15 @@
 
 from pydantic import BaseModel
 
-from autointent.custom_types import LabelType
+from autointent.custom_types import LabelWithOOS, ListOfLabels, ListOfLabelsWithOOS
 
 
 class InferencePipelineUtteranceOutput(BaseModel):
     """Output of the inference pipeline for a single utterance."""
 
     utterance: str
-    prediction: LabelType | None
-    regexp_prediction: LabelType | None
+    prediction: LabelWithOOS
+    regexp_prediction: LabelWithOOS
     regexp_prediction_metadata: Any
     score: list[float]
     score_metadata: Any
@@ -19,6 +19,6 @@ class InferencePipelineUtteranceOutput(BaseModel):
 class InferencePipelineOutput(BaseModel):
     """Output of the inference pipeline."""
 
-    predictions: list[LabelType | None]
-    regexp_predictions: list[LabelType] | None = None
+    predictions: ListOfLabelsWithOOS
+    regexp_predictions: ListOfLabels | None = None
     utterances: list[InferencePipelineUtteranceOutput] | None = None
@@ -18,7 +18,7 @@
 from sklearn.linear_model import LogisticRegressionCV
 from torch import nn
 
-from autointent.custom_types import LabelType
+from autointent.custom_types import ListOfLabels
 
 logger = logging.getLogger(__name__)
 
@@ -158,7 +158,7 @@ def _get_features_or_predictions(self, pairs: list[tuple[str, str]]) -> npt.NDAr
         self._activations_list.clear()
         return res  # type: ignore[no-any-return]
 
-    def _fit(self, pairs: list[tuple[str, str]], labels: list[LabelType]) -> None:
+    def _fit(self, pairs: list[tuple[str, str]], labels: ListOfLabels) -> None:
         """
         Train the logistic regression model on cross-encoder features.
 
@@ -181,7 +181,7 @@ def _fit(self, pairs: list[tuple[str, str]], labels: list[LabelType]) -> None:
 
         self._clf = clf
 
-    def fit(self, utterances: list[str], labels: list[LabelType]) -> None:
+    def fit(self, utterances: list[str], labels: ListOfLabels) -> None:
         """
         Construct training samples and train the logistic regression classifier.
 
 
@@ -15,7 +15,7 @@
 import numpy.typing as npt
 
 from autointent import Embedder
-from autointent.custom_types import LabelType
+from autointent.custom_types import ListOfLabels
 
 
 class VectorIndexMetadata(TypedDict):
@@ -28,7 +28,7 @@ class VectorIndexMetadata(TypedDict):
 
 class VectorIndexData(TypedDict):
     texts: list[str]
-    labels: list[LabelType]
+    labels: ListOfLabels
 
 
 class VectorIndex:
@@ -68,12 +68,12 @@ def __init__(
         )
         self.embedder_device = embedder_device
 
-        self.labels: list[LabelType] = []  # (n_samples,) or (n_samples, n_classes)
+        self.labels: ListOfLabels = []  # (n_samples,) or (n_samples, n_classes)
         self.texts: list[str] = []
 
         self.logger = logging.getLogger(__name__)
 
-    def add(self, texts: list[str], labels: list[LabelType]) -> None:
+    def add(self, texts: list[str], labels: ListOfLabels) -> None:
         """
         Add texts and their corresponding labels to the index.
 
@@ -160,7 +160,7 @@ def get_all_embeddings(self) -> npt.NDArray[Any]:
             raise ValueError(msg)
         return self.index.reconstruct_n(0, self.index.ntotal)  # type: ignore[no-any-return]
 
-    def get_all_labels(self) -> list[LabelType]:
+    def get_all_labels(self) -> ListOfLabels:
         """
         Retrieve all labels stored in the index.
 
@@ -172,7 +172,7 @@ def query(
         self,
         queries: list[str] | npt.NDArray[np.float32],
         k: int,
-    ) -> tuple[list[list[LabelType]], list[list[float]], list[list[str]]]:
+    ) -> tuple[list[ListOfLabels], list[list[float]], list[list[str]]]:
         """
         Query the index to retrieve nearest neighbors.
 
 
@@ -8,7 +8,7 @@
 from transformers import set_seed
 
 from autointent import Dataset
-from autointent.custom_types import LabelType, Split
+from autointent.custom_types import ListOfGenericLabels, Split
 
 from ._stratification import split_dataset
 
@@ -83,7 +83,7 @@ def train_utterances(self, idx: int | None = None) -> list[str]:
         split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN
         return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-    def train_labels(self, idx: int | None = None) -> list[LabelType]:
+    def train_labels(self, idx: int | None = None) -> ListOfGenericLabels:
         """
         Retrieve training labels from the dataset.
 
@@ -95,7 +95,7 @@ def train_labels(self, idx: int | None = None) -> list[LabelType]:
         :return: List of training labels.
         """
         split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN
-        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
+        return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature])
 
     def validation_utterances(self, idx: int | None = None) -> list[str]:
         """
@@ -111,7 +111,7 @@ def validation_utterances(self, idx: int | None = None) -> list[str]:
         split = f"{Split.VALIDATION}_{idx}" if idx is not None else Split.VALIDATION
         return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-    def validation_labels(self, idx: int | None = None) -> list[LabelType]:
+    def validation_labels(self, idx: int | None = None) -> ListOfGenericLabels:
         """
         Retrieve validation labels from the dataset.
 
@@ -123,7 +123,7 @@ def validation_labels(self, idx: int | None = None) -> list[LabelType]:
         :return: List of validation labels.
         """
         split = f"{Split.VALIDATION}_{idx}" if idx is not None else Split.VALIDATION
-        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
+        return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature])
 
     def test_utterances(self, idx: int | None = None) -> list[str]:
         """
@@ -139,7 +139,7 @@ def test_utterances(self, idx: int | None = None) -> list[str]:
         split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST
         return cast(list[str], self.dataset[split][self.dataset.utterance_feature])
 
-    def test_labels(self, idx: int | None = None) -> list[LabelType]:
+    def test_labels(self, idx: int | None = None) -> ListOfGenericLabels:
         """
         Retrieve test labels from the dataset.
 
@@ -151,7 +151,7 @@ def test_labels(self, idx: int | None = None) -> list[LabelType]:
         :return: List of test labels.
         """
         split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST
-        return cast(list[LabelType], self.dataset[split][self.dataset.label_feature])
+        return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature])
 
     def dump(self, filepath: str | Path) -> None:
         """
 
@@ -10,7 +10,7 @@
 from numpy.typing import NDArray
 from pydantic import BaseModel, ConfigDict, Field
 
-from autointent.custom_types import LabelType, NodeType
+from autointent.custom_types import ListOfLabelsWithOOS, NodeType
 
 
 class Artifact(BaseModel):
@@ -53,7 +53,7 @@ class DecisionArtifact(Artifact):
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    labels: list[LabelType | None]
+    labels: ListOfLabelsWithOOS
 
 
 def validate_node_name(value: str) -> str:
 
@@ -5,7 +5,7 @@
 """
 
 from enum import Enum
-from typing import Literal, TypedDict
+from typing import Literal, TypeAlias, TypedDict
 
 
 class LogLevel(Enum):
@@ -29,7 +29,15 @@ class LogLevel(Enum):
 """
 
 # Type alias for label representation
-LabelType = int | list[int]
+SimpleLabel = int
+MultiLabel = list[int]
+SimpleLabelWithOOS = SimpleLabel | None
+MultiLabelWithOOS = MultiLabel | None
+ListOfLabels = list[SimpleLabel] | list[MultiLabel]
+ListOfLabelsWithOOS = list[SimpleLabelWithOOS] | list[MultiLabelWithOOS]
+LabelType: TypeAlias = SimpleLabel | MultiLabel
+LabelWithOOS = LabelType | None
+ListOfGenericLabels = ListOfLabels | ListOfLabelsWithOOS
 """
 Type alias for label representation
 
 
@@ -8,18 +8,17 @@
 import numpy.typing as npt
 from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
 
-from autointent.custom_types import LabelType
+from autointent.custom_types import ListOfGenericLabels, ListOfLabels
 
 from ._converter import transform
-from .custom_types import LABELS_VALUE_TYPE
 
 logger = logging.getLogger(__name__)
 
 
 class DecisionMetricFn(Protocol):
     """Protocol for decision metrics."""
 
-    def __call__(self, y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+    def __call__(self, y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
         """
         Calculate decision metric.
 
@@ -32,17 +31,14 @@ def __call__(self, y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> floa
         ...
 
 
-def handle_oos(
-    y_true: list[LabelType | None], y_pred: list[LabelType | None]
-) -> tuple[list[LabelType], list[LabelType]]:
+def handle_oos(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> tuple[ListOfLabels, ListOfLabels]:
     """Convert labels of OOS samples to make them usable in decision metrics."""
     in_domain_labels = list(filter(lambda lab: lab is not None, y_true))
-    multilabel = isinstance(in_domain_labels[0], list)
-    if multilabel:
+    if isinstance(in_domain_labels[0], list):
         func = _add_oos_multilabel
         n_classes = len(in_domain_labels[0])
     else:
-        func = _add_oos_multiclass
+        func = _add_oos_multiclass  # type: ignore[assignment]
         n_classes = len(set(in_domain_labels))
     func = partial(func, n_classes=n_classes)
     return list(map(func, y_true)), list(map(func, y_pred))
@@ -60,7 +56,7 @@ def _add_oos_multilabel(label: list[int] | None, n_classes: int) -> list[int]:
     return [*label, 1]
 
 
-def decision_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+def decision_accuracy(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
     r"""
     Calculate decision accuracy. Supports both multiclass and multilabel.
 
@@ -131,7 +127,7 @@ def _decision_roc_auc_multilabel(y_true: npt.NDArray[Any], y_pred: npt.NDArray[A
     return float(roc_auc_score(y_true, y_pred, average="macro"))
 
 
-def decision_roc_auc(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+def decision_roc_auc(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
     r"""
     Calculate ROC AUC for multiclass and multilabel classification.
 
@@ -153,7 +149,7 @@ def decision_roc_auc(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> fl
     raise ValueError(msg)
 
 
-def decision_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+def decision_precision(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
     r"""
     Calculate decision precision. Supports both multiclass and multilabel.
 
@@ -168,7 +164,7 @@ def decision_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) ->
     return float(precision_score(*handle_oos(y_true, y_pred), average="macro"))
 
 
-def decision_recall(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+def decision_recall(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
     r"""
     Calculate decision recall. Supports both multiclass and multilabel.
 
@@ -183,7 +179,7 @@ def decision_recall(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> flo
     return float(recall_score(*handle_oos(y_true, y_pred), average="macro"))
 
 
-def decision_f1(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
+def decision_f1(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -> float:
     r"""
     Calculate decision f1 score. Supports both multiclass and multilabel.
 
 
@@ -100,7 +100,9 @@ def scoring_roc_auc(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> flo
     return float(roc_auc_score(labels_, scores_, average="macro"))
 
 
-def _calculate_decision_metric(func: DecisionMetricFn, labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float:
+def _calculate_decision_metric(
+    func: DecisionMetricFn, labels: list[int] | list[list[int]], scores: SCORES_VALUE_TYPE
+) -> float:
     r"""
     Calculate decision metric.