deeppavlov
diff --git a/‎autointent/_pipeline/_pipeline.py‎
Lines changed: 7 additions & 1 deletion b/‎autointent/_pipeline/_pipeline.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎autointent/custom_types.py‎
Lines changed: 1 addition & 5 deletions b/‎autointent/custom_types.py‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎autointent/modules/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎autointent/modules/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autointent/modules/decision/_threshold.py‎
Lines changed: 3 additions & 6 deletions b/‎autointent/modules/decision/_threshold.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎autointent/modules/embedding/_logreg.py‎
Lines changed: 5 additions & 5 deletions b/‎autointent/modules/embedding/_logreg.py‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎autointent/modules/regexp/_regexp.py‎
Lines changed: 2 additions & 0 deletions b/‎autointent/modules/regexp/_regexp.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎autointent/modules/scoring/_linear.py‎
Lines changed: 1 addition & 1 deletion b/‎autointent/modules/scoring/_linear.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autointent/nodes/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎autointent/nodes/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎autointent/nodes/_nodes_info/_embedding.py‎
Lines changed: 9 additions & 3 deletions b/‎autointent/nodes/_nodes_info/_embedding.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎autointent/nodes/_optimization/_node_optimizer.py‎
Lines changed: 5 additions & 5 deletions b/‎autointent/nodes/_optimization/_node_optimizer.py‎
Lines changed: 5 additions & 5 deletions
@@ -13,6 +13,7 @@
 from autointent.custom_types import ListOfGenericLabels, NodeType
 from autointent.metrics import PREDICTION_METRICS_MULTILABEL
 from autointent.nodes import InferenceNode, NodeOptimizer
+from autointent.nodes.schemes import OptimizationConfig
 from autointent.utils import load_default_search_space, load_search_space
 
 from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput
@@ -72,10 +73,12 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed
         Create pipeline optimizer from dictionary search space.
 
         :param search_space: Dictionary config
+        :param seed: random seed
         """
         if isinstance(search_space, Path | str):
             search_space = load_search_space(search_space)
-        nodes = [NodeOptimizer(**node) for node in search_space]
+        validated_search_space = OptimizationConfig(search_space).model_dump()  # type: ignore[arg-type]
+        nodes = [NodeOptimizer(**node) for node in validated_search_space]
         return cls(nodes=nodes, seed=seed)
 
     @classmethod
@@ -84,6 +87,9 @@ def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline":
         Create pipeline optimizer with default search space for given classification task.
 
         :param multilabel: Whether the task is multi-label or single-label.
+        :param seed: random seed
+
+        :return: Pipeline
         """
         return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed)
 
 
@@ -5,7 +5,7 @@
 """
 
 from enum import Enum
-from typing import Literal, TypeAlias, TypedDict
+from typing import Literal, TypeAlias
 
 
 class LogLevel(Enum):
@@ -46,10 +46,6 @@ class LogLevel(Enum):
 """
 
 
-class BaseMetadataDict(TypedDict):
-    """Base metadata dictionary for storing additional information."""
-
-
 class NodeType(str, Enum):
     """Enumeration of node types in the AutoIntent pipeline."""
 
 
@@ -24,7 +24,7 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
     [RetrievalAimedEmbedding, LogregAimedEmbedding]
 )
 
-RETRIEVAL_MODULES_MULTILABEL = RETRIEVAL_MODULES_MULTICLASS
+RETRIEVAL_MODULES_MULTILABEL: dict[str, type[EmbeddingModule]] = RETRIEVAL_MODULES_MULTICLASS
 
 SCORING_MODULES_MULTICLASS: dict[str, type[ScoringModule]] = _create_modules_dict(
     [
 
@@ -24,9 +24,6 @@ class ThresholdDecision(DecisionModule):
     ThresholdDecision uses a predefined threshold (or array of thresholds) to predict
     labels for single-label or multi-label classification tasks.
 
-    :ivar metadata_dict_name: Filename for saving metadata to disk.
-    :ivar multilabel: If True, the model supports multi-label classification.
-    :ivar n_classes: Number of classes in the dataset.
     :ivar tags: Tags for predictions (if any).
     :ivar name: Name of the predictor, defaults to "adaptive".
 
@@ -78,17 +75,17 @@ class ThresholdDecision(DecisionModule):
 
     def __init__(
         self,
-        thresh: float | npt.NDArray[Any],
+        thresh: float | list[float],
     ) -> None:
         """
         Initialize threshold predictor.
 
         :param thresh: Threshold for the scores, shape (n_classes,) or float
         """
-        self.thresh = thresh
+        self.thresh = thresh if isinstance(thresh, float) else np.array(thresh)
 
     @classmethod
-    def from_context(cls, context: Context, thresh: float | npt.NDArray[Any] = 0.5) -> "ThresholdDecision":
+    def from_context(cls, context: Context, thresh: float | list[float] = 0.5) -> "ThresholdDecision":
         """
         Initialize from context.
 
 
@@ -22,8 +22,8 @@ class LogregAimedEmbedding(EmbeddingModule):
     The main purpose of this module is to be used at embedding node for optimizing
     embedding configuration using its logreg classification quality as a sort of proxy metric.
 
-    :ivar classifier: The trained logistic regression model.
-    :ivar label_encoder: Label encoder for converting labels to numerical format.
+    :ivar _classifier: The trained logistic regression model.
+    :ivar _label_encoder: Label encoder for converting labels to numerical format.
     :ivar name: Name of the module, defaults to "logreg".
 
     Examples
@@ -42,7 +42,7 @@ class LogregAimedEmbedding(EmbeddingModule):
 
     _classifier: LogisticRegressionCV | MultiOutputClassifier
     _label_encoder: LabelEncoder | None
-    name = "logreg"
+    name = "logreg_embedding"
     supports_multiclass = True
     supports_multilabel = True
     supports_oos = False
@@ -62,8 +62,8 @@ def __init__(
         :param cv: the number of folds used in LogisticRegressionCV
         :param embedder_name: Name of the embedder used for creating embeddings.
         :param embedder_device: Device to run operations on, e.g., "cpu" or "cuda".
-        :param batch_size: Batch size for embedding generation.
-        :param max_length: Maximum sequence length for embeddings. None if not set.
+        :param embedder_batch_size: Batch size for embedding generation.
+        :param embedder_max_length: Maximum sequence length for embeddings. None if not set.
         :param embedder_use_cache: Flag indicating whether to cache intermediate embeddings.
         """
         self.embedder_name = embedder_name
 
@@ -26,6 +26,8 @@ class RegexPatternsCompiled(TypedDict):
 class RegExp(Module):
     """Regular expressions based intent detection module."""
 
+    name = "regexp"
+
     @classmethod
     def from_context(cls, context: Context) -> "RegExp":
         """Initialize from context."""
 
@@ -39,7 +39,7 @@ class LinearScorer(ScoringModule):
     .. testoutput::
 
         [[0.50000032 0.49999968]
-         [0.50000032 0.49999968]]
+         [0.44031667 0.55968333]]
 
     """
 
 
@@ -3,13 +3,15 @@
 from ._inference_node import InferenceNode
 from ._nodes_info import DecisionNodeInfo, EmbeddingNodeInfo, NodeInfo, RegExpNodeInfo, ScoringNodeInfo
 from ._optimization import NodeOptimizer
+from .schemes import OptimizationConfig
 
 __all__ = [
     "DecisionNodeInfo",
     "EmbeddingNodeInfo",
     "InferenceNode",
     "NodeInfo",
     "NodeOptimizer",
+    "OptimizationConfig",
     "RegExpNodeInfo",
     "ScoringNodeInfo",
 ]
@@ -7,7 +7,10 @@
 from autointent.metrics import (
     RETRIEVAL_METRICS_MULTICLASS,
     RETRIEVAL_METRICS_MULTILABEL,
+    SCORING_METRICS_MULTICLASS,
+    SCORING_METRICS_MULTILABEL,
     RetrievalMetricFn,
+    ScoringMetricFn,
 )
 from autointent.modules import RETRIEVAL_MODULES_MULTICLASS, RETRIEVAL_MODULES_MULTILABEL
 from autointent.modules.abc import Module
@@ -18,12 +21,15 @@
 class EmbeddingNodeInfo(NodeInfo):
     """Retrieval node info."""
 
-    metrics_available: ClassVar[Mapping[str, RetrievalMetricFn]] = (
-        RETRIEVAL_METRICS_MULTICLASS | RETRIEVAL_METRICS_MULTILABEL
+    metrics_available: ClassVar[Mapping[str, RetrievalMetricFn | ScoringMetricFn]] = (
+        RETRIEVAL_METRICS_MULTICLASS
+        | RETRIEVAL_METRICS_MULTILABEL
+        | SCORING_METRICS_MULTILABEL
+        | SCORING_METRICS_MULTICLASS
     )
 
     modules_available: ClassVar[Mapping[str, type[Module]]] = (
-        RETRIEVAL_MODULES_MULTICLASS | RETRIEVAL_MODULES_MULTILABEL  # type: ignore[has-type]
+        RETRIEVAL_MODULES_MULTICLASS | RETRIEVAL_MODULES_MULTILABEL
     )
 
     node_type = NodeType.embedding
@@ -35,11 +35,11 @@ def __init__(
         """
         self.node_type = node_type
         self.node_info = NODES_INFO[node_type]
-        self.decision_metric_name = target_metric
+        self.target_metric = target_metric
 
         self.metrics = metrics if metrics is not None else []
-        if self.decision_metric_name not in self.metrics:
-            self.metrics.append(self.decision_metric_name)
+        if self.target_metric not in self.metrics:
+            self.metrics.append(self.target_metric)
 
         self.modules_search_spaces = search_space  # TODO search space validation
         self._logger = logging.getLogger(__name__)  # TODO solve duplicate logging messages problem
@@ -73,7 +73,7 @@ def fit(self, context: Context) -> None:
 
                 self._logger.debug("scoring %s module...", module_name)
                 metrics_score = module.score(context, "validation", self.metrics)
-                metric_value = metrics_score[self.decision_metric_name]
+                metric_value = metrics_score[self.target_metric]
 
                 context.callback_handler.log_metrics(metrics_score)
                 context.callback_handler.end_module()
@@ -91,7 +91,7 @@ def fit(self, context: Context) -> None:
                     module_name,
                     module_kwargs,
                     metric_value,
-                    self.decision_metric_name,
+                    self.target_metric,
                     module.get_assets(),  # retriever name / scores / predictions
                     module_dump_dir,
                     module=module if not context.is_ram_to_clear() else None,
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:`
`24`	`24`	`[RetrievalAimedEmbedding, LogregAimedEmbedding]`
`25`	`25`	`)`
`26`	`26`
`27`		`-RETRIEVAL_MODULES_MULTILABEL = RETRIEVAL_MODULES_MULTICLASS`
	`27`	`+RETRIEVAL_MODULES_MULTILABEL: dict[str, type[EmbeddingModule]] = RETRIEVAL_MODULES_MULTICLASS`
`28`	`28`
`29`	`29`	`SCORING_MODULES_MULTICLASS: dict[str, type[ScoringModule]] = _create_modules_dict(`
`30`	`30`	`[`