Commit 23efe32

feat: added optional multiple metrics (#108)
* feat: added optional multiple metrics
* fix: change metric to decision_metric
* fix: fixed type
* fix: typing
* refactor: decision metric to target metric
* fix: target metric
* fix: fixed tests
* fix: test_logreg
* fix: fixed unit test
* fix: fixed test
* feat: update guides
* feat: update guides
1 parent 36214e5 commit 23efe32

File tree

22 files changed: +96 −93 lines changed

Lines changed: 4 additions & 4 deletions
@@ -1,14 +1,14 @@
 # TODO: make up a better and more versatile config
 - node_type: embedding
-  metric: retrieval_hit_rate
+  target_metric: retrieval_hit_rate
   search_space:
     - module_name: retrieval
       k: [10]
       embedder_name:
         - avsolatorio/GIST-small-Embedding-v0
         - infgrad/stella-base-en-v2
 - node_type: scoring
-  metric: scoring_roc_auc
+  target_metric: scoring_roc_auc
   search_space:
     - module_name: knn
       k: [1, 3, 5, 10]
@@ -20,8 +20,8 @@
         - cross-encoder/ms-marco-MiniLM-L-6-v2
       k: [1, 3, 5, 10]
 - node_type: decision
-  metric: decision_accuracy
+  target_metric: decision_accuracy
   search_space:
     - module_name: threshold
       thresh: [0.5]
-    - module_name: argmax
+    - module_name: argmax
Lines changed: 4 additions & 4 deletions
@@ -1,21 +1,21 @@
 # TODO: make up a better and more versatile config
 - node_type: embedding
-  metric: retrieval_hit_rate_intersecting
+  target_metric: retrieval_hit_rate_intersecting
   search_space:
     - module_name: retrieval
       k: [10]
       embedder_name:
         - deepvk/USER-bge-m3
 - node_type: scoring
-  metric: scoring_roc_auc
+  target_metric: scoring_roc_auc
   search_space:
     - module_name: knn
       k: [3]
       weights: ["uniform", "distance", "closest"]
     - module_name: linear
 - node_type: decision
-  metric: decision_accuracy
+  target_metric: decision_accuracy
   search_space:
     - module_name: threshold
       thresh: [0.5]
-    - module_name: adaptive
+    - module_name: adaptive

autointent/modules/abc/_base.py

Lines changed: 2 additions & 6 deletions
@@ -34,11 +34,7 @@ def fit(self, *args: tuple[Any], **kwargs: dict[str, Any]) -> None:
         """

     @abstractmethod
-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Calculate metric on test set and return metric value.

@@ -110,7 +106,7 @@ def get_embedder_name(self) -> str | None:
         return None

     @staticmethod
-    def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float | str]:
+    def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float]:
         """
         Score metrics on the test set.
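The concrete modules below all implement this widened signature with the same pattern: filter a registry of metric callables down to the requested names, then evaluate every survivor on one (labels, predictions) pair. A minimal self-contained sketch of that pattern follows; the metric registry here is illustrative, not one of the library's actual *_METRICS dictionaries.

from typing import Any, Callable

# Illustrative registry; the real modules use dictionaries such as
# SCORING_METRICS_MULTICLASS or PREDICTION_METRICS_MULTICLASS.
METRICS: dict[str, Callable[..., float]] = {
    "accuracy": lambda y_true, y_pred: sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true),
    "error_rate": lambda y_true, y_pred: sum(t != p for t, p in zip(y_true, y_pred)) / len(y_true),
}


def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Callable[..., float]]) -> dict[str, float]:
    """Evaluate every requested metric on the same (labels, predictions) pair."""
    return {name: fn(*params) for name, fn in metrics_dict.items()}


labels, predictions = [0, 1, 1, 0], [0, 1, 0, 0]
requested = ["accuracy"]  # plays the role of the new `metrics` argument
chosen_metrics = {name: fn for name, fn in METRICS.items() if name in requested}
print(score_metrics((labels, predictions), chosen_metrics))  # {'accuracy': 0.75}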

autointent/modules/abc/_decision.py

Lines changed: 3 additions & 6 deletions
@@ -40,11 +40,7 @@ def predict(self, scores: npt.NDArray[Any]) -> ListOfGenericLabels:
         :param scores: Scores to predict
         """

-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Calculate metric on test set and return metric value.

@@ -54,7 +50,8 @@ def score(
         """
         labels, scores = get_decision_evaluation_data(context, split)
         self._decisions = self.predict(scores)
-        return self.score_metrics((labels, self._decisions), PREDICTION_METRICS_MULTICLASS)
+        chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics}
+        return self.score_metrics((labels, self._decisions), chosen_metrics)

     def get_assets(self) -> DecisionArtifact:
         """Return useful assets that represent intermediate data into context."""

autointent/modules/abc/_scoring.py

Lines changed: 3 additions & 6 deletions
@@ -21,11 +21,7 @@ class ScoringModule(Module, ABC):

     supports_oos = False

-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Evaluate the scorer on a test set and compute the specified metric.

@@ -50,7 +46,8 @@ def score(
         self._test_scores = self.predict(context.data_handler.test_utterances())

         metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS
-        return self.score_metrics((labels, scores), metrics_dict)
+        chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics}
+        return self.score_metrics((labels, scores), chosen_metrics)

     def get_assets(self) -> ScorerArtifact:
         """

autointent/modules/embedding/_logreg.py

Lines changed: 3 additions & 6 deletions
@@ -129,11 +129,7 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None:

         self._classifier.fit(embeddings, labels)

-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Evaluate the embedding model using a specified metric function.

@@ -153,7 +149,8 @@ def score(

         probas = self.predict(utterances)
         metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS
-        return self.score_metrics((labels, probas), metrics_dict)
+        chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics}
+        return self.score_metrics((labels, probas), chosen_metrics)

     def get_assets(self) -> RetrieverArtifact:
         """

autointent/modules/embedding/_retrieval.py

Lines changed: 3 additions & 6 deletions
@@ -109,11 +109,7 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None:
         )
         self._vector_index.add(utterances, labels)

-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Evaluate the embedding model using a specified metric function.

@@ -133,7 +129,8 @@ def score(
         predictions, _, _ = self._vector_index.query(utterances, self.k)

         metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS
-        return self.score_metrics((labels, predictions), metrics_dict)
+        chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics}
+        return self.score_metrics((labels, predictions), chosen_metrics)

     def get_assets(self) -> RetrieverArtifact:
         """

autointent/modules/regexp/_regexp.py

Lines changed: 3 additions & 6 deletions
@@ -108,11 +108,7 @@ def _predict_single(self, utterance: str) -> tuple[LabelType, dict[str, list[str
             matches["partial_matches"].extend(intent_matches["partial_matches"])
         return list(prediction), matches

-    def score(
-        self,
-        context: Context,
-        split: Literal["validation", "test"],
-    ) -> dict[str, float | str]:
+    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
         """
         Calculate metric on test set and return metric value.

@@ -128,7 +124,8 @@ def score(
         if assets["test_matches"] is None:
             msg = "no matches found"
             raise ValueError(msg)
-        return self.score_metrics((context.data_handler.test_labels(), assets["test_matches"]), REGEXP_METRICS)
+        chosen_metrics = {name: fn for name, fn in REGEXP_METRICS.items() if name in metrics}
+        return self.score_metrics((context.data_handler.test_labels(), assets["test_matches"]), chosen_metrics)

     def clear_cache(self) -> None:
         """Clear cache."""

autointent/nodes/_optimization/_node_optimizer.py

Lines changed: 17 additions & 10 deletions
@@ -19,7 +19,13 @@
 class NodeOptimizer:
     """Node optimizer class."""

-    def __init__(self, node_type: NodeType, search_space: list[dict[str, Any]], metric: str) -> None:
+    def __init__(
+        self,
+        node_type: NodeType,
+        search_space: list[dict[str, Any]],
+        target_metric: str,
+        metrics: list[str] | None = None,
+    ) -> None:
         """
         Initialize the node optimizer.

@@ -29,7 +35,12 @@ def __init__(self, node_type: NodeType, search_space: list[dict[str, Any]], metr
         """
         self.node_type = node_type
         self.node_info = NODES_INFO[node_type]
-        self.metric_name = metric
+        self.decision_metric_name = target_metric
+
+        self.metrics = metrics if metrics is not None else []
+        if self.decision_metric_name not in self.metrics:
+            self.metrics.append(self.decision_metric_name)
+
         self.modules_search_spaces = search_space  # TODO search space validation
         self._logger = logging.getLogger(__name__)  # TODO solve duplicate logging messages problem

@@ -61,14 +72,10 @@ def fit(self, context: Context) -> None:
                 self.module_fit(module, context)

                 self._logger.debug("scoring %s module...", module_name)
-                metrics = module.score(context, "validation")
-                metric_value = metrics[self.metric_name]
-
-                # some metrics can produce error. When main metric produces error raise it.
-                if isinstance(metric_value, str):
-                    raise Exception(metric_value)  # noqa: TRY004, TRY002
+                metrics_score = module.score(context, "validation", self.metrics)
+                metric_value = metrics_score[self.decision_metric_name]

-                context.callback_handler.log_metrics(metrics)
+                context.callback_handler.log_metrics(metrics_score)
                 context.callback_handler.end_module()

                 dump_dir = context.get_dump_dir()
@@ -84,7 +91,7 @@ def fit(self, context: Context) -> None:
                     module_name,
                     module_kwargs,
                     metric_value,
-                    self.metric_name,
+                    self.decision_metric_name,
                     module.get_assets(),  # retriever name / scores / predictions
                     module_dump_dir,
                     module=module if not context.is_ram_to_clear() else None,
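A hedged sketch of how the updated constructor might be called; the import path is inferred from the file shown in this diff, and the extra metric name is illustrative rather than taken from the library's registries.

# Assumed import path, inferred from the file location above; not verified.
from autointent.nodes._optimization._node_optimizer import NodeOptimizer

optimizer = NodeOptimizer(
    node_type="scoring",                        # assumes NodeType accepts or coerces this string
    search_space=[{"module_name": "knn", "k": [1, 3, 5, 10]}],
    target_metric="scoring_roc_auc",            # drives module comparison and selection
    metrics=["scoring_f1"],                     # illustrative extra metric, logged but not used for selection
)
# target_metric is appended to self.metrics when absent, so each module.score()
# call evaluates ["scoring_f1", "scoring_roc_auc"] in a single pass.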
Lines changed: 3 additions & 3 deletions
@@ -1,16 +1,16 @@
 - node_type: embedding
-  metric: retrieval_hit_rate
+  target_metric: retrieval_hit_rate
   search_space:
     - module_name: retrieval
       k: [10]
       embedder_name:
         - sentence-transformers/all-MiniLM-L6-v2
 - node_type: scoring
-  metric: scoring_roc_auc
+  target_metric: scoring_roc_auc
   search_space:
     - module_name: description
       temperature: [1.0, 0.5, 0.1, 0.05]
 - node_type: decision
-  metric: decision_accuracy
+  target_metric: decision_accuracy
   search_space:
     - module_name: argmax
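Against a config like the one above, each module now returns a plain name-to-value mapping; the optimizer logs the whole dict and ranks candidates by the target metric alone. The values and the second metric name below are invented for illustration.

# Invented values, for illustration only.
metrics_score = {"scoring_roc_auc": 0.93, "scoring_accuracy": 0.88}
target_metric = "scoring_roc_auc"

metric_value = metrics_score[target_metric]  # the single number used to compare modules
print(metric_value)  # 0.93; the full metrics_score dict goes to the callback handler for logging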
