Check if metric can handle dataset type (#224)

Samoed · web-flow · commit d61ee10f8215 · 2025-06-11T08:54:36.000+03:00
* add test for configuration

* lint

* satisfy mypy
diff --git a/autointent/nodes/_node_optimizer.py b/autointent/nodes/_node_optimizer.py
@@ -241,7 +241,7 @@ def get_module_dump_dir(self, context: Context, module_name: str, j_combination:
         dump_dir_.mkdir(parents=True, exist_ok=True)
         return str(dump_dir_)
 
-    def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidationMode) -> None:
+    def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidationMode) -> None:  # noqa: C901
         """Validates nodes against the dataset.
 
         Args:
@@ -254,12 +254,24 @@ def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidat
         is_multilabel = dataset.multilabel
 
         filtered_search_space = []
+        if is_multilabel and self.target_metric not in self.node_info.multilabel_available_metrics:
+            handle_message_on_mode(
+                mode, f"Target metric '{self.target_metric}' is not available for multilabel datasets.", True
+            )
+        elif not is_multilabel and self.target_metric not in self.node_info.multiclass_available_metrics:
+            handle_message_on_mode(
+                mode, f"Target metric '{self.target_metric}' is not available for multiclass datasets.", True
+            )
+
+        for metric in self.metrics:
+            if is_multilabel and metric not in self.node_info.multilabel_available_metrics:
+                handle_message_on_mode(mode, f"Metric '{metric}' is not available for multilabel datasets.", True)
+            elif not is_multilabel and metric not in self.node_info.multiclass_available_metrics:
+                handle_message_on_mode(mode, f"Metric '{metric}' is not available for multiclass datasets.", True)
 
         for search_space in deepcopy(self.modules_search_spaces):
             module_name = search_space["module_name"]
             module = self.node_info.modules_available[module_name]
-            # todo add check for oos
-
             messages = []
 
             if module_name == "description" and not dataset.has_descriptions:
@@ -273,11 +285,7 @@ def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidat
 
             if len(messages) > 0:
                 msg = "\n".join(messages)
-                if mode == "raise":
-                    self._logger.error(msg)
-                    raise ValueError(msg)
-                if mode == "warning":
-                    self._logger.warning(msg)
+                handle_message_on_mode(mode, msg)
             else:
                 filtered_search_space.append(search_space)
 
@@ -393,3 +401,26 @@ def load_or_create_study(
             finished_trials,
             remaining_trials,
         )
+
+
+def handle_message_on_mode(
+    mode: SearchSpaceValidationMode,
+    message: str,
+    strict: bool = False,
+) -> None:
+    """Log or raise a validation message depending on the validation mode.
+
+    Args:
+        mode: The validation mode; "raise" and "warning" are the values handled here.
+        message: The message to log or to use as the error text.
+        strict: If True, always raise ValueError, even when mode is "warning" (the warning is logged first).
+
+    Raises:
+        ValueError: If mode is "raise" or strict is True.
+    """
+    if mode == "raise":
+        raise ValueError(message)
+    if mode == "warning":
+        logger.warning(message)
+    if strict:  # only reached when mode is not "raise"
+        raise ValueError(message)
diff --git a/autointent/nodes/info/_base.py b/autointent/nodes/info/_base.py
@@ -17,3 +17,7 @@ class NodeInfo:
     """Available modules for the node."""
     node_type: NodeType
     """Node type."""
+    multiclass_available_metrics: ClassVar[Mapping[str, METRIC_FN]]
+    """Available metrics for multiclass classification."""
+    multilabel_available_metrics: ClassVar[Mapping[str, METRIC_FN]]
+    """Available metrics for multilabel classification."""
diff --git a/autointent/nodes/info/_decision.py b/autointent/nodes/info/_decision.py
@@ -19,3 +19,7 @@ class DecisionNodeInfo(NodeInfo):
     modules_available: ClassVar[dict[str, type[BaseDecision]]] = DECISION_MODULES
 
     node_type = NodeType.decision
+
+    multiclass_available_metrics = DECISION_METRICS
+
+    multilabel_available_metrics = DECISION_METRICS
diff --git a/autointent/nodes/info/_embedding.py b/autointent/nodes/info/_embedding.py
@@ -1,7 +1,7 @@
 """Retrieval node info."""
 
 from collections.abc import Mapping
-from typing import ClassVar
+from typing import ClassVar, cast
 
 from autointent.custom_types import NodeType
 from autointent.metrics import (
@@ -31,3 +31,11 @@ class EmbeddingNodeInfo(NodeInfo):
     modules_available: ClassVar[Mapping[str, type[BaseEmbedding]]] = EMBEDDING_MODULES
 
     node_type = NodeType.embedding
+
+    multiclass_available_metrics: ClassVar[Mapping[str, RetrievalMetricFn | ScoringMetricFn]] = cast(
+        Mapping[str, RetrievalMetricFn | ScoringMetricFn], RETRIEVAL_METRICS_MULTICLASS | SCORING_METRICS_MULTICLASS
+    )
+
+    multilabel_available_metrics: ClassVar[Mapping[str, RetrievalMetricFn | ScoringMetricFn]] = cast(
+        Mapping[str, RetrievalMetricFn | ScoringMetricFn], RETRIEVAL_METRICS_MULTILABEL | SCORING_METRICS_MULTILABEL
+    )
diff --git a/autointent/nodes/info/_regex.py b/autointent/nodes/info/_regex.py
@@ -20,3 +20,7 @@ class RegexNodeInfo(NodeInfo):
     modules_available: ClassVar[Mapping[str, type[BaseRegex]]] = REGEX_MODULES
 
     node_type = NodeType.regex
+
+    multiclass_available_metrics: ClassVar[Mapping[str, RegexMetricFn]] = REGEX_METRICS
+
+    multilabel_available_metrics: ClassVar[Mapping[str, RegexMetricFn]] = REGEX_METRICS
diff --git a/autointent/nodes/info/_scoring.py b/autointent/nodes/info/_scoring.py
@@ -19,3 +19,6 @@ class ScoringNodeInfo(NodeInfo):
     modules_available: ClassVar[Mapping[str, type[BaseScorer]]] = SCORING_MODULES
 
     node_type = NodeType.scoring
+
+    multiclass_available_metrics: ClassVar[Mapping[str, ScoringMetricFn]] = SCORING_METRICS_MULTICLASS
+    multilabel_available_metrics: ClassVar[Mapping[str, ScoringMetricFn]] = SCORING_METRICS_MULTILABEL
diff --git a/tests/assets/configs/multilabel.yaml b/tests/assets/configs/multilabel.yaml
@@ -7,7 +7,7 @@
         - model_name: sentence-transformers/all-MiniLM-L6-v2
         - model_name: avsolatorio/GIST-small-Embedding-v0
 - node_type: scoring
-  target_metric: scoring_roc_auc
+  target_metric: scoring_hit_rate
   search_space:
     - module_name: knn
       k: [5, 10]
diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py
@@ -128,3 +128,19 @@ def test_dump_modules(dataset, task_type):
     context.dump()
 
     assert os.listdir(pipeline_optimizer.logging_config.dump_dir)
+
+
+@pytest.mark.parametrize(
+    "task_type",
+    ["multiclass", "multilabel"],
+)
+def test_optimization_validation_metric_names(dataset, task_type):  # fit() must reject the target metric
+    search_space = get_search_space(task_type)
+
+    pipeline_optimizer = Pipeline.from_search_space(search_space)
+
+    if task_type == "multiclass":  # NOTE(review): fixture is presumably multiclass otherwise - confirm
+        dataset = dataset.to_multilabel()  # pair the multiclass search space with a multilabel dataset
+
+    with pytest.raises(ValueError, match="Target metric .*"):
+        pipeline_optimizer.fit(dataset, incompatible_search_space="raise")