update multilabel handling (#229)

Samoed · web-flow · commit 43b671c4e8a6 · 2025-06-22T18:31:39.000+03:00
* update multilabel handling

* add automatic conversion to multilabel on read

* remove enum from split

* remove useless conversions

* format
diff --git a/autointent/_dataset/_dataset.py b/autointent/_dataset/_dataset.py
@@ -94,19 +94,22 @@ def from_json(cls, filepath: str | Path) -> "Dataset":
         return JsonReader().read(filepath)
 
     @classmethod
-    def from_hub(cls, repo_name: str, intent_subset_name: str = Split.INTENTS) -> "Dataset":
+    def from_hub(
+        cls, repo_name: str, data_split: str = "default", intent_subset_name: str = Split.INTENTS
+    ) -> "Dataset":
         """Loads a dataset from the Hugging Face Hub.
 
         Args:
             repo_name: The name of the Hugging Face repository, like `DeepPavlov/clinc150`.
+            data_split: The name of the dataset configuration (subset) to load, defaults to `default`.
             intent_subset_name: The name of the intent subset to load, defaults to `intents`.
         """
         from ._reader import DictReader
 
-        splits = load_dataset(repo_name, "default")
+        splits = load_dataset(repo_name, data_split)
         mapping = dict(**splits)
         if intent_subset_name in get_dataset_config_names(repo_name):
-            mapping[Split.INTENTS] = load_dataset(repo_name, intent_subset_name, split=Split.INTENTS).to_list()
+            mapping[Split.INTENTS] = load_dataset(repo_name, name=intent_subset_name, split=Split.INTENTS).to_list()
 
         return DictReader().read(mapping)
 
diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py
@@ -25,7 +25,7 @@
     SearchSpacePreset,
     SearchSpaceValidationMode,
 )
-from autointent.metrics import DECISION_METRICS
+from autointent.metrics import DECISION_METRICS, DICISION_METRICS_MULTILABEL
 from autointent.nodes import InferenceNode, NodeOptimizer
 from autointent.utils import load_preset, load_search_space
 
@@ -247,7 +247,8 @@ def fit(
 
         if test_utterances is not None:
             predictions = self.predict(test_utterances)
-            for metric_name, metric in DECISION_METRICS.items():
+            metrics = DICISION_METRICS_MULTILABEL if context.data_handler.multilabel else DECISION_METRICS
+            for metric_name, metric in metrics.items():
                 context.optimization_info.pipeline_metrics[metric_name] = metric(
                     context.data_handler.test_labels(),
                     predictions,
diff --git a/autointent/metrics/__init__.py b/autointent/metrics/__init__.py
@@ -73,7 +73,14 @@
     scoring_roc_auc,
 )
 
-SCORING_METRICS_MULTILABEL: dict[str, ScoringMetricFn] = SCORING_METRICS_MULTICLASS | _funcs_to_dict(
+SCORING_METRICS_MULTILABEL: dict[str, ScoringMetricFn] = _funcs_to_dict(
+    # multiclass except for scoring_roc_auc
+    scoring_accuracy,
+    scoring_f1,
+    scoring_log_likelihood,
+    scoring_precision,
+    scoring_recall,
+    # multilabel
     scoring_hit_rate,
     scoring_map,
     scoring_neg_coverage,
@@ -88,6 +95,13 @@
     decision_roc_auc,
 )
 
+DICISION_METRICS_MULTILABEL: dict[str, DecisionMetricFn] = _funcs_to_dict(
+    decision_accuracy,
+    decision_f1,
+    decision_precision,
+    decision_recall,
+)
+
 REGEX_METRICS = _funcs_to_dict(regex_partial_accuracy, regex_partial_precision)
 
 METRIC_FN = DecisionMetricFn | RegexMetricFn | RetrievalMetricFn | ScoringMetricFn
diff --git a/autointent/metrics/decision.py b/autointent/metrics/decision.py
@@ -180,6 +180,7 @@ def decision_roc_auc(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) -
     if y_pred_.ndim == y_true_.ndim == 1:
         return _decision_roc_auc_multiclass(y_true_, y_pred_)
     if y_pred_.ndim == y_true_.ndim == 2:  # noqa: PLR2004
+        # NOTE: does not work when y_true contains only one class
         return _decision_roc_auc_multilabel(y_true_, y_pred_)
     msg = "Something went wrong with labels dimensions"
     logger.error(msg)
diff --git a/autointent/nodes/_node_optimizer.py b/autointent/nodes/_node_optimizer.py
@@ -237,18 +237,34 @@ def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidat
         filtered_search_space = []
         if is_multilabel and self.target_metric not in self.node_info.multilabel_available_metrics:
             handle_message_on_mode(
-                mode, f"Target metric '{self.target_metric}' is not available for multilabel datasets.", True
+                mode,
+                f"Target metric '{self.target_metric}' is not available for multilabel datasets. "
+                f"Available metrics: {list(self.node_info.multilabel_available_metrics.keys())}",
+                True,
             )
         elif not is_multilabel and self.target_metric not in self.node_info.multiclass_available_metrics:
             handle_message_on_mode(
-                mode, f"Target metric '{self.target_metric}' is not available for multiclass datasets.", True
+                mode,
+                f"Target metric '{self.target_metric}' is not available for multiclass datasets. "
+                f"Available metrics: {list(self.node_info.multiclass_available_metrics.keys())}",
+                True,
             )
 
         for metric in self.metrics:
             if is_multilabel and metric not in self.node_info.multilabel_available_metrics:
-                handle_message_on_mode(mode, f"Metric '{metric}' is not available for multilabel datasets.", True)
+                handle_message_on_mode(
+                    mode,
+                    f"Metric '{metric}' is not available for multilabel datasets. "
+                    f"Available metrics: {list(self.node_info.multilabel_available_metrics.keys())}",
+                    True,
+                )
             elif not is_multilabel and metric not in self.node_info.multiclass_available_metrics:
-                handle_message_on_mode(mode, f"Metric '{metric}' is not available for multiclass datasets.", True)
+                handle_message_on_mode(
+                    mode,
+                    f"Metric '{metric}' is not available for multiclass datasets. "
+                    f"Available metrics: {list(self.node_info.multiclass_available_metrics.keys())}",
+                    True,
+                )
 
         for search_space in deepcopy(self.modules_search_spaces):
             module_name = search_space["module_name"]
diff --git a/autointent/nodes/info/_decision.py b/autointent/nodes/info/_decision.py
@@ -4,7 +4,7 @@
 from typing import ClassVar
 
 from autointent.custom_types import NodeType
-from autointent.metrics import DECISION_METRICS, DecisionMetricFn
+from autointent.metrics import DECISION_METRICS, DICISION_METRICS_MULTILABEL, DecisionMetricFn
 from autointent.modules import DECISION_MODULES
 from autointent.modules.base import BaseDecision
 
@@ -22,4 +22,4 @@ class DecisionNodeInfo(NodeInfo):
 
     multiclass_available_metrics = DECISION_METRICS
 
-    multilabel_available_metrics = DECISION_METRICS
+    multilabel_available_metrics = DICISION_METRICS_MULTILABEL