11"""Module for regular expressions based intent detection."""
22
33import re
4- from typing import Any , Literal , TypedDict
4+ from typing import Any , TypedDict
55
66from autointent import Context
77from autointent .context .data_handler ._data_handler import RegexPatterns
88from autointent .context .optimization_info import Artifact
99from autointent .custom_types import LabelType
10- from autointent .metrics import REGEXP_METRICS
10+ from autointent .metrics import REGEX_METRICS
1111from autointent .modules .abc import RegexModule
1212from autointent .schemas import Intent
1313
@@ -33,23 +33,19 @@ def from_context(cls, context: Context) -> "Regex":
3333 """Initialize from context."""
3434 return cls ()
3535
36- def get_train_data (self , context : Context ) -> list [Intent ]:
37- return context .data_handler .dataset .intents
38-
39- def fit (self , intents : list [dict [str , Any ]]) -> None :
36+ def fit (self , intents : list [Intent ]) -> None :
4037 """
4138 Fit the model.
4239
4340 :param intents: Intents to fit
4441 """
45- intents_parsed = [Intent (** dct ) for dct in intents ]
4642 self .regex_patterns = [
4743 RegexPatterns (
4844 id = intent .id ,
4945 regex_full_match = intent .regex_full_match ,
5046 regex_partial_match = intent .regex_partial_match ,
5147 )
52- for intent in intents_parsed
48+ for intent in intents
5349 ]
5450 self ._compile_regex_patterns ()
5551
@@ -109,24 +105,32 @@ def _predict_single(self, utterance: str) -> tuple[LabelType, dict[str, list[str]]]:
             matches["partial_matches"].extend(intent_matches["partial_matches"])
         return list(prediction), matches
 
-    def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]:
+    def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]:
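+        """
+        Evaluate the module on the hold-out validation split.
+
+        :param context: Context containing the validation data.
+        :param metrics: Names of the metrics to compute.
+        :return: Mapping of metric names to their computed values.
+        """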
+        self.fit(context.data_handler.dataset.intents)
+
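+        # NOTE: split index 0 is assumed to refer to the hold-out validation split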
+        val_utterances = context.data_handler.validation_utterances(0)
+        val_labels = context.data_handler.validation_labels(0)
+
+        pred_labels = self.predict(val_utterances)
+
+        chosen_metrics = {name: fn for name, fn in REGEX_METRICS.items() if name in metrics}
+        return self.score_metrics_ho((val_labels, pred_labels), chosen_metrics)
+
+    def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]:
113120 """
114- Calculate metric on test set and return metric value .
121+ Evaluate the scorer on a test set and compute the specified metric .
115122
116- :param context: Context to score
117- :param split: Split to score on
123+ :param context: Context containing test set and other data.
124+ :param split: Target split
118125 :return: Computed metrics value for the test set or error code of metrics
119126 """
-        # TODO add parameter to a whole pipeline (or just to regex module):
-        # whether or not to omit utterances on next stages if they were detected with regex module
-        assets = {
-            "test_matches": list(self.predict(context.data_handler.test_utterances())),
-        }
-        if assets["test_matches"] is None:
-            msg = "no matches found"
-            raise ValueError(msg)
-        chosen_metrics = {name: fn for name, fn in REGEXP_METRICS.items() if name in metrics}
-        return self.score_metrics((context.data_handler.test_labels(), assets["test_matches"]), chosen_metrics)
+        chosen_metrics = {name: fn for name, fn in REGEX_METRICS.items() if name in metrics}
+
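+        # compute the chosen metrics across the cross-validation folds; the second
+        # value returned by score_metrics_cv is not needed here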
+        metrics_calculated, _ = self.score_metrics_cv(
+            chosen_metrics, context.data_handler.validation_iterator()
+        )
+
+        return metrics_calculated
 
     def clear_cache(self) -> None:
         """Clear cache."""