deeppavlov · voorhs · Apr 4, 2025 · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025
diff --git a/autointent/modules/__init__.py b/autointent/modules/__init__.py
@@ -54,4 +54,25 @@ def _create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
 )
 
 
-__all__ = []  # type: ignore[var-annotated]
+__all__ = [
+    "AdaptiveDecision",
+    "ArgmaxDecision",
+    "BaseDecision",
+    "BaseEmbedding",
+    "BaseModule",
+    "BaseRegex",
+    "BaseScorer",
+    "DNNCScorer",
+    "DescriptionScorer",
+    "JinoosDecision",
+    "KNNScorer",
+    "LinearScorer",
+    "LogregAimedEmbedding",
+    "MLKnnScorer",
+    "RerankScorer",
+    "RetrievalAimedEmbedding",
+    "SimpleRegex",
+    "SklearnScorer",
+    "ThresholdDecision",
+    "TunableDecision",
+]
diff --git a/autointent/modules/scoring/_bert.py b/autointent/modules/scoring/_bert.py
@@ -31,14 +31,14 @@ class BertScorer(BaseScorer):
 
     def __init__(
         self,
-        model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
         num_train_epochs: int = 3,
         batch_size: int = 8,
         learning_rate: float = 5e-5,
         seed: int = 0,
         report_to: REPORTERS_NAMES | None = None,  # type: ignore  # noqa: PGH003
     ) -> None:
-        self.model_config = HFModelConfig.from_search_config(model_config)
+        self.classification_model_config = HFModelConfig.from_search_config(classification_model_config)
         self.num_train_epochs = num_train_epochs
         self.batch_size = batch_size
         self.learning_rate = learning_rate
@@ -49,19 +49,19 @@ def __init__(
     def from_context(
         cls,
         context: Context,
-        model_config: HFModelConfig | str | dict[str, Any] | None = None,
+        classification_model_config: HFModelConfig | str | dict[str, Any] | None = None,
         num_train_epochs: int = 3,
         batch_size: int = 8,
         learning_rate: float = 5e-5,
         seed: int = 0,
     ) -> "BertScorer":
-        if model_config is None:
-            model_config = context.resolve_embedder()
+        if classification_model_config is None:
+            classification_model_config = context.resolve_embedder()
 
         report_to = context.logging_config.report_to
 
         return cls(
-            model_config=model_config,
+            classification_model_config=classification_model_config,
             num_train_epochs=num_train_epochs,
             batch_size=batch_size,
             learning_rate=learning_rate,
@@ -70,7 +70,7 @@ def from_context(
         )
 
     def get_embedder_config(self) -> dict[str, Any]:
-        return self.model_config.model_dump()
+        return self.classification_model_config.model_dump()
 
     def fit(
         self,
@@ -81,7 +81,7 @@ def fit(
             self.clear_cache()
         self._validate_task(labels)
 
-        model_name = self.model_config.model_name
+        model_name = self.classification_model_config.model_name
         self._tokenizer = AutoTokenizer.from_pretrained(model_name)
 
         label2id = {i: i for i in range(self._n_classes)}
@@ -95,11 +95,11 @@ def fit(
             problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
         )
 
-        use_cpu = self.model_config.device == "cpu"
+        use_cpu = self.classification_model_config.device == "cpu"
 
         def tokenize_function(examples: dict[str, Any]) -> dict[str, Any]:
             return self._tokenizer(  # type: ignore[no-any-return]
-                examples["text"], return_tensors="pt", **self.model_config.tokenizer_config.model_dump()
+                examples["text"], return_tensors="pt", **self.classification_model_config.tokenizer_config.model_dump()
             )
 
         dataset = Dataset.from_dict({"text": utterances, "labels": labels})
@@ -148,7 +148,9 @@ def predict(self, utterances: list[str]) -> npt.NDArray[Any]:
         all_predictions = []
         for i in range(0, len(utterances), self.batch_size):
             batch = utterances[i : i + self.batch_size]
-            inputs = self._tokenizer(batch, return_tensors="pt", **self.model_config.tokenizer_config.model_dump())
+            inputs = self._tokenizer(
+                batch, return_tensors="pt", **self.classification_model_config.tokenizer_config.model_dump()
+            )
             inputs = {k: v.to(device) for k, v in inputs.items()}
             with torch.no_grad():
                 outputs = self._model(**inputs)

diff --git a/autointent/modules/scoring/_linear.py b/autointent/modules/scoring/_linear.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import numpy.typing as npt
+from pydantic import PositiveInt
 from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
 from sklearn.multioutput import MultiOutputClassifier
 
@@ -22,7 +23,6 @@ class LinearScorer(BaseScorer):
     Args:
         embedder_config: Config of the embedder model
         cv: Number of cross-validation folds, defaults to 3
-        n_jobs: Number of parallel jobs for cross-validation, defaults to None
         seed: Random seed for reproducibility, defaults to 0
 
     Example:
@@ -72,18 +72,21 @@ def __init__(
     def from_context(
         cls,
         context: Context,
+        cv: PositiveInt = 3,
         embedder_config: EmbedderConfig | str | None = None,
     ) -> "LinearScorer":
         """Create a LinearScorer instance using a Context object.
 
         Args:
             context: Context containing configurations and utilities
+            cv: Number of cross-validation folds, defaults to 3
             embedder_config: Config of the embedder, or None to use the best embedder
         """
         if embedder_config is None:
             embedder_config = context.resolve_embedder()
 
         return cls(
+            cv=cv,
             embedder_config=embedder_config,
         )
 

diff --git a/autointent/nodes/_node_optimizer.py b/autointent/nodes/_node_optimizer.py
@@ -11,33 +11,14 @@
 import optuna
 import torch
 from optuna.trial import Trial
-from pydantic import BaseModel, Field
 from typing_extensions import assert_never
 
 from autointent import Dataset
 from autointent.context import Context
 from autointent.custom_types import NodeType, SamplerType, SearchSpaceValidationMode
 from autointent.nodes.emissions_tracker import EmissionsTracker
 from autointent.nodes.info import NODES_INFO
-
-
-class ParamSpaceInt(BaseModel):
-    """Integer parameter search space configuration."""
-
-    low: int = Field(..., description="Lower boundary of the search space.")
-    high: int = Field(..., description="Upper boundary of the search space.")
-    step: int = Field(1, description="Step size for the search space.")
-    log: bool = Field(False, description="Indicates whether to use a logarithmic scale.")
-
-
-class ParamSpaceFloat(BaseModel):
-    """Float parameter search space configuration."""
-
-    low: float = Field(..., description="Lower boundary of the search space.")
-    high: float = Field(..., description="Upper boundary of the search space.")
-    step: float | None = Field(None, description="Step size for the search space (if applicable).")
-    log: bool = Field(False, description="Indicates whether to use a logarithmic scale.")
-
+from autointent.schemas.node_validation import ParamSpaceFloat, ParamSpaceInt, SearchSpaceConfig
 
 logger = logging.getLogger(__name__)
 
@@ -277,7 +258,8 @@ def validate_nodes_with_dataset(self, dataset: Dataset, mode: SearchSpaceValidat
 
     def validate_search_space(self, search_space: list[dict[str, Any]]) -> None:
         """Check if search space is configured correctly."""
-        for module_search_space in search_space:
+        validated_search_space = SearchSpaceConfig(search_space).model_dump()
+        for module_search_space in validated_search_space:
             module_search_space_no_optuna, module_name = self._reformat_search_space(deepcopy(module_search_space))
 
             for params_combination in it.product(*module_search_space_no_optuna.values()):