deeppavlov
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
Lines changed: 1 addition & 1 deletion
diff --git a/Makefile b/Makefile
Lines changed: 4 additions & 1 deletion
diff --git a/autointent/configs/__init__.py b/autointent/configs/__init__.py
Lines changed: 28 additions & 0 deletions
diff --git a/autointent/configs/inference_cli.py b/autointent/configs/inference_cli.py
Lines changed: 9 additions & 0 deletions
diff --git a/autointent/configs/inference_pipeline.py b/autointent/configs/inference_pipeline.py
Lines changed: 5 additions & 0 deletions
diff --git a/autointent/configs/name.py b/autointent/configs/name.py
Lines changed: 13 additions & 0 deletions
diff --git a/autointent/configs/node.py b/autointent/configs/node.py
Lines changed: 19 additions & 4 deletions
diff --git a/autointent/configs/optimization_cli.py b/autointent/configs/optimization_cli.py
Lines changed: 51 additions & 6 deletions
diff --git a/autointent/configs/pipeline_optimizer.py b/autointent/configs/pipeline_optimizer.py
Lines changed: 5 additions & 0 deletions
@@ -5,4 +5,4 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: chartboost/ruff-action@v1
+      - uses: astral-sh/ruff-action@v1
@@ -33,4 +33,7 @@ docs:
 
 .PHONY: serve-docs
 serve-docs: docs
-	$(poetry) python -m http.server -d docs/build/html 8333
+	$(poetry) python -m http.server -d docs/build/html 8333
+
+.PHONY: all
+all: lint
@@ -0,0 +1,28 @@
+from .inference_cli import InferenceConfig
+from .inference_pipeline import InferencePipelineConfig
+from .node import InferenceNodeConfig, NodeOptimizerConfig
+from .optimization_cli import (
+    AugmentationConfig,
+    DataConfig,
+    EmbedderConfig,
+    LoggingConfig,
+    OptimizationConfig,
+    TaskConfig,
+    VectorIndexConfig,
+)
+from .pipeline_optimizer import PipelineOptimizerConfig
+
+__all__ = [
+    "AugmentationConfig",
+    "DataConfig",
+    "EmbedderConfig",
+    "InferenceConfig",
+    "InferenceNodeConfig",
+    "InferencePipelineConfig",
+    "LoggingConfig",
+    "NodeOptimizerConfig",
+    "OptimizationConfig",
+    "PipelineOptimizerConfig",
+    "TaskConfig",
+    "VectorIndexConfig",
+]
@@ -1,3 +1,5 @@
+"""Inference CLI configuration module."""
+
 from dataclasses import dataclass
 
 from hydra.core.config_store import ConfigStore
@@ -7,11 +9,18 @@
 
 @dataclass
 class InferenceConfig:
+    """Configuration for the inference process."""
+
     data_path: str
+    """Path to the file containing the data for prediction"""
     source_dir: str
+    """Path to the directory containing the inference config"""
     output_path: str
+    """Path to the file where the predictions will be saved"""
     log_level: LogLevel = LogLevel.ERROR
+    """Logging level"""
     with_metadata: bool = False
+    """Whether to save metadata along with the predictions"""
 
 
 cs = ConfigStore.instance()
 
@@ -1,3 +1,5 @@
+"""Configuration for the inference pipeline."""
+
 from dataclasses import dataclass
 
 from omegaconf import MISSING
@@ -7,5 +9,8 @@
 
 @dataclass
 class InferencePipelineConfig:
+    """Configuration for the inference pipeline."""
+
     nodes: list[InferenceNodeConfig] = MISSING
+    """List of nodes in the inference pipeline"""
     _target_: str = "autointent.pipeline.InferencePipeline"
@@ -1,3 +1,5 @@
+"""Random name generator."""
+
 import random
 from datetime import datetime
 
@@ -340,12 +342,23 @@
 
 
 def generate_name() -> str:
+    """
+    Generate a random name for a run.
+
+    :return: Random name
+    """
     adjective = random.choice(adjectives)
     noun = random.choice(nouns)
     return f"{adjective}_{noun}"
 
 
 def get_run_name(run_name: str | None = None) -> str:
+    """
+    Get a run name.
+
+    :param run_name: Run name. If None, generate a random name
+    :return: Run name with a timestamp
+    """
     if run_name is None:
         run_name = generate_name()
     return f"{run_name}_{datetime.now().strftime('%m-%d-%Y_%H-%M-%S')}"  # noqa: DTZ005
@@ -1,21 +1,36 @@
+"""Configuration for the nodes."""
+
 from dataclasses import dataclass
 from typing import Any
 
 from omegaconf import MISSING
 
+from autointent.custom_types import NodeType, NodeTypeType
+
 
 @dataclass
 class InferenceNodeConfig:
-    node_type: str = MISSING
-    module_type: str = MISSING
+    """Configuration for the inference node."""
+
+    node_type: NodeTypeType = MISSING
+    """Type of the node. Should be one of the NODE_TYPES"""
+    module_type: str = MISSING  # TODO: add custom type
+    """Type of the module. Should be one of the available module types"""
     module_config: dict[str, Any] = MISSING
+    """Configuration of the module"""
     load_path: str | None = None
+    """Path to the module dump. If None, the module will be trained from scratch"""
     _target_: str = "autointent.nodes.InferenceNode"
 
 
 @dataclass
 class NodeOptimizerConfig:
-    node_type: str = MISSING
+    """Configuration for the node optimizer."""
+
+    node_type: NodeType = MISSING
+    """Type of the node. Should be one of the NODE_TYPES"""
     search_space: list[dict[str, Any]] = MISSING
-    metric: str = MISSING
+    """Search space for the optimization"""
+    metric: str = MISSING  # TODO: add custom type
+    """Metric to optimize"""
     _target_: str = "autointent.nodes.NodeOptimizer"
@@ -1,53 +1,70 @@
+"""Configuration for the optimization process."""
+
 from dataclasses import dataclass, field
-from datetime import datetime
 from pathlib import Path
 from typing import Any
 
 from hydra.core.config_store import ConfigStore
 from omegaconf import MISSING
 
-from .name import generate_name
+from .name import get_run_name
 
 
 @dataclass
 class DataConfig:
+    """Configuration for the data used in the optimization process."""
+
     train_path: str | Path = MISSING
+    """Path to the training data"""
     test_path: Path | None = None
+    """Path to the testing data. If None, no testing data will be used"""
     force_multilabel: bool = False
+    """Force multilabel classification even if the data is multiclass"""
 
 
 @dataclass
 class TaskConfig:
-    """TODO presets"""
+    """Configuration for the task to optimize."""
 
     search_space_path: Path | None = None
+    """Path to the search space configuration file. If None, the default search space will be used"""
 
 
 @dataclass
 class LoggingConfig:
+    """Configuration for the logging."""
+
     run_name: str | None = None
+    """Name of the run. If None, a random name will be generated"""
     dirpath: Path | None = None
+    """Path to the directory where the logs will be saved; a run-name subdirectory is appended to it.
+    If None, ``runs`` under the current working directory is used as the base directory"""
     dump_dir: Path | None = None
+    """Path to the directory where the modules will be dumped. If None, the modules will not be dumped"""
     dump_modules: bool = False
+    """Whether to dump the modules or not"""
     clear_ram: bool = True
+    """Whether to clear the RAM after dumping the modules"""
 
     def __post_init__(self) -> None:
+        """Define the run name, directory path and dump directory."""
         self.define_run_name()
         self.define_dirpath()
         self.define_dump_dir()
 
     def define_run_name(self) -> None:
-        if self.run_name is None:
-            self.run_name = generate_name()
-        self.run_name = f"{self.run_name}_{datetime.now().strftime('%m-%d-%Y_%H-%M-%S')}"  # noqa: DTZ005
+        """Define the run name. If None, a random name is generated; a timestamp suffix is always appended."""
+        self.run_name = get_run_name(self.run_name)
 
     def define_dirpath(self) -> None:
+        """Define the log directory as ``<dirpath>/<run_name>``; dirpath defaults to ``<cwd>/runs`` if None."""
         dirpath = Path.cwd() / "runs" if self.dirpath is None else self.dirpath
         if self.run_name is None:
             raise ValueError
         self.dirpath = dirpath / self.run_name
 
     def define_dump_dir(self) -> None:
+        """Define the dump directory. If None, the modules will not be dumped."""
         if self.dump_dir is None:
             if self.dirpath is None:
                 raise ValueError
@@ -56,32 +73,60 @@ def define_dump_dir(self) -> None:
 
 @dataclass
 class VectorIndexConfig:
+    """Configuration for the vector index."""
+
     db_dir: Path | None = None
+    """Path to the directory where the vector index database will be saved. If None, the database will not be saved"""
     device: str = "cpu"
+    """Device to use for the vector index. Can be 'cpu', 'cuda', 'cuda:0', 'mps', etc."""
     save_db: bool = False
+    """Whether to save the vector index database or not"""
 
 
 @dataclass
 class AugmentationConfig:
+    """Configuration for the augmentation."""
+
     regex_sampling: int = 0
+    """Number of regex samples to generate"""
     multilabel_generation_config: str | None = None
+    """Path to the multilabel generation configuration file. If None, the default configuration will be used"""
 
 
 @dataclass
 class EmbedderConfig:
+    """
+    Configuration for the embedder.
+
+    The embedder is used to embed the data before training the model. These parameters
+    will be applied to the embedder used in the optimization process in vector db.
+    Only one model can be used globally.
+    """
+
     batch_size: int = 32
+    """Batch size for the embedder"""
     max_length: int | None = None
+    """Max length for the embedder. If None, the max length will be taken from model config"""
 
 
 @dataclass
 class OptimizationConfig:
+    """Configuration for the optimization process."""
+
     seed: int = 0
+    """Seed for the random number generator"""
     data: DataConfig = field(default_factory=DataConfig)
+    """Configuration for the data used in the optimization process"""
     task: TaskConfig = field(default_factory=TaskConfig)
+    """Configuration for the task to optimize"""
     logs: LoggingConfig = field(default_factory=LoggingConfig)
+    """Configuration for the logging"""
     vector_index: VectorIndexConfig = field(default_factory=VectorIndexConfig)
+    """Configuration for the vector index"""
     augmentation: AugmentationConfig = field(default_factory=AugmentationConfig)
+    """Configuration for the augmentation"""
     embedder: EmbedderConfig = field(default_factory=EmbedderConfig)
+    """Configuration for the embedder"""
 
     defaults: list[Any] = field(
         default_factory=lambda: [
 
@@ -1,3 +1,5 @@
+"""Pipeline configuration."""
+
 from dataclasses import dataclass
 
 from omegaconf import MISSING
@@ -7,5 +9,8 @@
 
 @dataclass
 class PipelineOptimizerConfig:
+    """Configuration for the pipeline optimizer."""
+
     nodes: list[NodeOptimizerConfig] = MISSING
+    """List of the nodes to optimize"""
     _target_: str = "autointent.pipeline.PipelineOptimizer"