Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,41 +50,6 @@ make lint

![](assets/dependency-graph.png)

## Настройка логгера
Чтобы видеть debug строчки, у вас есть несколько опций:

1. Включить весь debug output через опцию командной строки:
```bash
autointent hydra.verbose=true
```
2. Включить debug output только для определенных модулей, пример для autointent.pipeline.optimization.cli_endpoint и самой hydra:
```bash
autointent hydra.verbose=[hydra,autointent/pipeline/optimization/cli_endpoint] hydra.job_logging.root.level=DEBUG
```

Само конфигурирование логгера сделано в autointent.configs.optimization_cli.logger_config. Вы можете изменить любой параметр логгера через командную строку. Вот пример, как поменять уровень логгера на ERROR:
```bash
autointent hydra.job_logging.root.level=ERROR
```

Еще можно изменить параметры логгера через yaml файлы:
1. Создадим папку с конфиг. файлами: test_config
2. test_config/config.yaml:
```yaml
defaults:
- optimization_config
- _self_
- override hydra/job_logging: custom

# set your config params for optimization here
embedder_batch_size: 32
```
3. Поместите конфигурацию логгера в test_config/hydra/job_logging/custom.yaml (параметры см. [здесь](https://docs.python.org/3/howto/logging.html))
4. Запускаем с конфиг файлом config.yaml:
```bash
autointent --config-path FULL_PATH/test_config --config-name config
```

## Построение документации

Построить html версию в папке `docs/build`:
Expand Down
68 changes: 0 additions & 68 deletions autointent/_pipeline/_cli_endpoint.py

This file was deleted.

19 changes: 11 additions & 8 deletions autointent/_pipeline/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ class Pipeline:
def __init__(
self,
nodes: list[NodeOptimizer] | list[InferenceNode],
seed: int = 42,
) -> None:
"""
Initialize the pipeline optimizer.

:param nodes: list of nodes
:param seed: random seed
"""
self._logger = logging.getLogger(__name__)
self.nodes = {node.node_type: node for node in nodes}
self.seed = seed

if isinstance(nodes[0], NodeOptimizer):
self.logging_config = LoggingConfig(dump_dir=None)
Expand Down Expand Up @@ -62,7 +65,7 @@ def set_config(self, config: LoggingConfig | VectorIndexConfig | EmbedderConfig
raise TypeError(msg)

@classmethod
def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str) -> "Pipeline":
def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed: int = 42) -> "Pipeline":
"""
Create pipeline optimizer from dictionary search space.

Expand All @@ -71,16 +74,16 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str) -> "
if isinstance(search_space, Path | str):
search_space = load_search_space(search_space)
nodes = [NodeOptimizer(**node) for node in search_space]
return cls(nodes)
return cls(nodes=nodes, seed=seed)

@classmethod
def default_optimizer(cls, multilabel: bool) -> "Pipeline":
def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline":
"""
Create pipeline optimizer with default search space for given classification task.

:param multilabel: Whether the task multi-label, or single-label.
"""
return cls.from_search_space(load_default_search_space(multilabel))
return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed)

def _fit(self, context: Context) -> None:
"""
Expand All @@ -91,8 +94,8 @@ def _fit(self, context: Context) -> None:
self.context = context
self._logger.info("starting pipeline optimization...")
self.context.callback_handler.start_run(
run_name=self.context.logging_config.get_run_name(),
dirpath=self.context.logging_config.get_dirpath(),
run_name=self.context.logging_config.safe_run_name,
dirpath=self.context.logging_config.safe_dirpath,
)
for node_type in NodeType:
node_optimizer = self.nodes.get(node_type, None)
Expand All @@ -111,7 +114,7 @@ def _is_inference(self) -> bool:
"""
return isinstance(self.nodes[NodeType.scoring], InferenceNode)

def fit(self, dataset: Dataset, force_multilabel: bool = False) -> Context:
def fit(self, dataset: Dataset) -> Context:
"""
Optimize the pipeline from dataset.

Expand All @@ -124,7 +127,7 @@ def fit(self, dataset: Dataset, force_multilabel: bool = False) -> Context:
raise RuntimeError(msg)

context = Context()
context.set_dataset(dataset, force_multilabel)
context.set_dataset(dataset)
context.configure_logging(self.logging_config)
context.configure_vector_index(self.vector_index_config, self.embedder_config)
context.configure_cross_encoder(self.cross_encoder_config)
Expand Down
2 changes: 1 addition & 1 deletion autointent/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Dataclasses for the configuration of the :class:`autointent.Embedder` and other objects."""

from ._inference_node import InferenceNodeConfig
from ._optimization_cli import (
from ._optimization import (
CrossEncoderConfig,
DataConfig,
EmbedderConfig,
Expand Down
Loading