Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,41 +50,6 @@ make lint

![](assets/dependency-graph.png)

## Настройка логгера
Чтобы видеть debug строчки, у вас есть несколько опций:

1. Включить весь debug output через опцию командной строки:
```bash
autointent hydra.verbose=true
```
2. Включить debug output только для определенных модулей, пример для autointent.pipeline.optimization.cli_endpoint и самой hydra:
```bash
autointent hydra.verbose=[hydra,autointent/pipeline/optimization/cli_endpoint] hydra.job_logging.root.level=DEBUG
```

Само конфигурирование логгера сделано в autointent.configs.optimization_cli.logger_config. Вы можете изменить любой параметр логгера через командную строку. Вот пример, как поменять уровень логгера на ERROR:
```bash
autointent hydra.job_logging.root.level=ERROR
```

Еще можно изменить параметры логгера через yaml файлы:
1. Создадим папку с конфиг. файлами: test_config
2. test_config/config.yaml:
```yaml
defaults:
- optimization_config
- _self_
- override hydra/job_logging: custom

# set your config params for optimization here
embedder_batch_size: 32
```
3. Поместите конфигурацию логгера в test_config/hydra/job_logging/custom.yaml (параметры см. [здесь](https://docs.python.org/3/howto/logging.html))
4. Запускаем с конфиг файлом config.yaml:
```bash
autointent --config-path FULL_PATH/test_config --config-name config
```

## Построение документации

Построить html версию в папке `docs/build`:
Expand Down
68 changes: 0 additions & 68 deletions autointent/_pipeline/_cli_endpoint.py

This file was deleted.

19 changes: 11 additions & 8 deletions autointent/_pipeline/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ class Pipeline:
def __init__(
self,
nodes: list[NodeOptimizer] | list[InferenceNode],
seed: int = 42,
) -> None:
"""
Initialize the pipeline optimizer.

:param nodes: list of nodes
:param seed: random seed
"""
self._logger = logging.getLogger(__name__)
self.nodes = {node.node_type: node for node in nodes}
self.seed = seed

if isinstance(nodes[0], NodeOptimizer):
self.logging_config = LoggingConfig(dump_dir=None)
Expand Down Expand Up @@ -62,7 +65,7 @@ def set_config(self, config: LoggingConfig | VectorIndexConfig | EmbedderConfig
raise TypeError(msg)

@classmethod
def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str) -> "Pipeline":
def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed: int = 42) -> "Pipeline":
"""
Create pipeline optimizer from dictionary search space.

Expand All @@ -71,16 +74,16 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str) -> "
if isinstance(search_space, Path | str):
search_space = load_search_space(search_space)
nodes = [NodeOptimizer(**node) for node in search_space]
return cls(nodes)
return cls(nodes=nodes, seed=seed)

@classmethod
def default_optimizer(cls, multilabel: bool) -> "Pipeline":
def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline":
"""
Create pipeline optimizer with default search space for given classification task.

:param multilabel: Whether the task multi-label, or single-label.
"""
return cls.from_search_space(load_default_search_space(multilabel))
return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed)

def _fit(self, context: Context) -> None:
"""
Expand All @@ -91,8 +94,8 @@ def _fit(self, context: Context) -> None:
self.context = context
self._logger.info("starting pipeline optimization...")
self.context.callback_handler.start_run(
run_name=self.context.logging_config.get_run_name(),
dirpath=self.context.logging_config.get_dirpath(),
run_name=self.context.logging_config.safe_run_name,
dirpath=self.context.logging_config.safe_dirpath,
)
for node_type in NodeType:
node_optimizer = self.nodes.get(node_type, None)
Expand All @@ -111,7 +114,7 @@ def _is_inference(self) -> bool:
"""
return isinstance(self.nodes[NodeType.scoring], InferenceNode)

def fit(self, dataset: Dataset, force_multilabel: bool = False) -> Context:
def fit(self, dataset: Dataset) -> Context:
"""
Optimize the pipeline from dataset.

Expand All @@ -124,7 +127,7 @@ def fit(self, dataset: Dataset, force_multilabel: bool = False) -> Context:
raise RuntimeError(msg)

context = Context()
context.set_dataset(dataset, force_multilabel)
context.set_dataset(dataset)
context.configure_logging(self.logging_config)
context.configure_vector_index(self.vector_index_config, self.embedder_config)
context.configure_cross_encoder(self.cross_encoder_config)
Expand Down
2 changes: 1 addition & 1 deletion autointent/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Dataclasses for the configuration of the :class:`autointent.Embedder` and other objects."""

from ._inference_node import InferenceNodeConfig
from ._optimization_cli import (
from ._optimization import (
CrossEncoderConfig,
DataConfig,
EmbedderConfig,
Expand Down
Loading