Skip to content

Commit f01d2c9

Browse files
Feat/Efficient hpo (#227)
* implement new logic * remove bruteforce sampling support * Update optimizer_config.schema.json * remove brute * Update optimizer_config.schema.json * upd contributing md * fix presets * fix optuna search space * upd callback test * implement separate config for hpo * bug fix * Update optimizer_config.schema.json * bug fix * upd test * add utf-8 everywhere * add `utf-8` --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 95be22a commit f01d2c9

30 files changed

+1079
-345
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Note: If mypy shows different errors locally compared to github actions, you sho
5959
```bash
6060
make update
6161
```
62+
But it still doesn't guarantee that the local type checker will give the same errors as CI. This is because CI is configured to check on Python 3.10 and your local python version is probably the latest one.
6263

6364
## Building Documentation
6465

autointent/_dataset/_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def to_json(self, filepath: str | Path) -> None:
133133
path = Path(filepath)
134134
if not path.parent.exists():
135135
path.parent.mkdir(parents=True)
136-
with path.open("w") as file:
136+
with path.open("w", encoding="utf-8") as file:
137137
json.dump(self.to_dict(), file, indent=4, ensure_ascii=False)
138138

139139
def push_to_hub(self, repo_name: str, private: bool = False) -> None:

autointent/_dataset/_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,5 +95,5 @@ def _read(self, filepath: str | Path) -> DatasetReader:
9595
Returns:
9696
DatasetReader: A validated dataset representation.
9797
"""
98-
with Path(filepath).open() as file:
98+
with Path(filepath).open(encoding="utf-8") as file:
9999
return DatasetReader.model_validate(json.load(file))

autointent/_dump_tools.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def dump(obj: Any, path: Path, exists_ok: bool = False, exclude: list[type[Any]]
151151
msg = f"Attribute {key} of type {type(val)} cannot be dumped to file system."
152152
logger.error(msg)
153153

154-
with (path / Dumper.simple_attrs).open("w") as file:
154+
with (path / Dumper.simple_attrs).open("w", encoding="utf-8") as file:
155155
json.dump(simple_attrs, file, ensure_ascii=False, indent=4)
156156

157157
np.savez(path / Dumper.arrays, allow_pickle=False, **arrays)
@@ -179,7 +179,7 @@ def load( # noqa: C901, PLR0912, PLR0915
179179
if child.name == Dumper.tags:
180180
tags = {tags_dump.name: TagsList.load(tags_dump) for tags_dump in child.iterdir()}
181181
elif child.name == Dumper.simple_attrs:
182-
with child.open() as file:
182+
with child.open(encoding="utf-8") as file:
183183
simple_attrs = json.load(file)
184184
elif child.name == Dumper.arrays:
185185
arrays = dict(np.load(child))

autointent/_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def load(cls, path: Path | str, override_config: EmbedderConfig | None = None) -
164164
path: Path to the directory where the model is stored.
165165
override_config: one can override presaved settings
166166
"""
167-
with (Path(path) / cls._metadata_dict_name).open() as file:
167+
with (Path(path) / cls._metadata_dict_name).open(encoding="utf-8") as file:
168168
metadata: EmbedderDumpMetadata = json.load(file)
169169

170170
if override_config is not None:

autointent/_logging/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def setup_logging(level: LogLevel | str, log_filename: Path | str | None = None)
2020
log_filename: specify location of logfile, omit extension as suffix ``.log.jsonl`` will be appended.
2121
"""
2222
config_file = ires.files("autointent._logging").joinpath("config.yaml")
23-
with config_file.open() as f_in:
23+
with config_file.open(encoding="utf-8") as f_in:
2424
config = yaml.safe_load(f_in)
2525

2626
level = LogLevel(level)

autointent/_optimization_config.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
from pydantic import BaseModel, PositiveInt
44

5-
from .configs import CrossEncoderConfig, DataConfig, EmbedderConfig, HFModelConfig, LoggingConfig
6-
from .custom_types import SamplerType
5+
from .configs import CrossEncoderConfig, DataConfig, EmbedderConfig, HFModelConfig, HPOConfig, LoggingConfig
76

87

98
class OptimizationConfig(BaseModel):
@@ -27,7 +26,6 @@ class OptimizationConfig(BaseModel):
2726

2827
transformer_config: HFModelConfig = HFModelConfig()
2928

30-
sampler: SamplerType = "brute"
31-
"""See tutorial on optuna and presets."""
29+
hpo_config: HPOConfig = HPOConfig()
3230

3331
seed: PositiveInt = 42

autointent/_pipeline/_pipeline.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import logging
55
from pathlib import Path
6-
from typing import TYPE_CHECKING, Any, get_args
6+
from typing import TYPE_CHECKING, Any
77

88
import numpy as np
99
import yaml
@@ -15,13 +15,13 @@
1515
DataConfig,
1616
EmbedderConfig,
1717
HFModelConfig,
18+
HPOConfig,
1819
InferenceNodeConfig,
1920
LoggingConfig,
2021
)
2122
from autointent.custom_types import (
2223
ListOfGenericLabels,
2324
NodeType,
24-
SamplerType,
2525
SearchSpacePreset,
2626
SearchSpaceValidationMode,
2727
)
@@ -44,7 +44,6 @@ class Pipeline:
4444
def __init__(
4545
self,
4646
nodes: list[NodeOptimizer] | list[InferenceNode],
47-
sampler: SamplerType = "brute",
4847
seed: int | None = 42,
4948
) -> None:
5049
"""Initialize the pipeline optimizer.
@@ -57,23 +56,19 @@ def __init__(
5756
self._logger = logging.getLogger(__name__)
5857
self.nodes = {node.node_type: node for node in nodes}
5958
self._seed = seed
60-
if sampler not in get_args(SamplerType):
61-
msg = f"Sampler should be one of {get_args(SamplerType)}"
62-
raise ValueError(msg)
63-
64-
self._sampler = sampler
6559

6660
if isinstance(nodes[0], NodeOptimizer):
6761
self.logging_config = LoggingConfig()
6862
self.embedder_config = EmbedderConfig()
6963
self.cross_encoder_config = CrossEncoderConfig()
7064
self.data_config = DataConfig()
7165
self.transformer_config = HFModelConfig()
66+
self.hpo_config = HPOConfig()
7267
elif not isinstance(nodes[0], InferenceNode):
7368
assert_never(nodes)
7469

7570
def set_config(
76-
self, config: LoggingConfig | EmbedderConfig | CrossEncoderConfig | DataConfig | HFModelConfig
71+
self, config: LoggingConfig | EmbedderConfig | CrossEncoderConfig | DataConfig | HFModelConfig | HPOConfig
7772
) -> None:
7873
"""Set the configuration for the pipeline.
7974
@@ -90,6 +85,8 @@ def set_config(
9085
self.data_config = config
9186
elif isinstance(config, HFModelConfig):
9287
self.transformer_config = config
88+
elif isinstance(config, HPOConfig):
89+
self.hpo_config = config
9390
else:
9491
assert_never(config)
9592

@@ -126,23 +123,23 @@ def from_optimization_config(cls, config: dict[str, Any] | Path | str | Optimiza
126123
if isinstance(config, dict):
127124
dict_params = config
128125
else:
129-
with Path(config).open() as file:
126+
with Path(config).open(encoding="utf-8") as file:
130127
dict_params = yaml.safe_load(file)
131128
optimization_config = OptimizationConfig(**dict_params)
132129

133130
pipeline = cls(
134131
[NodeOptimizer(**node) for node in optimization_config.search_space],
135-
optimization_config.sampler,
136132
optimization_config.seed,
137133
)
138134
pipeline.set_config(optimization_config.logging_config)
139135
pipeline.set_config(optimization_config.data_config)
140136
pipeline.set_config(optimization_config.embedder_config)
141137
pipeline.set_config(optimization_config.cross_encoder_config)
142138
pipeline.set_config(optimization_config.transformer_config)
139+
pipeline.set_config(optimization_config.hpo_config)
143140
return pipeline
144141

145-
def _fit(self, context: Context, sampler: SamplerType) -> None:
142+
def _fit(self, context: Context) -> None:
146143
"""Optimize the pipeline.
147144
148145
Args:
@@ -167,7 +164,7 @@ def _fit(self, context: Context, sampler: SamplerType) -> None:
167164
for node_type in NodeType:
168165
node_optimizer = self.nodes.get(node_type, None)
169166
if node_optimizer is not None:
170-
node_optimizer.fit(context, sampler) # type: ignore[union-attr]
167+
node_optimizer.fit(context) # type: ignore[union-attr]
171168
self.context.callback_handler.end_run()
172169

173170
def _is_inference(self) -> bool:
@@ -182,7 +179,6 @@ def fit(
182179
self,
183180
dataset: Dataset,
184181
refit_after: bool = False,
185-
sampler: SamplerType | None = None,
186182
incompatible_search_space: SearchSpaceValidationMode = "filter",
187183
) -> Context:
188184
"""Optimize the pipeline from dataset.
@@ -206,6 +202,7 @@ def fit(
206202
context.configure_transformer(self.embedder_config)
207203
context.configure_transformer(self.cross_encoder_config)
208204
context.configure_transformer(self.transformer_config)
205+
context.configure_hpo(self.hpo_config)
209206

210207
self.validate_modules(dataset, mode=incompatible_search_space)
211208

@@ -221,10 +218,7 @@ def fit(
221218
"Change settings in LoggerConfig to obtain different behavior."
222219
)
223220

224-
if sampler is None:
225-
sampler = self._sampler
226-
227-
self._fit(context, sampler)
221+
self._fit(context)
228222

229223
if context.logging_config.clear_ram and context.logging_config.dump_modules:
230224
nodes_configs = context.optimization_info.get_inference_nodes_config()
@@ -336,7 +330,7 @@ def load(
336330
embedder_config: one can override presaved settings
337331
cross_encoder_config: one can override presaved settings
338332
"""
339-
with (Path(path) / "inference_config.yaml").open() as file:
333+
with (Path(path) / "inference_config.yaml").open(encoding="utf-8") as file:
340334
inference_nodes_configs: list[dict[str, Any]] = yaml.safe_load(file)
341335

342336
inference_config = [

autointent/_presets/heavy.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,16 @@ search_space:
88
low: 1
99
high: 20
1010
weights: [uniform, distance, closest]
11-
n_trials: 10
1211
- module_name: linear
1312
- module_name: mlknn
1413
k:
1514
low: 1
1615
high: 20
17-
n_trials: 10
1816
- module_name: description
1917
temperature:
2018
low: 0.01
2119
high: 10
2220
log: true
23-
n_trials: 10
2421
- module_name: rerank
2522
k:
2623
low: 10
@@ -29,17 +26,18 @@ search_space:
2926
low: 1
3027
high: 10
3128
weights: [uniform, distance, closest]
32-
n_trials: 15
3329
- node_type: decision
3430
target_metric: decision_accuracy
3531
search_space:
3632
- module_name: threshold
3733
thresh:
3834
low: 0.1
3935
high: 0.9
40-
n_trials: 10
4136
- module_name: argmax
4237
- module_name: jinoos
4338
- module_name: tunable
4439
- module_name: adaptive
45-
sampler: tpe
40+
hpo_config:
41+
sampler: tpe
42+
n_trials: 128 # don't know yet if it's good
43+
n_startup_trials: 32

autointent/_presets/heavy_extra.yaml

Lines changed: 0 additions & 41 deletions
This file was deleted.

0 commit comments

Comments (0)