 import shutil
 from functools import lru_cache
 from pathlib import Path
-from typing import TypedDict
 
 import huggingface_hub
 import numpy as np
@@ -59,23 +58,6 @@ def _get_latest_commit_hash(model_name: str) -> str:
     return commit_hash
 
 
-class EmbedderDumpMetadata(TypedDict):
-    """Metadata for saving and loading an Embedder instance."""
-
-    model_name: str
-    """Name of the hugging face model or a local path to sentence transformers dump."""
-    device: str | None
-    """Torch notation for CPU or CUDA."""
-    batch_size: int
-    """Batch size used for embedding calculations."""
-    max_length: int | None
-    """Maximum sequence length for the embedding model."""
-    use_cache: bool
-    """Whether to use embeddings caching."""
-    similarity_fn_name: str | None
-    """Name of the similarity function to use."""
-
-
 class Embedder:
     """A wrapper for managing embedding models using :py:class:`sentence_transformers.SentenceTransformer`.
 
@@ -85,7 +67,6 @@ class Embedder:
 
     _metadata_dict_name: str = "metadata.json"
     _dump_dir: Path | None = None
-    embedding_model: SentenceTransformer
 
     def __init__(self, embedder_config: EmbedderConfig) -> None:
         """Initialize the Embedder.
@@ -106,22 +87,25 @@ def _get_hash(self) -> int:
             commit_hash = _get_latest_commit_hash(self.config.model_name)
             hasher.update(commit_hash)
         else:
-            self._load_model()
+            self.embedding_model = self._load_model()
             for parameter in self.embedding_model.parameters():
                 hasher.update(parameter.detach().cpu().numpy())
         hasher.update(self.config.tokenizer_config.max_length)
         return hasher.intdigest()
 
-    def _load_model(self) -> None:
+    def _load_model(self) -> SentenceTransformer:
         """Load sentence transformers model to device."""
         if not hasattr(self, "embedding_model"):
-            self.embedding_model = SentenceTransformer(
+            res = SentenceTransformer(
                 self.config.model_name,
                 device=self.config.device,
                 prompts=self.config.get_prompt_config(),
                 similarity_fn_name=self.config.similarity_fn_name,
                 trust_remote_code=self.config.trust_remote_code,
             )
+        else:
+            res = self.embedding_model
+        return res
 
     def clear_ram(self) -> None:
         """Move the embedding model to CPU and delete it from memory."""
@@ -144,17 +128,9 @@ def dump(self, path: Path) -> None:
             path: Path to the directory where the model will be saved.
         """
         self._dump_dir = path
-        metadata = EmbedderDumpMetadata(
-            model_name=str(self.config.model_name),
-            device=self.config.device,
-            batch_size=self.config.batch_size,
-            max_length=self.config.tokenizer_config.max_length,
-            use_cache=self.config.use_cache,
-            similarity_fn_name=self.config.similarity_fn_name,
-        )
         path.mkdir(parents=True, exist_ok=True)
         with (path / self._metadata_dict_name).open("w") as file:
-            json.dump(metadata, file, indent=4)
+            json.dump(self.config.model_dump(mode="json"), file, indent=4)
 
     @classmethod
     def load(cls, path: Path | str, override_config: EmbedderConfig | None = None) -> "Embedder":
@@ -165,12 +141,12 @@ def load(cls, path: Path | str, override_config: EmbedderConfig | None = None) -
             override_config: one can override presaved settings
         """
         with (Path(path) / cls._metadata_dict_name).open(encoding="utf-8") as file:
-            metadata: EmbedderDumpMetadata = json.load(file)
+            config = EmbedderConfig.model_validate_json(file.read())
 
         if override_config is not None:
-            kwargs = {**metadata, **override_config.model_dump(exclude_unset=True)}
+            kwargs = {**config.model_dump(), **override_config.model_dump(exclude_unset=True)}
         else:
-            kwargs = metadata  # type: ignore[assignment]
+            kwargs = config.model_dump()
 
         max_length = kwargs.pop("max_length", None)
         if max_length is not None:
@@ -203,7 +179,7 @@ def embed(self, utterances: list[str], task_type: TaskTypeEnum | None = None) ->
             logger.debug("loading embeddings from %s", str(embeddings_path))
             return np.load(embeddings_path)  # type: ignore[no-any-return]
 
-        self._load_model()
+        self.embedding_model = self._load_model()
 
         logger.debug(
             "Calculating embeddings with model %s, batch_size=%d, max_seq_length=%s, embedder_device=%s, prompt=%s",
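For context, a minimal sketch of the round-trip this change enables: dump() now serializes the whole EmbedderConfig via pydantic's model_dump(mode="json"), and load() restores it with EmbedderConfig.model_validate_json. The constructor keyword model_name and the example model id are assumptions for illustration (the exact EmbedderConfig signature is not shown in the diff), and the import path depends on the package layout:

# Hypothetical usage sketch; Embedder and EmbedderConfig come from the package
# under review (import path omitted).
from pathlib import Path

# model_name is assumed to be an accepted constructor kwarg; the field itself
# is visible in the removed EmbedderDumpMetadata definition.
config = EmbedderConfig(model_name="sentence-transformers/all-MiniLM-L6-v2")
embedder = Embedder(config)

dump_dir = Path("embedder_dump")
embedder.dump(dump_dir)    # writes metadata.json from config.model_dump(mode="json")
embedder.clear_ram()

restored = Embedder.load(dump_dir)  # reads it back via EmbedderConfig.model_validate_json

As in the updated load(), an override_config can still be passed; its model_dump(exclude_unset=True) takes precedence over the values stored in metadata.json.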