deeppavlov
diff --git a/‎autointent/_dump_tools/unit_dumpers.py‎
Lines changed: 24 additions & 0 deletions b/‎autointent/_dump_tools/unit_dumpers.py‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎autointent/generation/_cache.py‎
Lines changed: 180 additions & 22 deletions b/‎autointent/generation/_cache.py‎
Lines changed: 180 additions & 22 deletions
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import Any, TypeVar
 
+import aiofiles
 import joblib
 import numpy as np
 import numpy.typing as npt
@@ -163,6 +164,15 @@ def dump(obj: BaseModel, path: Path, exists_ok: bool) -> None:
         with (path / "model_dump.json").open("w", encoding="utf-8") as file:
             json.dump(obj.model_dump(), file, ensure_ascii=False, indent=4)
 
+    @staticmethod
+    async def dump_async(obj: BaseModel, path: Path, exists_ok: bool) -> None:
+        """Asynchronously dump a pydantic model into the directory ``path``.
+
+        Two JSON files are written: ``class_info.json`` (class name and module of
+        ``obj``) and ``model_dump.json`` (the result of ``obj.model_dump()``).
+
+        Both payloads are serialized before anything on disk is touched, so a model
+        that cannot be encoded as JSON does not leave behind a directory containing
+        a truncated ``model_dump.json``.
+
+        Args:
+            obj: Pydantic model instance to dump.
+            path: Directory to write into; created together with missing parents.
+            exists_ok: Forwarded to ``Path.mkdir`` as ``exist_ok``.
+
+        Raises:
+            TypeError: If the dumped model contains values ``json`` cannot encode.
+            FileExistsError: If ``path`` already exists and ``exists_ok`` is false.
+        """
+        class_info = {"name": obj.__class__.__name__, "module": obj.__class__.__module__}
+        class_info_json = json.dumps(class_info, ensure_ascii=False, indent=4)
+        model_dump_json = json.dumps(obj.model_dump(), ensure_ascii=False, indent=4)
+
+        # NOTE: mkdir is a synchronous call; only the file writes go through aiofiles.
+        path.mkdir(parents=True, exist_ok=exists_ok)
+        async with aiofiles.open(path / "class_info.json", mode="w", encoding="utf-8") as file:
+            await file.write(class_info_json)
+        async with aiofiles.open(path / "model_dump.json", mode="w", encoding="utf-8") as file:
+            await file.write(model_dump_json)
+
     @staticmethod
     def load(path: Path, **kwargs: Any) -> BaseModel:  # noqa: ANN401, ARG004
         with (path / "model_dump.json").open("r", encoding="utf-8") as file:
@@ -175,6 +185,20 @@ def load(path: Path, **kwargs: Any) -> BaseModel:  # noqa: ANN401, ARG004
         model_type = getattr(model_type, class_info["name"])
         return model_type.model_validate(content)  # type: ignore[no-any-return]
 
+    @staticmethod
+    async def load_async(path: Path, **kwargs: Any) -> BaseModel:  # noqa: ANN401, ARG004
+        """Asynchronously load a pydantic model from the directory ``path``.
+
+        Reads ``model_dump.json`` and ``class_info.json``, imports the module named
+        in the class info, looks the class up by name and validates the dumped
+        content with it.
+
+        Args:
+            path: Directory containing ``model_dump.json`` and ``class_info.json``.
+            **kwargs: Accepted but not used.
+
+        Returns:
+            The model instance produced by ``model_validate``.
+        """
+        async with aiofiles.open(path / "model_dump.json", encoding="utf-8") as dump_file:
+            raw_content = await dump_file.read()
+        content = json.loads(raw_content)
+
+        async with aiofiles.open(path / "class_info.json", encoding="utf-8") as info_file:
+            raw_class_info = await info_file.read()
+        class_info = json.loads(raw_class_info)
+
+        # Resolve the concrete model class from its recorded module and name.
+        module = importlib.import_module(class_info["module"])
+        model_cls = getattr(module, class_info["name"])
+        return model_cls.model_validate(content)  # type: ignore[no-any-return]
+
     @classmethod
     def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
+        """Return whether ``obj`` is an instance of pydantic's ``BaseModel``."""
         return isinstance(obj, BaseModel)
 
@@ -48,16 +48,35 @@ def __init__(self, use_cache: bool = True) -> None:
             use_cache: Whether to use caching.
         """
         self.use_cache = use_cache
+        self._memory_cache: dict[str, BaseModel] = {}
 
-    def _get_cache_key(
-        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any]
-    ) -> str:
+        if self.use_cache:
+            self._load_existing_cache()
+
+    def _load_existing_cache(self) -> None:
+        """Load all existing cache items from disk into memory.
+
+        ``PydanticModelDumper`` stores every item as a directory holding
+        ``class_info.json`` and ``model_dump.json``, so directories (not regular
+        files) are the entries to load. Entries that cannot be loaded are logged
+        and removed; a broken entry never aborts construction of the cache.
+        """
+        import shutil  # local import: only needed for cleaning up broken entries
+
+        cache_dir = Path(user_cache_dir("autointent")) / "structured_outputs"
+
+        if not cache_dir.exists():
+            return
+
+        for cache_entry in cache_dir.iterdir():
+            if not cache_entry.is_dir():
+                continue
+            try:
+                cached_data = PydanticModelDumper.load(cache_entry)
+            except (ValidationError, ImportError, OSError, ValueError, KeyError, AttributeError) as e:
+                # OSError: missing/unreadable JSON files; ValueError: invalid JSON
+                # (json.JSONDecodeError); KeyError/AttributeError: malformed class info.
+                logger.warning("Failed to load cached item %s: %s", cache_entry.name, e)
+                # Path.unlink() cannot delete a directory, so remove the whole tree.
+                shutil.rmtree(cache_entry, ignore_errors=True)
+                continue
+            if isinstance(cached_data, BaseModel):
+                # NOTE(review): assumes the entry name equals the cache key used by
+                # get/set - confirm against `_get_structured_output_cache_path`.
+                self._memory_cache[cache_entry.name] = cached_data
+                logger.debug("Loaded cached item into memory: %s", cache_entry.name)
+
+    def _get_cache_key(self, messages: list[Message], output_model: type[T], generation_params: dict[str, Any]) -> str:
         """Generate a cache key for the given parameters.
 
         Args:
             messages: List of messages to send to the model.
             output_model: Pydantic model class to parse the response into.
-            backend: Backend to use for structured output.
             generation_params: Generation parameters.
 
         Returns:
@@ -66,19 +85,76 @@ def _get_cache_key(
         hasher = Hasher(strict=True)
         hasher.update(json.dumps(messages))
         hasher.update(json.dumps(output_model.model_json_schema()))
-        hasher.update(backend)
         hasher.update(json.dumps(generation_params))
         return hasher.hexdigest()
 
-    def get(
-        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any]
-    ) -> T | None:
+    def _check_memory_cache(self, cache_key: str, output_model: type[T]) -> T | None:
+        """Look up ``cache_key`` in the in-memory cache.
+
+        An entry whose value is not an instance of ``output_model`` is evicted
+        from the in-memory cache and treated as a miss.
+
+        Args:
+            cache_key: The cache key to look up.
+            output_model: Pydantic model class the cached value must be an instance of.
+
+        Returns:
+            The cached value on a valid hit, None otherwise.
+        """
+        try:
+            hit = self._memory_cache[cache_key]
+        except KeyError:
+            return None
+
+        if not isinstance(hit, output_model):
+            # Stored value belongs to a different model type: drop it.
+            del self._memory_cache[cache_key]
+            logger.warning("Cached data type mismatch in memory, removing invalid cache")
+            return None
+
+        logger.debug("Using cached structured output from memory for key: %s", cache_key)
+        return hit
+
+    def _load_from_disk(self, cache_key: str, output_model: type[T]) -> T | None:
+        """Load cached result from disk.
+
+        A usable entry is also stored in the in-memory cache. An entry that fails
+        to load, or that holds a model of a different type, is removed from disk
+        and reported as a miss instead of raising.
+
+        Args:
+            cache_key: The cache key to look up.
+            output_model: Pydantic model class to parse the response into.
+
+        Returns:
+            Cached result if available and valid, None otherwise.
+        """
+        import shutil  # local import: only needed to drop unusable entries
+
+        cache_path = _get_structured_output_cache_path(cache_key)
+
+        if cache_path.exists():
+            try:
+                cached_data = PydanticModelDumper.load(cache_path)
+
+                if isinstance(cached_data, output_model):
+                    logger.debug("Using cached structured output from disk for key: %s", cache_key)
+                    # Add to memory cache for future access
+                    self._memory_cache[cache_key] = cached_data
+                    return cached_data
+
+                logger.warning("Cached data type mismatch on disk, removing invalid cache")
+                # The dumper writes each entry as a directory, which Path.unlink()
+                # cannot delete; remove the whole tree instead.
+                shutil.rmtree(cache_path, ignore_errors=True)
+            except (ValidationError, ImportError, OSError, ValueError, KeyError, AttributeError) as e:
+                # OSError: missing/unreadable JSON files; ValueError: invalid JSON
+                # (json.JSONDecodeError); KeyError/AttributeError: malformed class info.
+                logger.warning("Failed to load cached structured output from disk: %s", e)
+                shutil.rmtree(cache_path, ignore_errors=True)
+
+        return None
+
+    def _save_to_disk(self, cache_key: str, result: T) -> None:
+        """Persist ``result`` in the disk cache under ``cache_key``.
+
+        Args:
+            cache_key: Key identifying the cache entry.
+            result: Model instance to write.
+        """
+        target = _get_structured_output_cache_path(cache_key)
+        # Make sure the parent directory exists before handing over to the dumper.
+        target.parent.mkdir(parents=True, exist_ok=True)
+        PydanticModelDumper.dump(result, target, exists_ok=True)
+
+    def get(self, messages: list[Message], output_model: type[T], generation_params: dict[str, Any]) -> T | None:
         """Get cached result if available.
 
         Args:
             messages: List of messages to send to the model.
             output_model: Pydantic model class to parse the response into.
-            backend: Backend to use for structured output.
             generation_params: Generation parameters.
 
         Returns:
@@ -87,29 +163,109 @@ def get(
         if not self.use_cache:
             return None
 
-        cache_key = self._get_cache_key(messages, output_model, backend, generation_params)
+        cache_key = self._get_cache_key(messages, output_model, generation_params)
+
+        # First check in-memory cache
+        memory_result = self._check_memory_cache(cache_key, output_model)
+        if memory_result is not None:
+            return memory_result
+
+        # Fallback to disk cache
+        return self._load_from_disk(cache_key, output_model)
+
+    def set(self, messages: list[Message], output_model: type[T], generation_params: dict[str, Any], result: T) -> None:
+        """Cache the result in memory and on disk.
+
+        Does nothing when caching is disabled.
+
+        Args:
+            messages: List of messages to send to the model.
+            output_model: Pydantic model class to parse the response into.
+            generation_params: Generation parameters.
+            result: The result to cache.
+        """
+        if not self.use_cache:
+            return
+
+        cache_key = self._get_cache_key(messages, output_model, generation_params)
+
+        # Store in memory cache
+        self._memory_cache[cache_key] = result
+
+        # Store in disk cache
+        self._save_to_disk(cache_key, result)
+        logger.debug("Cached structured output for key: %s (memory and disk)", cache_key)
+
+    async def _load_from_disk_async(self, cache_key: str, output_model: type[T]) -> T | None:
+        """Load cached result from disk asynchronously.
+
+        A usable entry is also stored in the in-memory cache. An entry that fails
+        to load, or that holds a model of a different type, is removed from disk
+        and reported as a miss instead of raising.
+
+        Args:
+            cache_key: The cache key to look up.
+            output_model: Pydantic model class to parse the response into.
+
+        Returns:
+            Cached result if available and valid, None otherwise.
+        """
+        import shutil  # local import: only needed to drop unusable entries
+
         cache_path = _get_structured_output_cache_path(cache_key)
 
         if cache_path.exists():
             try:
-                cached_data = PydanticModelDumper.load(cache_path)
+                cached_data = await PydanticModelDumper.load_async(cache_path)
 
                 if isinstance(cached_data, output_model):
-                    logger.debug("Using cached structured output for key: %s", cache_key)
+                    logger.debug("Using cached structured output from disk for key: %s", cache_key)
+                    # Add to memory cache for future access
+                    self._memory_cache[cache_key] = cached_data
                     return cached_data
 
-                logger.warning("Cached data type mismatch, removing invalid cache")
-                cache_path.unlink()
-            except (ValidationError, ImportError) as e:
-                logger.warning("Failed to load cached structured output: %s", e)
-                cache_path.unlink(missing_ok=True)
+                logger.warning("Cached data type mismatch on disk, removing invalid cache")
+                # The dumper writes each entry as a directory, which Path.unlink()
+                # cannot delete; remove the whole tree instead.
+                shutil.rmtree(cache_path, ignore_errors=True)
+            except (ValidationError, ImportError, OSError, ValueError, KeyError, AttributeError) as e:
+                # OSError: missing/unreadable JSON files; ValueError: invalid JSON
+                # (json.JSONDecodeError); KeyError/AttributeError: malformed class info.
+                logger.warning("Failed to load cached structured output from disk: %s", e)
+                shutil.rmtree(cache_path, ignore_errors=True)
 
         return None
 
-    def set(
-        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any], result: T
+    async def _save_to_disk_async(self, cache_key: str, result: T) -> None:
+        """Persist ``result`` in the disk cache under ``cache_key`` (async version).
+
+        Args:
+            cache_key: Key identifying the cache entry.
+            result: Model instance to write.
+        """
+        target = _get_structured_output_cache_path(cache_key)
+        # Make sure the parent directory exists before handing over to the dumper.
+        target.parent.mkdir(parents=True, exist_ok=True)
+        await PydanticModelDumper.dump_async(result, target, exists_ok=True)
+
+    async def get_async(
+        self, messages: list[Message], output_model: type[T], generation_params: dict[str, Any]
+    ) -> T | None:
+        """Return the cached result for a request, if any (async version).
+
+        The in-memory cache is consulted first; the disk cache is only read when
+        memory has no valid entry. Always returns None when caching is disabled.
+
+        Args:
+            messages: List of messages to send to the model.
+            output_model: Pydantic model class to parse the response into.
+            generation_params: Generation parameters.
+
+        Returns:
+            Cached result if available, None otherwise.
+        """
+        if not self.use_cache:
+            return None
+
+        key = self._get_cache_key(messages, output_model, generation_params)
+
+        cached = self._check_memory_cache(key, output_model)
+        if cached is None:
+            # Memory miss: fall back to the disk cache.
+            cached = await self._load_from_disk_async(key, output_model)
+        return cached
+
+    async def set_async(
+        self, messages: list[Message], output_model: type[T], generation_params: dict[str, Any], result: T
     ) -> None:
-        """Cache the result.
+        """Cache the result (async version).
 
         Args:
             messages: List of messages to send to the model.
@@ -121,9 +277,11 @@ def set(
         if not self.use_cache:
             return
 
-        cache_key = self._get_cache_key(messages, output_model, backend, generation_params)
-        cache_path = _get_structured_output_cache_path(cache_key)
+        cache_key = self._get_cache_key(messages, output_model, generation_params)
 
-        cache_path.parent.mkdir(parents=True, exist_ok=True)
-        PydanticModelDumper.dump(result, cache_path, exists_ok=True)
-        logger.debug("Cached structured output for key: %s", cache_key)
+        # Store in memory cache
+        self._memory_cache[cache_key] = result
+
+        # Store in disk cache
+        await self._save_to_disk_async(cache_key, result)
+        logger.debug("Cached structured output for key: %s (memory and disk)", cache_key)