Skip to content

Commit 05fee79

Browse files
authored
Feat/llm cache (#235)
* first version of cache * add proper dumping and loading * upd hasher and test * minor change * try to track cached structured outputs * minor change * decompose structured output tests * upd basics test * refactor caching tracking a little bit * upd caching test * upd retries test * code formatter
1 parent 169dcd9 commit 05fee79

File tree

8 files changed

+406
-216
lines changed

8 files changed

+406
-216
lines changed

autointent/_hash.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ class Hasher:
1515
hashing embeddings from :py:class:`autointent.Embedder`.
1616
"""
1717

18-
def __init__(self) -> None:
18+
def __init__(self, strict: bool = False) -> None:
1919
"""Initialize the Hasher instance and sets up the internal xxhash state.
2020
2121
This state will be used for progressively hashing values using the
2222
`update` method and obtaining the final digest using `hexdigest`.
2323
"""
2424
self._state = xxhash.xxh64()
25+
self.strict = strict
2526

26-
@classmethod
27-
def hash(cls, value: Any) -> int: # noqa: ANN401
27+
def hash(self, value: Any) -> int: # noqa: ANN401
2828
"""Generate a hash for the given value using xxhash.
2929
3030
Args:
@@ -35,6 +35,9 @@ def hash(cls, value: Any) -> int: # noqa: ANN401
3535
"""
3636
if hasattr(value, "__hash__") and value.__hash__ not in {None, object.__hash__}:
3737
return hash(value)
38+
if self.strict:
39+
msg = "Object is not hashable."
40+
raise ValueError(msg)
3841
return xxhash.xxh64(pickle.dumps(value)).intdigest()
3942

4043
def update(self, value: Any) -> None: # noqa: ANN401

autointent/generation/_cache.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Helpers for caching structured outputs from LLM."""
2+
3+
import json
4+
import logging
5+
from pathlib import Path
6+
from typing import Any, TypeVar
7+
8+
from appdirs import user_cache_dir
9+
from dotenv import load_dotenv
10+
from pydantic import BaseModel, ValidationError
11+
12+
from autointent._dump_tools import PydanticModelDumper
13+
from autointent._hash import Hasher
14+
from autointent.generation.chat_templates import Message
15+
16+
logger = logging.getLogger(__name__)
17+
18+
load_dotenv()
19+
20+
T = TypeVar("T", bound=BaseModel)
21+
"""Type variable for Pydantic models used in structured output generation."""
22+
23+
24+
def _get_structured_output_cache_path(dirname: str) -> Path:
    """Get the path to a structured-output cache entry.

    Builds the full path to a per-entry location under the user's
    application cache directory (as reported by ``appdirs``), namespaced
    as ``autointent/structured_outputs/<dirname>``.

    Args:
        dirname: Name of the cache entry (typically a hexadecimal hash key).

    Returns:
        The full path to the cache entry.
    """
    return Path(user_cache_dir("autointent")) / "structured_outputs" / dirname
39+
40+
41+
class StructuredOutputCache:
    """Disk-backed cache for structured LLM output results.

    Entries are keyed by a hash of the request (messages, output schema,
    backend and generation parameters) and persisted via
    :py:class:`autointent._dump_tools.PydanticModelDumper`.
    """

    def __init__(self, use_cache: bool = True) -> None:
        """Initialize the cache.

        Args:
            use_cache: Whether to use caching. When False, ``get`` always
                misses and ``set`` is a no-op.
        """
        self.use_cache = use_cache

    def _get_cache_key(
        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any]
    ) -> str:
        """Generate a deterministic cache key for the given parameters.

        Args:
            messages: List of messages to send to the model.
            output_model: Pydantic model class to parse the response into.
            backend: Backend to use for structured output.
            generation_params: Generation parameters.

        Returns:
            Cache key as a hexadecimal string.
        """
        hasher = Hasher(strict=True)
        # sort_keys makes the key independent of dict insertion order, so
        # logically identical requests always map to the same cache entry.
        hasher.update(json.dumps(messages, sort_keys=True))
        hasher.update(json.dumps(output_model.model_json_schema(), sort_keys=True))
        hasher.update(backend)
        hasher.update(json.dumps(generation_params, sort_keys=True))
        return hasher.hexdigest()

    def get(
        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any]
    ) -> T | None:
        """Get cached result if available.

        Args:
            messages: List of messages to send to the model.
            output_model: Pydantic model class to parse the response into.
            backend: Backend to use for structured output.
            generation_params: Generation parameters.

        Returns:
            Cached result if available, None otherwise.
        """
        if not self.use_cache:
            return None

        cache_key = self._get_cache_key(messages, output_model, backend, generation_params)
        cache_path = _get_structured_output_cache_path(cache_key)

        if cache_path.exists():
            try:
                cached_data = PydanticModelDumper.load(cache_path)

                if isinstance(cached_data, output_model):
                    logger.debug("Using cached structured output for key: %s", cache_key)
                    return cached_data

                logger.warning("Cached data type mismatch, removing invalid cache")
                # missing_ok guards against a concurrent process removing the
                # entry between the exists() check and the unlink.
                cache_path.unlink(missing_ok=True)
            # OSError covers unreadable/corrupt entries on top of the
            # model-validation and import failures the dumper can raise.
            except (ValidationError, ImportError, OSError) as e:
                logger.warning("Failed to load cached structured output: %s", e)
                cache_path.unlink(missing_ok=True)

        return None

    def set(
        self, messages: list[Message], output_model: type[T], backend: str, generation_params: dict[str, Any], result: T
    ) -> None:
        """Cache the result.

        Args:
            messages: List of messages to send to the model.
            output_model: Pydantic model class to parse the response into.
            backend: Backend to use for structured output.
            generation_params: Generation parameters.
            result: The result to cache.
        """
        if not self.use_cache:
            return

        cache_key = self._get_cache_key(messages, output_model, backend, generation_params)
        cache_path = _get_structured_output_cache_path(cache_key)

        cache_path.parent.mkdir(parents=True, exist_ok=True)
        PydanticModelDumper.dump(result, cache_path, exists_ok=True)
        logger.debug("Cached structured output for key: %s", cache_key)

autointent/generation/_generator.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
from autointent.generation.chat_templates import Message, Role
1414

15+
from ._cache import StructuredOutputCache
16+
1517
logger = logging.getLogger(__name__)
1618

1719
load_dotenv()
@@ -38,12 +40,19 @@ class Generator:
3840
}
3941
"""Default generation parameters for API requests."""
4042

41-
def __init__(self, base_url: str | None = None, model_name: str | None = None, **generation_params: Any) -> None: # noqa: ANN401
43+
def __init__(
44+
self,
45+
base_url: str | None = None,
46+
model_name: str | None = None,
47+
use_cache: bool = True,
48+
**generation_params: Any, # noqa: ANN401
49+
) -> None:
4250
"""Initialize the Generator with API configuration.
4351
4452
Args:
4553
base_url: OpenAI API compatible server URL.
4654
model_name: Name of the language model to use.
55+
use_cache: Whether to use caching for structured outputs.
4756
**generation_params: Additional generation parameters to override defaults passed to OpenAI completions API.
4857
"""
4958
base_url = base_url or os.getenv("OPENAI_BASE_URL")
@@ -58,6 +67,7 @@ def __init__(self, base_url: str | None = None, model_name: str | None = None, *
5867
**self._default_generation_params,
5968
**generation_params,
6069
} # https://stackoverflow.com/a/65539348
70+
self.cache = StructuredOutputCache(use_cache=use_cache)
6171

6272
def get_chat_completion(self, messages: list[Message]) -> str:
6373
"""Prompt LLM and return its answer.
@@ -92,15 +102,15 @@ def _create_retry_messages(self, error_message: str, raw: str | None) -> list[Me
92102
res.append({"role": Role.ASSISTANT, "content": raw})
93103
res.append(
94104
{
95-
"role": "user",
105+
"role": Role.USER,
96106
"content": dedent(
97107
f"""The previous response failed validation with the following error: {error_message}
98108
99-
Make sure to:
100-
1. Follow the exact schema structure
101-
2. Use the correct data types for each field
102-
3. Include all required fields
103-
4. Ensure the response is valid JSON"""
109+
Make sure to:
110+
1. Follow the exact schema structure
111+
2. Use the correct data types for each field
112+
3. Include all required fields
113+
4. Ensure the response is valid JSON"""
104114
),
105115
}
106116
)
@@ -184,6 +194,11 @@ async def get_structured_output_async(
184194
Returns:
185195
Parsed response as an instance of the provided Pydantic model.
186196
"""
197+
# Check cache first
198+
cached_result = self.cache.get(messages, output_model, backend, self.generation_params)
199+
if cached_result is not None:
200+
return cached_result
201+
187202
current_messages = messages.copy()
188203
res: T | None = None
189204

@@ -213,6 +228,9 @@ async def get_structured_output_async(
213228
logger.exception(msg)
214229
raise RuntimeError(msg)
215230

231+
# Cache the successful result
232+
self.cache.set(messages, output_model, backend, self.generation_params, res)
233+
216234
return res
217235

218236
def _get_structured_output_openai_sync(
@@ -293,6 +311,11 @@ def get_structured_output_sync(
293311
Returns:
294312
Parsed response as an instance of the provided Pydantic model.
295313
"""
314+
# Check cache first
315+
cached_result = self.cache.get(messages, output_model, backend, self.generation_params)
316+
if cached_result is not None:
317+
return cached_result
318+
296319
current_messages = messages.copy()
297320
res: T | None = None
298321

@@ -322,4 +345,7 @@ def get_structured_output_sync(
322345
logger.exception(msg)
323346
raise RuntimeError(msg)
324347

348+
# Cache the successful result
349+
self.cache.set(messages, output_model, backend, self.generation_params, res)
350+
325351
return res

tests/generation/structured_output/__init__.py

Whitespace-only changes.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Tests for structured output functionality."""
2+
3+
import os
4+
from typing import Literal
5+
6+
import pytest
7+
from pydantic import BaseModel, Field
8+
9+
from autointent.generation import Generator
10+
from autointent.generation.chat_templates import Role
11+
12+
13+
# Rich example schema used to exercise structured-output parsing: mixes
# plain strings, a constrained int, a bool with a default, a Literal enum
# and a list field. Deliberately has NO class docstring — pydantic would
# surface one as the JSON-schema "description", changing prompts/cache keys.
class Person(BaseModel):
    # Free-form chain-of-thought slot placed first so the model plans values.
    reasoning: str = Field(description="Some preliminary reasoning to plan fields' values")
    name: str = Field(description="The person's full name")
    # ge/le bounds exercise numeric constraint validation (0–150 years).
    age: int = Field(description="The person's age in years", ge=0, le=150)
    email: str = Field(description="The person's email address")
    occupation: str = Field(description="The person's job or profession")
    # Optional in practice: defaults to True when the model omits it.
    is_active: bool = Field(description="Whether the person is currently active", default=True)
    # Closed set of values — exercises enum-style schema constraints.
    status: Literal["active", "inactive", "pending"] = Field(description="Current status of the person")
    hobbies: list[str] = Field(description="List of the person's hobbies and interests")
23+
24+
@pytest.fixture
def generator():
    """Provide a Generator with caching disabled so tests hit the API."""
    gen = Generator(max_tokens=1000, use_cache=False)
    return gen
28+
29+
30+
@pytest.mark.skipif(
    not os.getenv("OPENAI_API_KEY") or not os.getenv("OPENAI_MODEL_NAME"),
    reason="OPENAI_API_KEY and OPENAI_MODEL_NAME environment variables are required for this test",
)
class TestStructuredOutput:
    """Test structured output functionality for different backends."""

    def test_basic_chat_completion(self, generator):
        """Test basic chat completion functionality."""
        response = generator.get_chat_completion(messages=[{"role": Role.USER, "content": "hi! tell me a joke"}])
        assert isinstance(response, str)
        assert len(response) > 0

    @pytest.mark.asyncio
    async def test_async_chat_completion(self, generator):
        """Test async chat completion functionality."""
        response = await generator.get_chat_completion_async(
            messages=[{"role": Role.USER, "content": "hi! tell me a joke"}]
        )
        assert isinstance(response, str)
        assert len(response) > 0

    def test_structured_output(self, generator):
        """Test that synchronous structured output works without failing."""
        result = generator.get_structured_output_sync(
            messages=[{"role": Role.USER, "content": "Create a person"}],
            output_model=Person,
            max_retries=5,
        )

        assert isinstance(result, Person)

    @pytest.mark.asyncio
    async def test_structured_output_async(self, generator):
        """Test that async structured output works without failing."""
        result = await generator.get_structured_output_async(
            messages=[{"role": Role.USER, "content": "Create a person"}],
            output_model=Person,
            max_retries=5,
        )

        assert isinstance(result, Person)

0 commit comments

Comments
 (0)