deeppavlov
diff --git a/‎autointent/generation/_generator.py‎
Lines changed: 65 additions & 5 deletions b/‎autointent/generation/_generator.py‎
Lines changed: 65 additions & 5 deletions
diff --git a/‎autointent/modules/base/_base.py‎
Lines changed: 1 addition & 1 deletion b/‎autointent/modules/base/_base.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autointent/modules/scoring/_description/base.py‎
Lines changed: 5 additions & 5 deletions b/‎autointent/modules/scoring/_description/base.py‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎autointent/modules/scoring/_description/bi_encoder.py‎
Lines changed: 42 additions & 12 deletions b/‎autointent/modules/scoring/_description/bi_encoder.py‎
Lines changed: 42 additions & 12 deletions
diff --git a/‎autointent/modules/scoring/_description/cross_encoder.py‎
Lines changed: 43 additions & 9 deletions b/‎autointent/modules/scoring/_description/cross_encoder.py‎
Lines changed: 43 additions & 9 deletions
diff --git a/‎autointent/modules/scoring/_description/llm_encoder.py‎
Lines changed: 51 additions & 15 deletions b/‎autointent/modules/scoring/_description/llm_encoder.py‎
Lines changed: 51 additions & 15 deletions
@@ -46,13 +46,73 @@ def __init__(self, max_retries: int, messages: list[Message]) -> None:
 
 
 class Generator:
-    """Wrapper class for accessing OpenAI API.
+    """Wrapper class for accessing OpenAI-compatible API endpoints for LLM generation.
+
+    This class provides a unified interface for interacting with OpenAI-compatible APIs,
+    supporting both synchronous and asynchronous operations. It includes built-in caching,
+    retry logic for structured output, and automatic environment variable detection.
+
+    The Generator can work with various OpenAI-compatible services including:
+    - OpenAI's official API
+    - Azure OpenAI
+    - Local inference servers (vLLM, Ollama, etc.)
+    - Other OpenAI-compatible endpoints
+
+    Environment Variables:
+        The following environment variables can be used for configuration:
+
+        **OPENAI_API_KEY** (required):
+            API key for authentication with the OpenAI-compatible service.
+            This is required for most API endpoints.
+
+        **OPENAI_BASE_URL** (optional):
+            Base URL for the API endpoint. If not provided, defaults to OpenAI's API. Examples:
+            - https://api.openai.com/v1 (OpenAI official)
+            - https://your-org.openai.azure.com (Azure OpenAI)
+            - http://localhost:8000/v1 (local vLLM server)
+
+        **OPENAI_MODEL_NAME** (optional):
+            Default model name to use if not specified in the constructor.
+            Examples: "gpt-4o-mini", "gpt-3.5-turbo", "claude-3-haiku"
 
     Args:
-        base_url: HTTP-endpoint for sending API requests to OpenAI API compatible server.
-            Omit this to infer ``OPENAI_BASE_URL`` from environment.
-        model_name: Name of LLM. Omit this to infer ``OPENAI_MODEL_NAME`` from environment.
-        **generation_params: kwargs that will be sent with a request to the endpoint.
+        base_url: HTTP endpoint for API requests. If None, uses OPENAI_BASE_URL environment variable.
+        model_name: Name of the language model. If None, uses OPENAI_MODEL_NAME environment variable.
+        use_cache: Whether to enable caching for structured outputs (default: True).
+        client_params: Additional parameters passed to the OpenAI client constructor.
+        **generation_params: Additional parameters passed to the chat completion API calls.
+
+    Example:
+    --------
+    .. code-block::
+
+        import os
+        from autointent.generation import Generator
+
+        # Method 1: Using environment variables
+        # Set these in your environment or .env file:
+        # OPENAI_API_KEY=your-api-key-here
+        # OPENAI_MODEL_NAME=gpt-4o-mini
+        # OPENAI_BASE_URL=https://api.openai.com/v1  # optional
+
+        generator = Generator()
+
+        # Method 2: Explicit configuration
+        generator = Generator(
+            base_url="https://api.openai.com/v1",
+            model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_tokens=1000
+        )
+
+        # Basic chat completion
+        from autointent.generation.chat_templates import Message, Role
+
+        messages = [{"role": Role.USER, "content": "Hello, how are you?"}]
+        response = generator.get_chat_completion(messages)
+
+    Raises:
+        ValueError: If model_name is not provided and OPENAI_MODEL_NAME is not set.
     """
 
     _dump_data_filename = "init_params.json"
 
@@ -30,7 +30,7 @@ class BaseModule(ABC):
     supports_multiclass: bool
     """Whether the module supports multiclass classification"""
     name: str
-    """Name of the module."""
+    """Name of the module to reference in search space configuration."""
 
     @property
     def trial_name(self) -> str:
 
@@ -22,13 +22,15 @@ class BaseDescriptionScorer(BaseScorer, ABC):
 
     Args:
         temperature: Temperature parameter for scaling logits, defaults to 1.0
+        multilabel: True for multilabel classification, False for multiclass, defaults to False
     """
 
     supports_multiclass = True
     supports_multilabel = True
 
-    def __init__(self, temperature: PositiveFloat = 1.0) -> None:
+    def __init__(self, temperature: PositiveFloat = 1.0, multilabel: bool = False) -> None:
         self.temperature = temperature
+        self._multilabel = multilabel
         self._validate_temperature()
 
     def _validate_temperature(self) -> None:
@@ -82,16 +84,14 @@ def fit(
         Raises:
             ValueError: If descriptions contain None values
         """
-        self._validate_task(labels)
         self._validate_descriptions(descriptions)
-        self._fit_implementation(utterances, descriptions)
+        self._fit_implementation(descriptions)
 
     @abstractmethod
-    def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
+    def _fit_implementation(self, descriptions: list[str]) -> None:
         """Implementation-specific fitting logic.
 
         Args:
-            utterances: List of utterances to process
             descriptions: List of intent descriptions
         """
 
 
@@ -13,14 +13,46 @@
 
 
 class BiEncoderDescriptionScorer(BaseDescriptionScorer):
-    """Bi-encoder description scorer that embeds utterances and descriptions separately.
+    """Bi-encoder description scorer for zero-shot intent classification.
 
-    This scorer uses a bi-encoder architecture where both utterances and descriptions
-    are embedded separately, then cosine similarity is computed between them.
+    This scorer uses a bi-encoder architecture where utterances and intent descriptions
+    are embedded separately using the same encoder model, then cosine similarity is
+    computed between utterance embeddings and description embeddings. This is a
+    zero-shot approach that doesn't require training examples, only intent descriptions.
+
+    The bi-encoder approach is efficient for inference as descriptions are embedded
+    once during fitting, and only utterances need to be embedded during prediction.
 
     Args:
-        embedder_config: Config of the embedder model
-        temperature: Temperature parameter for scaling logits, defaults to 1.0
+        embedder_config: Configuration for the embedder model (HuggingFace model name or config)
+        temperature: Temperature parameter for scaling logits before softmax/sigmoid (default: 1.0)
+        multilabel: True for multilabel classification, False for multiclass (default: False)
+
+    Example:
+    --------
+    .. testcode::
+
+        from autointent.modules.scoring import BiEncoderDescriptionScorer
+
+        # Initialize bi-encoder scorer
+        scorer = BiEncoderDescriptionScorer(
+            embedder_config="sentence-transformers/all-MiniLM-L6-v2",
+            temperature=0.8
+        )
+
+        # Zero-shot classification with intent descriptions
+        descriptions = [
+            "User wants to book or reserve transportation like flights, trains, or hotels",
+            "User wants to cancel an existing booking or reservation",
+            "User asks about weather conditions or forecasts"
+        ]
+
+        # Fit using descriptions only (zero-shot approach)
+        scorer.fit([], [], descriptions)
+
+        # Make predictions on new utterances
+        test_utterances = ["Reserve a hotel room", "Delete my booking"]
+        probabilities = scorer.predict(test_utterances)
     """
 
     name = "description_bi"
@@ -29,8 +61,9 @@ def __init__(
         self,
         embedder_config: EmbedderConfig | str | dict[str, Any] | None = None,
         temperature: PositiveFloat = 1.0,
+        multilabel: bool = False,
     ) -> None:
-        super().__init__(temperature)
+        super().__init__(temperature=temperature, multilabel=multilabel)
         self.embedder_config = EmbedderConfig.from_search_config(embedder_config)
         self._embedder: Embedder | None = None
         self._description_vectors: NDArray[Any] | None = None
@@ -55,16 +88,13 @@ def from_context(
         if embedder_config is None:
             embedder_config = context.resolve_embedder()
 
-        return cls(
-            temperature=temperature,
-            embedder_config=embedder_config,
-        )
+        return cls(temperature=temperature, embedder_config=embedder_config, multilabel=context.is_multilabel())
 
     def get_implicit_initialization_params(self) -> dict[str, Any]:
         """Get implicit initialization parameters for this scorer."""
-        return {"embedder_config": self.embedder_config.model_dump()}
+        return {"embedder_config": self.embedder_config.model_dump(), "multilabel": self._multilabel}
 
-    def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
+    def _fit_implementation(self, descriptions: list[str]) -> None:
         """Fit the bi-encoder by embedding descriptions.
 
         Args:
 
@@ -13,14 +13,48 @@
 
 
 class CrossEncoderDescriptionScorer(BaseDescriptionScorer):
-    """Cross-encoder description scorer that directly computes similarity between pairs.
+    """Cross-encoder description scorer for zero-shot intent classification.
 
     This scorer uses a cross-encoder architecture that directly computes similarity
-    between each utterance-description pair.
+    scores between each utterance-description pair by passing them together through
+    a transformer model. Unlike bi-encoders that embed texts separately, cross-encoders
+    can capture more complex interactions between utterances and descriptions, often
+    leading to higher accuracy at the cost of computational efficiency.
+
+    This is a zero-shot approach that doesn't require training examples, only intent
+    descriptions. The cross-encoder processes each utterance-description pair separately
+    during inference, making it more computationally intensive but potentially more accurate.
 
     Args:
-        cross_encoder_config: Config of the cross-encoder model
-        temperature: Temperature parameter for scaling logits, defaults to 1.0
+        cross_encoder_config: Configuration for the cross-encoder model (HuggingFace model name or config)
+        temperature: Temperature parameter for scaling logits before softmax/sigmoid (default: 1.0)
+        multilabel: True for multilabel classification, False for multiclass (default: False)
+
+    Example:
+    --------
+    .. testcode::
+
+        from autointent.modules.scoring import CrossEncoderDescriptionScorer
+
+        # Initialize cross-encoder scorer
+        scorer = CrossEncoderDescriptionScorer(
+            cross_encoder_config="cross-encoder/ms-marco-MiniLM-L-6-v2",
+            temperature=1.2
+        )
+
+        # Zero-shot classification with intent descriptions
+        descriptions = [
+            "User wants to book or reserve transportation like flights, trains, or hotels",
+            "User wants to cancel an existing booking or reservation",
+            "User asks about weather conditions or forecasts"
+        ]
+
+        # Fit using descriptions only (zero-shot approach)
+        scorer.fit([], [], descriptions)
+
+        # Make predictions on new utterances
+        test_utterances = ["Reserve a hotel room", "Delete my booking"]
+        probabilities = scorer.predict(test_utterances)
     """
 
     name = "description_cross"
@@ -29,8 +63,9 @@ def __init__(
         self,
         cross_encoder_config: CrossEncoderConfig | str | dict[str, Any] | None = None,
         temperature: PositiveFloat = 1.0,
+        multilabel: bool = False,
     ) -> None:
-        super().__init__(temperature)
+        super().__init__(temperature=temperature, multilabel=multilabel)
         self.cross_encoder_config = CrossEncoderConfig.from_search_config(cross_encoder_config)
         self._cross_encoder: Ranker | None = None
         self._description_texts: list[str] | None = None
@@ -56,15 +91,14 @@ def from_context(
             cross_encoder_config = context.resolve_ranker()
 
         return cls(
-            temperature=temperature,
-            cross_encoder_config=cross_encoder_config,
+            temperature=temperature, cross_encoder_config=cross_encoder_config, multilabel=context.is_multilabel()
         )
 
     def get_implicit_initialization_params(self) -> dict[str, Any]:
         """Get implicit initialization parameters for this scorer."""
-        return {"cross_encoder_config": self.cross_encoder_config.model_dump()}
+        return {"cross_encoder_config": self.cross_encoder_config.model_dump(), "multilabel": self._multilabel}
 
-    def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
+    def _fit_implementation(self, descriptions: list[str]) -> None:
         """Fit the cross-encoder by storing descriptions.
 
         Args:
 
@@ -42,21 +42,55 @@ class IntentCategorization(BaseModel):
 
 
 class LLMDescriptionScorer(BaseDescriptionScorer):
-    """LLM-based description scorer that uses structured output to categorize intents.
+    """LLM-based description scorer for zero-shot intent classification using structured output.
 
-    This scorer uses a language model with structured output to categorize intent descriptions
-    into three categories based on their probability to correspond to a given text sample:
-    - Most probable (probability 1.0)
-    - Promising but not confident (probability 0.5)
-    - Unlikely (probability 0.0)
+    This scorer uses a Large Language Model (LLM) with structured output to perform
+    zero-shot intent classification. The LLM is prompted to categorize intent descriptions
+    into three probability levels for each utterance:
+    - Most probable (probability 1.0): Intents that are most likely to match the utterance
+    - Promising (probability 0.5): Intents that are plausible but less confident
+    - Unlikely (probability 0.0): All other intents (implicit)
+
+    This approach leverages the reasoning capabilities of LLMs to understand complex
+    relationships between utterances and intent descriptions, potentially achieving
+    high accuracy for nuanced classification tasks. However, it requires API access
+    to LLM services and can be slower and more expensive than encoder-based methods.
 
     Args:
-        generator_config: Configuration for the Generator instance
-        temperature: Temperature parameter for scaling classifier logits, defaults to 1.0
-        max_concurrent: Maximum number of concurrent async calls to LLM, defaults to 15
-        max_per_second: Maximum number of API calls per second, defaults to 10
-        max_retries: Maximum number of retry attempts for failed validations, defaults to 3
-        backend: Backend to use for structured output, either "openai" or "vllm", defaults to "openai"
+        generator_config: Configuration for the Generator instance (LLM model settings)
+        temperature: Temperature parameter for scaling classifier logits (default: 1.0)
+        max_concurrent: Maximum number of concurrent async calls to LLM (default: 15)
+        max_per_second: Maximum number of API calls per second for rate limiting (default: 10)
+        max_retries: Maximum number of retry attempts for failed API calls (default: 3)
+        multilabel: True for multilabel classification, False for multiclass (default: False)
+
+    Example:
+    --------
+    .. code-block::
+
+        from autointent.modules.scoring import LLMDescriptionScorer
+
+        # Initialize LLM scorer with OpenAI GPT
+        scorer = LLMDescriptionScorer(
+            temperature=1.0,
+            max_concurrent=10,
+            max_per_second=5,
+            max_retries=2
+        )
+
+        # Zero-shot classification with intent descriptions
+        descriptions = [
+            "User wants to book or reserve transportation like flights, trains, or hotels",
+            "User wants to cancel an existing booking or reservation",
+            "User asks about weather conditions or forecasts"
+        ]
+
+        # Fit using descriptions only (zero-shot approach)
+        scorer.fit([], [], descriptions)
+
+        # Make predictions on new utterances
+        test_utterances = ["Reserve a hotel room", "Delete my booking"]
+        probabilities = scorer.predict(test_utterances)
     """
 
     name = "description_llm"
@@ -68,8 +102,9 @@ def __init__(
         max_concurrent: PositiveInt | None = 15,
         max_per_second: PositiveInt = 10,
         max_retries: PositiveInt = 3,
+        multilabel: bool = False,
     ) -> None:
-        super().__init__(temperature=temperature)
+        super().__init__(temperature=temperature, multilabel=multilabel)
 
         self.generator_config = generator_config or {}
         self.max_concurrent = max_concurrent
@@ -92,12 +127,13 @@ def from_context(
             max_concurrent=max_concurrent,
             max_per_second=max_per_second,
             max_retries=max_retries,
+            multilabel=context.is_multilabel(),
         )
 
     def get_implicit_initialization_params(self) -> dict[str, Any]:
-        return {}
+        return {"multilabel": self._multilabel}
 
-    def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
+    def _fit_implementation(self, descriptions: list[str]) -> None:
         """Fit the LLM scorer by initializing the generator and storing descriptions.
 
         Args: