Skip to content

Commit a6c064c

Browse files
authored
Fix/usage examples for new scorers (#254)
* small fix * add docstring to cnn scorer * add docstring for biencoder * add docstring for cross encoder * add docstring to llm encoder * add docstring to rnn scorer * add docstring to generator class * small fix * try to fix zero shot scorers * try to fix tests
1 parent 102f3b5 commit a6c064c

File tree

11 files changed

+313
-52
lines changed

11 files changed

+313
-52
lines changed

autointent/generation/_generator.py

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,73 @@ def __init__(self, max_retries: int, messages: list[Message]) -> None:
4646

4747

4848
class Generator:
49-
"""Wrapper class for accessing OpenAI API.
49+
"""Wrapper class for accessing OpenAI-compatible API endpoints for LLM generation.
50+
51+
This class provides a unified interface for interacting with OpenAI-compatible APIs,
52+
supporting both synchronous and asynchronous operations. It includes built-in caching,
53+
retry logic for structured output, and automatic environment variable detection.
54+
55+
The Generator can work with various OpenAI-compatible services including:
56+
- OpenAI's official API
57+
- Azure OpenAI
58+
- Local inference servers (vLLM, Ollama, etc.)
59+
- Other OpenAI-compatible endpoints
60+
61+
Environment Variables:
62+
The following environment variables can be used for configuration:
63+
64+
**OPENAI_API_KEY** (required):
65+
API key for authentication with the OpenAI-compatible service.
66+
This is required for most API endpoints.
67+
68+
**OPENAI_BASE_URL** (optional):
69+
Base URL for the API endpoint. If not provided, defaults to OpenAI's API.
70+
- https://api.openai.com/v1 (OpenAI official)
71+
- https://your-org.openai.azure.com (Azure OpenAI)
72+
- http://localhost:8000/v1 (local vLLM server)
73+
74+
**OPENAI_MODEL_NAME** (optional):
75+
Default model name to use if not specified in the constructor.
76+
Examples: "gpt-4o-mini", "gpt-3.5-turbo", "claude-3-haiku"
5077
5178
Args:
52-
base_url: HTTP-endpoint for sending API requests to OpenAI API compatible server.
53-
Omit this to infer ``OPENAI_BASE_URL`` from environment.
54-
model_name: Name of LLM. Omit this to infer ``OPENAI_MODEL_NAME`` from environment.
55-
**generation_params: kwargs that will be sent with a request to the endpoint.
79+
base_url: HTTP endpoint for API requests. If None, uses OPENAI_BASE_URL environment variable.
80+
model_name: Name of the language model. If None, uses OPENAI_MODEL_NAME environment variable.
81+
use_cache: Whether to enable caching for structured outputs (default: True).
82+
client_params: Additional parameters passed to the OpenAI client constructor.
83+
**generation_params: Additional parameters passed to the chat completion API calls.
84+
85+
Example:
86+
--------
87+
.. code-block::
88+
89+
import os
90+
from autointent.generation import Generator
91+
92+
# Method 1: Using environment variables
93+
# Set these in your environment or .env file:
94+
# OPENAI_API_KEY=your-api-key-here
95+
# OPENAI_MODEL_NAME=gpt-4o-mini
96+
# OPENAI_BASE_URL=https://api.openai.com/v1 # optional
97+
98+
generator = Generator()
99+
100+
# Method 2: Explicit configuration
101+
generator = Generator(
102+
base_url="https://api.openai.com/v1",
103+
model_name="gpt-4o-mini",
104+
temperature=0.7,
105+
max_tokens=1000
106+
)
107+
108+
# Basic chat completion
109+
from autointent.generation.chat_templates import Message, Role
110+
111+
messages = [{"role": Role.USER, "content": "Hello, how are you?"}]
112+
response = generator.get_chat_completion(messages)
113+
114+
Raises:
115+
ValueError: If model_name is not provided and OPENAI_MODEL_NAME is not set.
56116
"""
57117

58118
_dump_data_filename = "init_params.json"

autointent/modules/base/_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class BaseModule(ABC):
3030
supports_multiclass: bool
3131
"""Whether the module supports multiclass classification"""
3232
name: str
33-
"""Name of the module."""
33+
"""Name of the module to reference in search space configuration."""
3434

3535
@property
3636
def trial_name(self) -> str:

autointent/modules/scoring/_description/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@ class BaseDescriptionScorer(BaseScorer, ABC):
2222
2323
Args:
2424
temperature: Temperature parameter for scaling logits, defaults to 1.0
25+
multilabel: Flag indicating classification task type (True for multilabel, False for multiclass)
2526
"""
2627

2728
supports_multiclass = True
2829
supports_multilabel = True
2930

30-
def __init__(self, temperature: PositiveFloat = 1.0) -> None:
31+
def __init__(self, temperature: PositiveFloat = 1.0, multilabel: bool = False) -> None:
3132
self.temperature = temperature
33+
self._multilabel = multilabel
3234
self._validate_temperature()
3335

3436
def _validate_temperature(self) -> None:
@@ -82,16 +84,14 @@ def fit(
8284
Raises:
8385
ValueError: If descriptions contain None values
8486
"""
85-
self._validate_task(labels)
8687
self._validate_descriptions(descriptions)
87-
self._fit_implementation(utterances, descriptions)
88+
self._fit_implementation(descriptions)
8889

8990
@abstractmethod
90-
def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
91+
def _fit_implementation(self, descriptions: list[str]) -> None:
9192
"""Implementation-specific fitting logic.
9293
9394
Args:
94-
utterances: List of utterances to process
9595
descriptions: List of intent descriptions
9696
"""
9797

autointent/modules/scoring/_description/bi_encoder.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,46 @@
1313

1414

1515
class BiEncoderDescriptionScorer(BaseDescriptionScorer):
16-
"""Bi-encoder description scorer that embeds utterances and descriptions separately.
16+
"""Bi-encoder description scorer for zero-shot intent classification.
1717
18-
This scorer uses a bi-encoder architecture where both utterances and descriptions
19-
are embedded separately, then cosine similarity is computed between them.
18+
This scorer uses a bi-encoder architecture where utterances and intent descriptions
19+
are embedded separately using the same encoder model, then cosine similarity is
20+
computed between utterance embeddings and description embeddings. This is a
21+
zero-shot approach that doesn't require training examples, only intent descriptions.
22+
23+
The bi-encoder approach is efficient for inference as descriptions are embedded
24+
once during fitting, and only utterances need to be embedded during prediction.
2025
2126
Args:
22-
embedder_config: Config of the embedder model
23-
temperature: Temperature parameter for scaling logits, defaults to 1.0
27+
embedder_config: Configuration for the embedder model (HuggingFace model name or config)
28+
temperature: Temperature parameter for scaling logits before softmax/sigmoid (default: 1.0)
29+
multilabel: Flag indicating classification task type (True for multilabel, False for multiclass)
30+
31+
Example:
32+
--------
33+
.. testcode::
34+
35+
from autointent.modules.scoring import BiEncoderDescriptionScorer
36+
37+
# Initialize bi-encoder scorer
38+
scorer = BiEncoderDescriptionScorer(
39+
embedder_config="sentence-transformers/all-MiniLM-L6-v2",
40+
temperature=0.8
41+
)
42+
43+
# Zero-shot classification with intent descriptions
44+
descriptions = [
45+
"User wants to book or reserve transportation like flights, trains, or hotels",
46+
"User wants to cancel an existing booking or reservation",
47+
"User asks about weather conditions or forecasts"
48+
]
49+
50+
# Fit using descriptions only (zero-shot approach)
51+
scorer.fit([], [], descriptions)
52+
53+
# Make predictions on new utterances
54+
test_utterances = ["Reserve a hotel room", "Delete my booking"]
55+
probabilities = scorer.predict(test_utterances)
2456
"""
2557

2658
name = "description_bi"
@@ -29,8 +61,9 @@ def __init__(
2961
self,
3062
embedder_config: EmbedderConfig | str | dict[str, Any] | None = None,
3163
temperature: PositiveFloat = 1.0,
64+
multilabel: bool = False,
3265
) -> None:
33-
super().__init__(temperature)
66+
super().__init__(temperature=temperature, multilabel=multilabel)
3467
self.embedder_config = EmbedderConfig.from_search_config(embedder_config)
3568
self._embedder: Embedder | None = None
3669
self._description_vectors: NDArray[Any] | None = None
@@ -55,16 +88,13 @@ def from_context(
5588
if embedder_config is None:
5689
embedder_config = context.resolve_embedder()
5790

58-
return cls(
59-
temperature=temperature,
60-
embedder_config=embedder_config,
61-
)
91+
return cls(temperature=temperature, embedder_config=embedder_config, multilabel=context.is_multilabel())
6292

6393
def get_implicit_initialization_params(self) -> dict[str, Any]:
6494
"""Get implicit initialization parameters for this scorer."""
65-
return {"embedder_config": self.embedder_config.model_dump()}
95+
return {"embedder_config": self.embedder_config.model_dump(), "multilabel": self._multilabel}
6696

67-
def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
97+
def _fit_implementation(self, descriptions: list[str]) -> None:
6898
"""Fit the bi-encoder by embedding descriptions.
6999
70100
Args:

autointent/modules/scoring/_description/cross_encoder.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,48 @@
1313

1414

1515
class CrossEncoderDescriptionScorer(BaseDescriptionScorer):
16-
"""Cross-encoder description scorer that directly computes similarity between pairs.
16+
"""Cross-encoder description scorer for zero-shot intent classification.
1717
1818
This scorer uses a cross-encoder architecture that directly computes similarity
19-
between each utterance-description pair.
19+
scores between each utterance-description pair by passing them together through
20+
a transformer model. Unlike bi-encoders that embed texts separately, cross-encoders
21+
can capture more complex interactions between utterances and descriptions, often
22+
leading to higher accuracy at the cost of computational efficiency.
23+
24+
This is a zero-shot approach that doesn't require training examples, only intent
25+
descriptions. The cross-encoder processes each utterance-description pair separately
26+
during inference, making it more computationally intensive but potentially more accurate.
2027
2128
Args:
22-
cross_encoder_config: Config of the cross-encoder model
23-
temperature: Temperature parameter for scaling logits, defaults to 1.0
29+
cross_encoder_config: Configuration for the cross-encoder model (HuggingFace model name or config)
30+
temperature: Temperature parameter for scaling logits before softmax/sigmoid (default: 1.0)
31+
multilabel: Flag indicating classification task type (True for multilabel, False for multiclass)
32+
33+
Example:
34+
--------
35+
.. testcode::
36+
37+
from autointent.modules.scoring import CrossEncoderDescriptionScorer
38+
39+
# Initialize cross-encoder scorer
40+
scorer = CrossEncoderDescriptionScorer(
41+
cross_encoder_config="cross-encoder/ms-marco-MiniLM-L-6-v2",
42+
temperature=1.2
43+
)
44+
45+
# Zero-shot classification with intent descriptions
46+
descriptions = [
47+
"User wants to book or reserve transportation like flights, trains, or hotels",
48+
"User wants to cancel an existing booking or reservation",
49+
"User asks about weather conditions or forecasts"
50+
]
51+
52+
# Fit using descriptions only (zero-shot approach)
53+
scorer.fit([], [], descriptions)
54+
55+
# Make predictions on new utterances
56+
test_utterances = ["Reserve a hotel room", "Delete my booking"]
57+
probabilities = scorer.predict(test_utterances)
2458
"""
2559

2660
name = "description_cross"
@@ -29,8 +63,9 @@ def __init__(
2963
self,
3064
cross_encoder_config: CrossEncoderConfig | str | dict[str, Any] | None = None,
3165
temperature: PositiveFloat = 1.0,
66+
multilabel: bool = False,
3267
) -> None:
33-
super().__init__(temperature)
68+
super().__init__(temperature=temperature, multilabel=multilabel)
3469
self.cross_encoder_config = CrossEncoderConfig.from_search_config(cross_encoder_config)
3570
self._cross_encoder: Ranker | None = None
3671
self._description_texts: list[str] | None = None
@@ -56,15 +91,14 @@ def from_context(
5691
cross_encoder_config = context.resolve_ranker()
5792

5893
return cls(
59-
temperature=temperature,
60-
cross_encoder_config=cross_encoder_config,
94+
temperature=temperature, cross_encoder_config=cross_encoder_config, multilabel=context.is_multilabel()
6195
)
6296

6397
def get_implicit_initialization_params(self) -> dict[str, Any]:
6498
"""Get implicit initialization parameters for this scorer."""
65-
return {"cross_encoder_config": self.cross_encoder_config.model_dump()}
99+
return {"cross_encoder_config": self.cross_encoder_config.model_dump(), "multilabel": self._multilabel}
66100

67-
def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
101+
def _fit_implementation(self, descriptions: list[str]) -> None:
68102
"""Fit the cross-encoder by storing descriptions.
69103
70104
Args:

autointent/modules/scoring/_description/llm_encoder.py

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,55 @@ class IntentCategorization(BaseModel):
4242

4343

4444
class LLMDescriptionScorer(BaseDescriptionScorer):
45-
"""LLM-based description scorer that uses structured output to categorize intents.
45+
"""LLM-based description scorer for zero-shot intent classification using structured output.
4646
47-
This scorer uses a language model with structured output to categorize intent descriptions
48-
into three categories based on their probability to correspond to a given text sample:
49-
- Most probable (probability 1.0)
50-
- Promising but not confident (probability 0.5)
51-
- Unlikely (probability 0.0)
47+
This scorer uses a Large Language Model (LLM) with structured output to perform
48+
zero-shot intent classification. The LLM is prompted to categorize intent descriptions
49+
into three probability levels for each utterance:
50+
- Most probable (probability 1.0): Intents that are most likely to match the utterance
51+
- Promising (probability 0.5): Intents that are plausible but matched with lower confidence
52+
- Unlikely (probability 0.0): All other intents (implicit)
53+
54+
This approach leverages the reasoning capabilities of LLMs to understand complex
55+
relationships between utterances and intent descriptions, potentially achieving
56+
high accuracy for nuanced classification tasks. However, it requires API access
57+
to LLM services and can be slower and more expensive than encoder-based methods.
5258
5359
Args:
54-
generator_config: Configuration for the Generator instance
55-
temperature: Temperature parameter for scaling classifier logits, defaults to 1.0
56-
max_concurrent: Maximum number of concurrent async calls to LLM, defaults to 15
57-
max_per_second: Maximum number of API calls per second, defaults to 10
58-
max_retries: Maximum number of retry attempts for failed validations, defaults to 3
59-
backend: Backend to use for structured output, either "openai" or "vllm", defaults to "openai"
60+
generator_config: Configuration for the Generator instance (LLM model settings)
61+
temperature: Temperature parameter for scaling classifier logits (default: 1.0)
62+
max_concurrent: Maximum number of concurrent async calls to LLM (default: 15)
63+
max_per_second: Maximum number of API calls per second for rate limiting (default: 10)
64+
max_retries: Maximum number of retry attempts for failed API calls (default: 3)
65+
multilabel: Flag indicating classification task type (True for multilabel, False for multiclass)
66+
67+
Example:
68+
--------
69+
.. code-block::
70+
71+
from autointent.modules.scoring import LLMDescriptionScorer
72+
73+
# Initialize LLM scorer with OpenAI GPT
74+
scorer = LLMDescriptionScorer(
75+
temperature=1.0,
76+
max_concurrent=10,
77+
max_per_second=5,
78+
max_retries=2
79+
)
80+
81+
# Zero-shot classification with intent descriptions
82+
descriptions = [
83+
"User wants to book or reserve transportation like flights, trains, or hotels",
84+
"User wants to cancel an existing booking or reservation",
85+
"User asks about weather conditions or forecasts"
86+
]
87+
88+
# Fit using descriptions only (zero-shot approach)
89+
scorer.fit([], [], descriptions)
90+
91+
# Make predictions on new utterances
92+
test_utterances = ["Reserve a hotel room", "Delete my booking"]
93+
probabilities = scorer.predict(test_utterances)
6094
"""
6195

6296
name = "description_llm"
@@ -68,8 +102,9 @@ def __init__(
68102
max_concurrent: PositiveInt | None = 15,
69103
max_per_second: PositiveInt = 10,
70104
max_retries: PositiveInt = 3,
105+
multilabel: bool = False,
71106
) -> None:
72-
super().__init__(temperature=temperature)
107+
super().__init__(temperature=temperature, multilabel=multilabel)
73108

74109
self.generator_config = generator_config or {}
75110
self.max_concurrent = max_concurrent
@@ -92,12 +127,13 @@ def from_context(
92127
max_concurrent=max_concurrent,
93128
max_per_second=max_per_second,
94129
max_retries=max_retries,
130+
multilabel=context.is_multilabel(),
95131
)
96132

97133
def get_implicit_initialization_params(self) -> dict[str, Any]:
98-
return {}
134+
return {"multilabel": self._multilabel}
99135

100-
def _fit_implementation(self, utterances: list[str], descriptions: list[str]) -> None:
136+
def _fit_implementation(self, descriptions: list[str]) -> None:
101137
"""Fit the LLM scorer by initializing the generator and storing descriptions.
102138
103139
Args:

0 commit comments

Comments
 (0)