Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions autointent/custom_types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Types used throughout AutoIntent library."""

from ._types import (
FloatFromZeroToOne,
LabelType,
Expand Down
1 change: 1 addition & 0 deletions autointent/generation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Some generative methods for enriching training datasets."""
38 changes: 38 additions & 0 deletions autointent/generation/chat_templates/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Chat templates used throughout :py:mod:`autointent.generation` module."""

from ._abstract import AbstractEvolution
from ._base_evolver import EvolutionChatTemplate
from ._base_synthesizer import BaseSynthesizerTemplate
from ._concrete import ConcreteEvolution
from ._evolution_templates_schemas import Message, Role
from ._formal import FormalEvolution
from ._funny import FunnyEvolution
from ._goofy import GoofyEvolution
from ._informal import InformalEvolution
from ._intent_descriptions import PromptDescription
from ._reasoning import ReasoningEvolution
from ._synthesizer_en import EnglishSynthesizerTemplate
from ._synthesizer_ru import RussianSynthesizerTemplate

# Registry of the available evolution templates. ``__subclasses__()`` only
# reports *direct* subclasses whose modules have already been imported, so
# this must stay below the template imports above.
_EVOLUTION_CLASSES = tuple(EvolutionChatTemplate.__subclasses__())

# Names of every registered evolution, in subclass-registration order.
EVOLUTION_NAMES = [template_cls.name for template_cls in _EVOLUTION_CLASSES]

# One ready-to-use instance of each evolution template, keyed by its name.
EVOLUTION_MAPPING = {template_cls.name: template_cls() for template_cls in _EVOLUTION_CLASSES}

__all__ = [
"EVOLUTION_MAPPING",
"EVOLUTION_NAMES",
"AbstractEvolution",
"BaseSynthesizerTemplate",
"ConcreteEvolution",
"EnglishSynthesizerTemplate",
"EvolutionChatTemplate",
"FormalEvolution",
"FunnyEvolution",
"GoofyEvolution",
"InformalEvolution",
"Message",
"PromptDescription",
"ReasoningEvolution",
"Role",
"RussianSynthesizerTemplate",
]
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class AbstractEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role
from autointent.schemas import Intent

from ._evolution_templates_schemas import Message, Role


class EvolutionChatTemplate:
"""Base class for chat templates for evolution augmentation."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from typing import ClassVar

from autointent import Dataset
from autointent.generation.utterances.schemas import Message, Role
from autointent.schemas import Intent

from ._evolution_templates_schemas import Message, Role


class BaseChatTemplate(ABC):
"""Base class."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class ConcreteEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class FormalEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class FunnyEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class GoofyEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class InformalEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Prompt description."""
"""Prompt description configuration."""

from pydantic import BaseModel, field_validator

PROMPT_DESCRIPTION = """
Your task is to write a description of the intent.
Expand Down Expand Up @@ -55,3 +57,35 @@
description:

"""


class PromptDescription(BaseModel):
    """Prompt description configuration.

    Holds the prompt template used to ask an LLM for an intent description
    and checks that the template contains the required placeholders.
    """

    text: str = PROMPT_DESCRIPTION
    """
    The template for the prompt to generate descriptions for intents.
    Should include placeholders for {intent_name} and {user_utterances}.
    - `{intent_name}` will be replaced with the name of the intent.
    - `{user_utterances}` will be replaced with the user utterances related to the intent.
    - (optionally) `{regex_patterns}` will be replaced with the regular expressions that match user utterances.
    """

    # NOTE: ``@field_validator`` must be the outermost decorator. With
    # ``@classmethod`` on top, the class attribute becomes a plain classmethod
    # wrapping pydantic's proxy object, so pydantic does not register the
    # validator and it is never executed.
    @field_validator("text")
    @classmethod
    def check_valid_prompt(cls, value: str) -> str:
        """Validate the prompt description template.

        Args:
            value: The prompt description template.

        Returns:
            The validated prompt description template.

        Raises:
            ValueError: If the template lacks the ``{intent_name}`` or the
                ``{user_utterances}`` placeholder.
        """
        if "{intent_name}" not in value or "{user_utterances}" not in value:
            text_error = (
                "The 'prompt_description' template must properly "
                "include {intent_name} and {user_utterances} placeholders."
            )
            raise ValueError(text_error)
        return value
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from .base import EvolutionChatTemplate
from ._base_evolver import EvolutionChatTemplate
from ._evolution_templates_schemas import Message, Role


class ReasoningEvolution(EvolutionChatTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from ._base import BaseSynthesizerTemplate
from ._base_synthesizer import BaseSynthesizerTemplate
from ._evolution_templates_schemas import Message, Role


class EnglishSynthesizerTemplate(BaseSynthesizerTemplate):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from typing import ClassVar

from autointent.generation.utterances.schemas import Message, Role

from ._base import BaseSynthesizerTemplate
from ._base_synthesizer import BaseSynthesizerTemplate
from ._evolution_templates_schemas import Message, Role


class RussianSynthesizerTemplate(BaseSynthesizerTemplate):
Expand Down
5 changes: 5 additions & 0 deletions autointent/generation/intents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Generative methods for enriching intents' metadata."""

from ._description_generation import generate_descriptions

__all__ = ["generate_descriptions"]
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from openai import AsyncOpenAI

from autointent import Dataset
from autointent.generation.intents.prompt_scheme import PromptDescription
from autointent.generation.chat_templates import PromptDescription
from autointent.schemas import Intent, Sample


Expand Down Expand Up @@ -59,9 +59,6 @@ async def create_intent_description(
user_utterances, and regex_patterns.
model_name: Identifier of the OpenAI model to use.

Returns:
Generated description of the intent.

Raises:
TypeError: If the model response is not a string.
"""
Expand Down Expand Up @@ -103,9 +100,6 @@ async def generate_intent_descriptions(
prompt: Template for model prompt with placeholders for intent_name,
user_utterances, and regex_patterns.
model_name: Name of the OpenAI model to use.

Returns:
List of intents with updated descriptions.
"""
tasks = []
for intent in intents:
Expand All @@ -131,28 +125,27 @@ async def generate_intent_descriptions(
return intents


def enhance_dataset_with_descriptions(
def generate_descriptions(
dataset: Dataset,
client: AsyncOpenAI,
prompt: PromptDescription,
model_name: str = "gpt-4o-mini",
model_name: str,
prompt: PromptDescription | None = None,
) -> Dataset:
"""Enhance a dataset by adding generated descriptions to its intents.
"""Add LLM-generated text descriptions to dataset's intents.

Args:
dataset: Dataset containing utterances and intents needing descriptions.
client: OpenAI client for generating descriptions.
prompt: Template for model prompt with placeholders for intent_name,
user_utterances, and regex_patterns.
model_name: OpenAI model identifier for generating descriptions.

Returns:
Dataset with enhanced intent descriptions.
"""
samples = []
for split in dataset.values():
samples.extend([Sample(**sample) for sample in split.to_list()])
intent_utterances = group_utterances_by_label(samples)
if prompt is None:
prompt = PromptDescription()
dataset.intents = asyncio.run(
generate_intent_descriptions(client, intent_utterances, dataset.intents, prompt, model_name),
)
Expand Down
37 changes: 0 additions & 37 deletions autointent/generation/intents/prompt_scheme.py

This file was deleted.

20 changes: 3 additions & 17 deletions autointent/generation/utterances/__init__.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,17 @@
"""Generative methods for enriching dataset with synthetic samples."""

from .balancer import DatasetBalancer
from .basic import EnglishSynthesizerTemplate, RussianSynthesizerTemplate, UtteranceGenerator
from .basic import UtteranceGenerator
from .evolution import (
AbstractEvolution,
ConcreteEvolution,
EvolutionChatTemplate,
FormalEvolution,
FunnyEvolution,
GoofyEvolution,
IncrementalUtteranceEvolver,
InformalEvolution,
ReasoningEvolution,
UtteranceEvolver,
)
from .generator import Generator

__all__ = [
"AbstractEvolution",
"ConcreteEvolution",
"DatasetBalancer",
"EvolutionChatTemplate",
"FormalEvolution",
"FunnyEvolution",
"Generator",
"GoofyEvolution",
"IncrementalUtteranceEvolver",
"InformalEvolution",
"ReasoningEvolution",
"UtteranceEvolver",
"UtteranceGenerator",
]
Loading