diff --git a/autointent/custom_types/__init__.py b/autointent/custom_types/__init__.py index 9c9c2e1b3..f68ddec2f 100644 --- a/autointent/custom_types/__init__.py +++ b/autointent/custom_types/__init__.py @@ -1,3 +1,5 @@ +"""Types used throughout AutoIntent library.""" + from ._types import ( FloatFromZeroToOne, LabelType, diff --git a/autointent/generation/__init__.py b/autointent/generation/__init__.py index e69de29bb..096e19f7d 100644 --- a/autointent/generation/__init__.py +++ b/autointent/generation/__init__.py @@ -0,0 +1 @@ +"""Some generative methods for enriching training datasets.""" diff --git a/autointent/generation/chat_templates/__init__.py b/autointent/generation/chat_templates/__init__.py new file mode 100644 index 000000000..663702507 --- /dev/null +++ b/autointent/generation/chat_templates/__init__.py @@ -0,0 +1,38 @@ +"""Chat templates used throughout :py:mod:`autointent.generation` module.""" + +from ._abstract import AbstractEvolution +from ._base_evolver import EvolutionChatTemplate +from ._base_synthesizer import BaseSynthesizerTemplate +from ._concrete import ConcreteEvolution +from ._evolution_templates_schemas import Message, Role +from ._formal import FormalEvolution +from ._funny import FunnyEvolution +from ._goofy import GoofyEvolution +from ._informal import InformalEvolution +from ._intent_descriptions import PromptDescription +from ._reasoning import ReasoningEvolution +from ._synthesizer_en import EnglishSynthesizerTemplate +from ._synthesizer_ru import RussianSynthesizerTemplate + +EVOLUTION_NAMES = [evolution.name for evolution in EvolutionChatTemplate.__subclasses__()] + +EVOLUTION_MAPPING = {evolution.name: evolution() for evolution in EvolutionChatTemplate.__subclasses__()} + +__all__ = [ + "EVOLUTION_MAPPING", + "EVOLUTION_NAMES", + "AbstractEvolution", + "BaseSynthesizerTemplate", + "ConcreteEvolution", + "EnglishSynthesizerTemplate", + "EvolutionChatTemplate", + "FormalEvolution", + "FunnyEvolution", + "GoofyEvolution", + 
"InformalEvolution", + "Message", + "PromptDescription", + "ReasoningEvolution", + "Role", + "RussianSynthesizerTemplate", +] diff --git a/autointent/generation/utterances/evolution/chat_templates/abstract.py b/autointent/generation/chat_templates/_abstract.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/abstract.py rename to autointent/generation/chat_templates/_abstract.py index 6698e027d..3a0392acd 100644 --- a/autointent/generation/utterances/evolution/chat_templates/abstract.py +++ b/autointent/generation/chat_templates/_abstract.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class AbstractEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/evolution/chat_templates/base.py b/autointent/generation/chat_templates/_base_evolver.py similarity index 92% rename from autointent/generation/utterances/evolution/chat_templates/base.py rename to autointent/generation/chat_templates/_base_evolver.py index 3f83feaf4..4ea6b3690 100644 --- a/autointent/generation/utterances/evolution/chat_templates/base.py +++ b/autointent/generation/chat_templates/_base_evolver.py @@ -2,9 +2,10 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role from autointent.schemas import Intent +from ._evolution_templates_schemas import Message, Role + class EvolutionChatTemplate: """Base class for chat templates for evolution augmentation.""" diff --git a/autointent/generation/utterances/basic/chat_templates/_base.py b/autointent/generation/chat_templates/_base_synthesizer.py similarity index 98% rename from autointent/generation/utterances/basic/chat_templates/_base.py rename to autointent/generation/chat_templates/_base_synthesizer.py index c14bc07eb..406440377 
100644 --- a/autointent/generation/utterances/basic/chat_templates/_base.py +++ b/autointent/generation/chat_templates/_base_synthesizer.py @@ -6,9 +6,10 @@ from typing import ClassVar from autointent import Dataset -from autointent.generation.utterances.schemas import Message, Role from autointent.schemas import Intent +from ._evolution_templates_schemas import Message, Role + class BaseChatTemplate(ABC): """Base class.""" diff --git a/autointent/generation/utterances/evolution/chat_templates/concrete.py b/autointent/generation/chat_templates/_concrete.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/concrete.py rename to autointent/generation/chat_templates/_concrete.py index b6cf984e2..1ae2749b8 100644 --- a/autointent/generation/utterances/evolution/chat_templates/concrete.py +++ b/autointent/generation/chat_templates/_concrete.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class ConcreteEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/schemas.py b/autointent/generation/chat_templates/_evolution_templates_schemas.py similarity index 100% rename from autointent/generation/utterances/schemas.py rename to autointent/generation/chat_templates/_evolution_templates_schemas.py diff --git a/autointent/generation/utterances/evolution/chat_templates/formal.py b/autointent/generation/chat_templates/_formal.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/formal.py rename to autointent/generation/chat_templates/_formal.py index 9ba0d5364..6bf4f92d3 100644 --- a/autointent/generation/utterances/evolution/chat_templates/formal.py +++ b/autointent/generation/chat_templates/_formal.py @@ -2,9 +2,8 @@ from typing import ClassVar -from 
autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class FormalEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/evolution/chat_templates/funny.py b/autointent/generation/chat_templates/_funny.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/funny.py rename to autointent/generation/chat_templates/_funny.py index 2b799a3d6..f251ca064 100644 --- a/autointent/generation/utterances/evolution/chat_templates/funny.py +++ b/autointent/generation/chat_templates/_funny.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class FunnyEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/evolution/chat_templates/goofy.py b/autointent/generation/chat_templates/_goofy.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/goofy.py rename to autointent/generation/chat_templates/_goofy.py index 5650eba4f..2e0e7d669 100644 --- a/autointent/generation/utterances/evolution/chat_templates/goofy.py +++ b/autointent/generation/chat_templates/_goofy.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class GoofyEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/evolution/chat_templates/informal.py b/autointent/generation/chat_templates/_informal.py similarity index 93% rename from 
autointent/generation/utterances/evolution/chat_templates/informal.py rename to autointent/generation/chat_templates/_informal.py index ee3debeb3..2b8186ef5 100644 --- a/autointent/generation/utterances/evolution/chat_templates/informal.py +++ b/autointent/generation/chat_templates/_informal.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class InformalEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/intents/prompts.py b/autointent/generation/chat_templates/_intent_descriptions.py similarity index 52% rename from autointent/generation/intents/prompts.py rename to autointent/generation/chat_templates/_intent_descriptions.py index c8b6356e2..09d32d571 100644 --- a/autointent/generation/intents/prompts.py +++ b/autointent/generation/chat_templates/_intent_descriptions.py @@ -1,4 +1,6 @@ -"""Prompt description.""" +"""Prompt description configuration.""" + +from pydantic import BaseModel, field_validator PROMPT_DESCRIPTION = """ Your task is to write a description of the intent. @@ -55,3 +57,35 @@ description: """ + + +class PromptDescription(BaseModel): + """Prompt description configuration.""" + + text: str = PROMPT_DESCRIPTION + """ + The template for the prompt to generate descriptions for intents. + Should include placeholders for {intent_name} and {user_utterances}. + - `{intent_name}` will be replaced with the name of the intent. + - `{user_utterances}` will be replaced with the user utterances related to the intent. + - (optionally) `{regex_patterns}` will be replaced with the regular expressions that match user utterances. + """ + + @field_validator("text") + @classmethod + def check_valid_prompt(cls, value: str) -> str: + """Validate the prompt description template. + + Args: + value: The prompt description template.
+ + Returns: + The validated prompt description template. + """ + if value.find("{intent_name}") == -1 or value.find("{user_utterances}") == -1: + text_error = ( + "The 'prompt_description' template must properly " + "include {intent_name} and {user_utterances} placeholders." + ) + raise ValueError(text_error) + return value diff --git a/autointent/generation/utterances/evolution/chat_templates/reasoning.py b/autointent/generation/chat_templates/_reasoning.py similarity index 93% rename from autointent/generation/utterances/evolution/chat_templates/reasoning.py rename to autointent/generation/chat_templates/_reasoning.py index 283c24515..f40148874 100644 --- a/autointent/generation/utterances/evolution/chat_templates/reasoning.py +++ b/autointent/generation/chat_templates/_reasoning.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from .base import EvolutionChatTemplate +from ._base_evolver import EvolutionChatTemplate +from ._evolution_templates_schemas import Message, Role class ReasoningEvolution(EvolutionChatTemplate): diff --git a/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py b/autointent/generation/chat_templates/_synthesizer_en.py similarity index 97% rename from autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py rename to autointent/generation/chat_templates/_synthesizer_en.py index 62914bd8f..e7ccc063f 100644 --- a/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py +++ b/autointent/generation/chat_templates/_synthesizer_en.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from ._base import BaseSynthesizerTemplate +from ._base_synthesizer import BaseSynthesizerTemplate +from ._evolution_templates_schemas import Message, Role class EnglishSynthesizerTemplate(BaseSynthesizerTemplate): diff --git 
a/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py b/autointent/generation/chat_templates/_synthesizer_ru.py similarity index 97% rename from autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py rename to autointent/generation/chat_templates/_synthesizer_ru.py index 54416060b..838e32da2 100644 --- a/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py +++ b/autointent/generation/chat_templates/_synthesizer_ru.py @@ -2,9 +2,8 @@ from typing import ClassVar -from autointent.generation.utterances.schemas import Message, Role - -from ._base import BaseSynthesizerTemplate +from ._base_synthesizer import BaseSynthesizerTemplate +from ._evolution_templates_schemas import Message, Role class RussianSynthesizerTemplate(BaseSynthesizerTemplate): diff --git a/autointent/generation/intents/__init__.py b/autointent/generation/intents/__init__.py index e69de29bb..a34321822 100644 --- a/autointent/generation/intents/__init__.py +++ b/autointent/generation/intents/__init__.py @@ -0,0 +1,5 @@ +"""Generative methods for enriching intents' metadata.""" + +from ._description_generation import generate_descriptions + +__all__ = ["generate_descriptions"] diff --git a/autointent/generation/intents/description_generation.py b/autointent/generation/intents/_description_generation.py similarity index 92% rename from autointent/generation/intents/description_generation.py rename to autointent/generation/intents/_description_generation.py index bc1a477c7..1ce9eb768 100644 --- a/autointent/generation/intents/description_generation.py +++ b/autointent/generation/intents/_description_generation.py @@ -12,7 +12,7 @@ from openai import AsyncOpenAI from autointent import Dataset -from autointent.generation.intents.prompt_scheme import PromptDescription +from autointent.generation.chat_templates import PromptDescription from autointent.schemas import Intent, Sample @@ -59,9 +59,6 @@ async def create_intent_description( user_utterances, 
and regex_patterns. model_name: Identifier of the OpenAI model to use. - Returns: - Generated description of the intent. - Raises: TypeError: If the model response is not a string. """ @@ -103,9 +100,6 @@ async def generate_intent_descriptions( prompt: Template for model prompt with placeholders for intent_name, user_utterances, and regex_patterns. model_name: Name of the OpenAI model to use. - - Returns: - List of intents with updated descriptions. """ tasks = [] for intent in intents: @@ -131,13 +125,13 @@ async def generate_intent_descriptions( return intents -def enhance_dataset_with_descriptions( +def generate_descriptions( dataset: Dataset, client: AsyncOpenAI, - prompt: PromptDescription, - model_name: str = "gpt-4o-mini", + model_name: str, + prompt: PromptDescription | None = None, ) -> Dataset: - """Enhance a dataset by adding generated descriptions to its intents. + """Add LLM-generated text descriptions to dataset's intents. Args: dataset: Dataset containing utterances and intents needing descriptions. @@ -145,14 +139,13 @@ def enhance_dataset_with_descriptions( prompt: Template for model prompt with placeholders for intent_name, user_utterances, and regex_patterns. model_name: OpenAI model identifier for generating descriptions. - - Returns: - Dataset with enhanced intent descriptions. 
""" samples = [] for split in dataset.values(): samples.extend([Sample(**sample) for sample in split.to_list()]) intent_utterances = group_utterances_by_label(samples) + if prompt is None: + prompt = PromptDescription() dataset.intents = asyncio.run( generate_intent_descriptions(client, intent_utterances, dataset.intents, prompt, model_name), ) diff --git a/autointent/generation/intents/prompt_scheme.py b/autointent/generation/intents/prompt_scheme.py deleted file mode 100644 index 90384b771..000000000 --- a/autointent/generation/intents/prompt_scheme.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Prompt description configuration.""" - -from pydantic import BaseModel, field_validator - -from autointent.generation.intents.prompts import PROMPT_DESCRIPTION - - -class PromptDescription(BaseModel): - """Prompt description configuration.""" - - text: str = PROMPT_DESCRIPTION - """ - The template for the prompt to generate descriptions for intents. - Should include placeholders for {intent_name} and {user_utterances}. - - `{intent_name}` will be replaced with the name of the intent. - - `{user_utterances}` will be replaced with the user utterances related to the intent. - - (optionally) `{regex_patterns}` will be replaced with the regular expressions that match user utterances. - """ - - @classmethod - @field_validator("text") - def check_valid_prompt(cls, value: str) -> str: - """Validate the prompt description template. - - Args: - value: The prompt description template. - - Returns: - The validated prompt description template. - """ - if value.find("{intent_name}") == -1 or value.find("{user_utterances}") == -1: - text_error = ( - "The 'prompt_description' template must properly " - "include {intent_name} and {user_utterances} placeholders." 
- ) - raise ValueError(text_error) - return value diff --git a/autointent/generation/utterances/__init__.py b/autointent/generation/utterances/__init__.py index 533151455..91a66aec7 100644 --- a/autointent/generation/utterances/__init__.py +++ b/autointent/generation/utterances/__init__.py @@ -1,31 +1,17 @@ +"""Generative methods for enriching dataset with synthetic samples.""" + from .balancer import DatasetBalancer -from .basic import EnglishSynthesizerTemplate, RussianSynthesizerTemplate, UtteranceGenerator +from .basic import UtteranceGenerator from .evolution import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, IncrementalUtteranceEvolver, - InformalEvolution, - ReasoningEvolution, UtteranceEvolver, ) from .generator import Generator __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", "DatasetBalancer", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", "Generator", - "GoofyEvolution", "IncrementalUtteranceEvolver", - "InformalEvolution", - "ReasoningEvolution", "UtteranceEvolver", "UtteranceGenerator", ] diff --git a/autointent/generation/utterances/balancer.py b/autointent/generation/utterances/balancer.py index baaf68caf..2f7dd8180 100644 --- a/autointent/generation/utterances/balancer.py +++ b/autointent/generation/utterances/balancer.py @@ -7,7 +7,7 @@ from autointent import Dataset from autointent.custom_types import Split -from autointent.generation.utterances.basic.chat_templates._base import BaseSynthesizerTemplate +from autointent.generation.chat_templates import BaseSynthesizerTemplate from autointent.generation.utterances.basic.utterance_generator import UtteranceGenerator from autointent.generation.utterances.generator import Generator @@ -15,27 +15,26 @@ class DatasetBalancer: - """Class for balancing dataset through example augmentation.""" + """Balance dataset's classes distribution. 
- def __init__( + If your dataset is unbalanced, you can add LLM-generated samples. + This method uses :py:class:`autointent.generation.utterances.UtteranceGenerator` under the hood. + + Args: + generator (Generator): The generator object used to create utterances. + prompt_maker (BaseSynthesizerTemplate): Chat template that creates prompts for the generator. + async_mode (bool, optional): Whether to run the generator in asynchronous mode. Defaults to False. + max_samples_per_class (int | None, optional): The maximum number of samples per class. + Must be a positive integer or None. Defaults to None. + """ + + def __init__( # noqa: D107 self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False, max_samples_per_class: int | None = None, ) -> None: - """Initialize the UtteranceBalancer. - - Args: - generator (Generator): The generator object used to create utterances. - prompt_maker (Callable[[Intent, int], list[Message]]): A callable that creates prompts for the generator. - async_mode (bool, optional): Whether to run the generator in asynchronous mode. Defaults to False. - max_samples_per_class (int | None, optional): The maximum number of samples per class. - Must be a positive integer or None. Defaults to None. - - Raises: - ValueError: If max_samples_per_class is not None and is less than or equal to 0. - """ if max_samples_per_class is not None and max_samples_per_class <= 0: msg = "max_samples_per_class must be a positive integer or None" raise ValueError(msg) @@ -48,10 +47,10 @@ def __init__( def balance(self, dataset: Dataset, split: str = Split.TRAIN, batch_size: int = 4) -> Dataset: """Balances the specified dataset split.
- :param dataset: Source dataset - :param split: Target split for balancing - :param batch_size: Batch size for asynchronous processing - :return: Balanced dataset + Args: + dataset: Source dataset + split: Target split for balancing + batch_size: Batch size for asynchronous processing """ if dataset.multilabel: msg = "Method supports only single-label datasets" diff --git a/autointent/generation/utterances/basic/__init__.py b/autointent/generation/utterances/basic/__init__.py index b0351c1ae..5b68373ad 100644 --- a/autointent/generation/utterances/basic/__init__.py +++ b/autointent/generation/utterances/basic/__init__.py @@ -1,4 +1,3 @@ -from .chat_templates import EnglishSynthesizerTemplate, RussianSynthesizerTemplate from .utterance_generator import UtteranceGenerator -__all__ = ["EnglishSynthesizerTemplate", "RussianSynthesizerTemplate", "UtteranceGenerator"] +__all__ = ["UtteranceGenerator"] diff --git a/autointent/generation/utterances/basic/chat_templates/__init__.py b/autointent/generation/utterances/basic/chat_templates/__init__.py deleted file mode 100644 index c5ca882b8..000000000 --- a/autointent/generation/utterances/basic/chat_templates/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from ._base import BaseChatTemplate, BaseSynthesizerTemplate -from ._synthesizer_en import EnglishSynthesizerTemplate -from ._synthesizer_ru import RussianSynthesizerTemplate - -__all__ = ["BaseChatTemplate", "BaseSynthesizerTemplate", "EnglishSynthesizerTemplate", "RussianSynthesizerTemplate"] diff --git a/autointent/generation/utterances/basic/cli.py b/autointent/generation/utterances/basic/cli.py index e98d21a3f..0e56af613 100644 --- a/autointent/generation/utterances/basic/cli.py +++ b/autointent/generation/utterances/basic/cli.py @@ -4,10 +4,8 @@ from argparse import ArgumentParser from autointent import load_dataset -from autointent.generation.utterances.basic.utterance_generator import UtteranceGenerator -from autointent.generation.utterances.generator import Generator 
- -from .chat_templates import EnglishSynthesizerTemplate, RussianSynthesizerTemplate +from autointent.generation.chat_templates import EnglishSynthesizerTemplate, RussianSynthesizerTemplate +from autointent.generation.utterances import Generator, UtteranceGenerator logging.basicConfig(level="INFO") logger = logging.getLogger(__name__) @@ -69,7 +67,3 @@ def main() -> None: if args.output_repo is not None: dataset.push_to_hub(args.output_repo, private=args.private) - - -if __name__ == "__main__": - main() diff --git a/autointent/generation/utterances/basic/utterance_generator.py b/autointent/generation/utterances/basic/utterance_generator.py index c84524370..0e6c7b867 100644 --- a/autointent/generation/utterances/basic/utterance_generator.py +++ b/autointent/generation/utterances/basic/utterance_generator.py @@ -7,7 +7,7 @@ from autointent import Dataset from autointent.custom_types import Split -from autointent.generation.utterances.basic.chat_templates import BaseSynthesizerTemplate +from autointent.generation.chat_templates import BaseSynthesizerTemplate from autointent.generation.utterances.generator import Generator from autointent.schemas import Intent, Sample @@ -18,16 +18,14 @@ class UtteranceGenerator: This augmentation method simply prompts LLM to look at existing examples and generate similar. Additionally, it can consider some aspects of style, punctuation, and length of the desired generations. - """ - def __init__(self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False) -> None: - """Initialize the UtteranceGenerator. + Args: + generator: Generator instance for generating utterances. + prompt_maker: Prompt maker instance for generating prompts. + async_mode: Whether to use asynchronous mode for generation. + """ - Args: - generator: Generator instance for generating utterances. - prompt_maker: Prompt maker instance for generating prompts. - async_mode: Whether to use asynchronous mode for generation. 
- """ + def __init__(self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False) -> None: # noqa: D107 self.generator = generator self.prompt_maker = prompt_maker self.async_mode = async_mode @@ -38,9 +36,6 @@ def __call__(self, intent_data: Intent, n_generations: int) -> list[str]: Args: intent_data: Intent data for which to generate utterances. n_generations: Number of utterances to generate. - - Returns: - List of generated utterances. """ messages = self.prompt_maker(intent_data, n_generations) response_text = self.generator.get_chat_completion(messages) @@ -52,9 +47,6 @@ async def _call_async(self, intent_data: Intent, n_generations: int) -> list[str Args: intent_data: Intent data for which to generate utterances. n_generations: Number of utterances to generate. - - Returns: - List of generated utterances. """ messages = self.prompt_maker(intent_data, n_generations) response_text = await self.generator.get_chat_completion_async(messages) @@ -68,7 +60,7 @@ def augment( update_split: bool = True, batch_size: int = 4, ) -> list[Sample]: - """Augment some split of dataset. + """Add LLM-generated samples to some split of dataset. Args: dataset: Dataset object. @@ -76,9 +68,6 @@ def augment( n_generations: Number of utterances to generate per intent. update_split: Whether to update the dataset split. batch_size: Batch size for async generation. - - Returns: - List of generated samples. 
""" if self.async_mode: return asyncio.run(self._augment_async(dataset, split_name, n_generations, update_split, batch_size)) diff --git a/autointent/generation/utterances/evolution/__init__.py b/autointent/generation/utterances/evolution/__init__.py index e618e3436..5d12e65d5 100644 --- a/autointent/generation/utterances/evolution/__init__.py +++ b/autointent/generation/utterances/evolution/__init__.py @@ -1,25 +1,7 @@ -from .chat_templates import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, -) from .evolver import UtteranceEvolver from .incremental_evolver import IncrementalUtteranceEvolver __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "GoofyEvolution", "IncrementalUtteranceEvolver", - "InformalEvolution", - "ReasoningEvolution", "UtteranceEvolver", ] diff --git a/autointent/generation/utterances/evolution/chat_templates/__init__.py b/autointent/generation/utterances/evolution/chat_templates/__init__.py deleted file mode 100644 index e38ce5df6..000000000 --- a/autointent/generation/utterances/evolution/chat_templates/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from .abstract import AbstractEvolution -from .base import EvolutionChatTemplate -from .concrete import ConcreteEvolution -from .formal import FormalEvolution -from .funny import FunnyEvolution -from .goofy import GoofyEvolution -from .informal import InformalEvolution -from .reasoning import ReasoningEvolution - -EVOLUTION_NAMES = [evolution.name for evolution in EvolutionChatTemplate.__subclasses__()] - -EVOLUTION_MAPPING = {evolution.name: evolution() for evolution in EvolutionChatTemplate.__subclasses__()} - -__all__ = [ - "EVOLUTION_MAPPING", - "EVOLUTION_NAMES", - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "GoofyEvolution", - 
"InformalEvolution", - "ReasoningEvolution", -] diff --git a/autointent/generation/utterances/evolution/cli.py b/autointent/generation/utterances/evolution/cli.py index d9d4ca8b1..50aff3788 100644 --- a/autointent/generation/utterances/evolution/cli.py +++ b/autointent/generation/utterances/evolution/cli.py @@ -4,13 +4,12 @@ from argparse import ArgumentParser, Namespace from autointent import load_dataset -from autointent.generation.utterances.evolution import IncrementalUtteranceEvolver, UtteranceEvolver -from autointent.generation.utterances.generator import Generator - -from .chat_templates import ( +from autointent.generation.chat_templates import ( EVOLUTION_MAPPING, EVOLUTION_NAMES, ) +from autointent.generation.utterances.evolution import IncrementalUtteranceEvolver, UtteranceEvolver +from autointent.generation.utterances.generator import Generator logging.basicConfig(level="INFO") logger = logging.getLogger(__name__) @@ -88,7 +87,3 @@ def main() -> None: if args.output_repo is not None: dataset.push_to_hub(args.output_repo, args.private) - - -if __name__ == "__main__": - main() diff --git a/autointent/generation/utterances/evolution/evolver.py b/autointent/generation/utterances/evolution/evolver.py index fef446a25..3e4efc414 100644 --- a/autointent/generation/utterances/evolution/evolver.py +++ b/autointent/generation/utterances/evolution/evolver.py @@ -12,7 +12,7 @@ from autointent import Dataset from autointent.custom_types import Split -from autointent.generation.utterances.evolution.chat_templates import EvolutionChatTemplate +from autointent.generation.chat_templates import EvolutionChatTemplate from autointent.generation.utterances.generator import Generator from autointent.schemas import Intent @@ -22,23 +22,21 @@ class UtteranceEvolver: Deeply inspired by DeepEval evolutions. This method takes single utterance and prompts LLM to change it in a specific way. + + Args: + generator: Generator instance for generating utterances. 
+ prompt_makers: List of prompt makers for generating prompts. + seed: Random seed for reproducibility. + async_mode: Whether to use asynchronous mode for generation. """ - def __init__( + def __init__( # noqa: D107 self, generator: Generator, prompt_makers: Sequence[EvolutionChatTemplate], seed: int = 0, async_mode: bool = False, ) -> None: - """Initialize the UtteranceEvolver. - - Args: - generator: Generator instance for generating utterances. - prompt_makers: List of prompt makers for generating prompts. - seed: Random seed for reproducibility. - async_mode: Whether to use asynchronous mode for generation. - """ self.generator = generator self.prompt_makers = prompt_makers self.async_mode = async_mode @@ -50,9 +48,6 @@ def _evolve(self, utterance: str, intent_data: Intent) -> str: Args: utterance: Utterance to be evolved. intent_data: Intent data for which to evolve the utterance. - - Returns: - Evolved utterance. """ maker = random.choice(self.prompt_makers) chat = maker(utterance, intent_data) @@ -64,9 +59,6 @@ async def _evolve_async(self, utterance: str, intent_data: Intent) -> str: Args: utterance: Utterance to be evolved. intent_data: Intent data for which to evolve the utterance. - - Returns: - Evolved utterance. """ maker = random.choice(self.prompt_makers) chat = maker(utterance, intent_data) @@ -82,9 +74,6 @@ def __call__( intent_data: Intent data for which to evolve the utterance. n_evolutions: Number of evolutions to apply. sequential: Whether to apply evolutions sequentially. - - Returns: - List of evolved utterances. """ current_utterance = utterance generated_utterances = [] @@ -107,7 +96,7 @@ def augment( batch_size: int = 4, sequential: bool = False, ) -> HFDataset: - """Augment some split of dataset. + """Add LLM-generated samples to some split of dataset. Args: dataset: Dataset object. @@ -116,9 +105,6 @@ def augment( update_split: Whether to update the dataset split. batch_size: Batch size for async generation. 
sequential: Whether to apply evolutions sequentially. - - Returns: - List of generated samples. """ if self.async_mode: if sequential: diff --git a/autointent/generation/utterances/evolution/incremental_evolver.py b/autointent/generation/utterances/evolution/incremental_evolver.py index 80aeea588..032c643a5 100644 --- a/autointent/generation/utterances/evolution/incremental_evolver.py +++ b/autointent/generation/utterances/evolution/incremental_evolver.py @@ -13,7 +13,7 @@ from autointent import Dataset, Pipeline from autointent.custom_types import Split -from autointent.generation.utterances.evolution.chat_templates import EvolutionChatTemplate +from autointent.generation.chat_templates import EvolutionChatTemplate from autointent.generation.utterances.evolution.evolver import UtteranceEvolver from autointent.generation.utterances.generator import Generator @@ -40,9 +40,20 @@ class IncrementalUtteranceEvolver(UtteranceEvolver): - """Incremental evolutionary strategy to augmenting utterances.""" + """Incremental evolutionary strategy to augmenting utterances. - def __init__( + This method adds LLM-generated training samples until the quality + of linear classification on resulting dataset is rising. + + Args: + generator: Generator instance for generating utterances. + prompt_makers: List of prompt makers for generating prompts. + seed: Random seed for reproducibility. + async_mode: Whether to use asynchronous mode for generation. + search_space: Search space for the pipeline optimizer. + """ + + def __init__( # noqa: D107 self, generator: Generator, prompt_makers: Sequence[EvolutionChatTemplate], @@ -50,15 +61,6 @@ def __init__( async_mode: bool = False, search_space: str | None = None, ) -> None: - """Initialize the IncrementalUtteranceEvolver. - - Args: - generator: Generator instance for generating utterances. - prompt_makers: List of prompt makers for generating prompts. - seed: Random seed for reproducibility. 
- async_mode: Whether to use asynchronous mode for generation. - search_space: Search space for the pipeline optimizer. - """ super().__init__(generator, prompt_makers, seed, async_mode) self.search_space = self._choose_search_space(search_space) @@ -67,9 +69,6 @@ def _choose_search_space(self, search_space: str | None) -> list[dict[str, Any]] Args: search_space: Search space for the pipeline optimizer. If None, default search space is used. - - Returns: - The chosen search space. """ if search_space is None: return SEARCH_SPACE @@ -84,7 +83,7 @@ def augment( batch_size: int = 4, sequential: bool = False, ) -> HFDataset: - """Augment some split of dataset. + """Add LLM-generated samples to some split of dataset. Args: dataset: Dataset object. @@ -93,9 +92,6 @@ def augment( update_split: Whether to update the dataset split with the new samples. batch_size: Batch size for augmentation. sequential: Whether to perform augmentations sequentially. - - Returns: - List of generated samples. """ best_result = 0 merge_dataset = copy.deepcopy(dataset) diff --git a/autointent/generation/utterances/generator.py b/autointent/generation/utterances/generator.py index 043589fda..744d2765b 100644 --- a/autointent/generation/utterances/generator.py +++ b/autointent/generation/utterances/generator.py @@ -6,13 +6,20 @@ import openai from dotenv import load_dotenv -from .schemas import Message +from autointent.generation.chat_templates import Message load_dotenv() class Generator: - """Wrapper class for accessing OpenAI API.""" + """Wrapper class for accessing OpenAI API. + + Args: + base_url: HTTP-endpoint for sending API requests to OpenAI API compatible server. + Omit this to infer OPENAI_BASE_URL from environment. + model_name: Name of LLM. Omit this to infer OPENAI_MODEL_NAME from environment. + **generation_params: kwargs that will be sent with a request to the endpoint. 
+ """ _default_generation_params: ClassVar[dict[str, Any]] = { "max_tokens": 150, @@ -21,15 +28,7 @@ class Generator: "temperature": 0.7, } - def __init__(self, base_url: str | None = None, model_name: str | None = None, **generation_params: Any) -> None: # noqa: ANN401 - """Initialize the wrapper for LLM. - - Args: - base_url: HTTP-endpoint for sending API requests to OpenAI API compatible server. - Omit this to infer OPENAI_BASE_URL from environment. - model_name: Name of LLM. Omit this to infer OPENAI_MODEL_NAME from environment. - **generation_params: kwargs that will be sent with a request to the endpoint. - """ + def __init__(self, base_url: str | None = None, model_name: str | None = None, **generation_params: Any) -> None: # noqa: ANN401, D107 if not base_url: base_url = os.environ["OPENAI_BASE_URL"] if not model_name: @@ -47,9 +46,6 @@ def get_chat_completion(self, messages: list[Message]) -> str: Args: messages: List of messages to send to the model. - - Returns: - Model's response. """ response = self.client.chat.completions.create( messages=messages, # type: ignore[arg-type] @@ -63,9 +59,6 @@ async def get_chat_completion_async(self, messages: list[Message]) -> str: Args: messages: List of messages to send to the model. - - Returns: - Model's response. 
""" response = await self.async_client.chat.completions.create( messages=messages, # type: ignore[arg-type] diff --git a/autointent/metrics/decision.py b/autointent/metrics/decision.py index df18aa4f4..684649695 100644 --- a/autointent/metrics/decision.py +++ b/autointent/metrics/decision.py @@ -93,6 +93,7 @@ def decision_accuracy(y_true: ListOfGenericLabels, y_pred: ListOfGenericLabels) \text{Accuracy} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{true},i} = y_{\text{pred},i})}{N} where: + - :math:`N` is the total number of samples, - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample, - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample, @@ -122,6 +123,7 @@ def _decision_roc_auc_multiclass(y_true: npt.NDArray[Any], y_pred: npt.NDArray[A \text{ROC AUC}_{\text{multiclass}} = \frac{1}{K} \sum_{k=1}^K \text{ROC AUC}_k where: + - :math:`K` is the number of classes, - :math:`\text{ROC AUC}_k` is the ROC AUC score for the :math:`k`-th class, calculated by treating it as a binary classification problem (class :math:`k` vs rest). 
diff --git a/autointent/metrics/regex.py b/autointent/metrics/regex.py index b92c9bd86..fbbf3ba50 100644 --- a/autointent/metrics/regex.py +++ b/autointent/metrics/regex.py @@ -34,6 +34,7 @@ def regex_partial_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) \text{Partial Accuracy} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{true},i} \in y_{\text{pred},i})}{N} where: + - :math:`N` is the total number of samples, - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample, - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample, @@ -66,6 +67,7 @@ def regex_partial_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE \mathbb{1}(|y_{\text{pred},i}| > 0)} where: + - :math:`N` is the total number of samples, - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample, - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample, diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index caf80626b..11bb7a662 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -49,6 +49,7 @@ def _macrofy( \text{MacroAvg} = \frac{1}{C} \sum_{i=1}^{C} \text{metric}(y_{\text{true},i}, y_{\text{pred},i}, k) where: + - :math:`C` is the number of classes, - :math:`y_{\text{true},i}` is the true binary indicator for the :math:`i`-th class label, - :math:`y_{\text{pred},i}` is the predicted binary indicator for the :math:`i`-th class label, @@ -86,6 +87,7 @@ def _average_precision(query_label: int, candidate_labels: npt.NDArray[np.int64] \cdot \frac{\text{num_relevant}}{i+1} where: + - :math:`k` is the number of top items to consider for each query, - :math:`\text{num_relevant}` is the number of relevant items in the top-k ranking, - :math:`y_{\text{true},i}` is the true label (query label) for the :math:`i`-th ranked item, @@ -136,6 +138,7 @@ def retrieval_map(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_ \text{MAP} = \frac{1}{Q} 
\sum_{q=1}^{Q} \text{AP}(q, c, k) where: + - :math:`Q` is the total number of queries, - :math:`\text{AP}(q, c, k)` is the average precision for the :math:`q`-th query, calculated considering the true labels for that query :math:`q`, the ranked candidate @@ -164,6 +167,7 @@ def _average_precision_intersecting( y_{\text{true},j}(q) \cdot y_{\text{pred},j}(i) > 0 \right) \cdot \frac{\text{num_relevant}}{i+1} where: + - :math:`k` is the number of top items to consider for each query, - :math:`\text{num_relevant}` is the number of relevant items in the top-k ranking, - :math:`y_{\text{true},j}(q)` is the true binary label for the :math:`j`-th @@ -211,6 +215,7 @@ def retrieval_map_intersecting( \text{MAP} = \frac{1}{Q} \sum_{q=1}^{Q} \text{AP}_{\text{intersecting}}(q, c, k) where: + - :math:`Q` is the total number of queries, - :math:`\text{AP}_{\text{intersecting}}(q, c, k)` is the average precision for the :math:`q`-th query, calculated using the intersecting true labels (`q`), @@ -261,6 +266,7 @@ def retrieval_hit_rate( \text{Hit Rate} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{query},i} \in y_{\text{candidates},i}^{(1:k)})}{N} where: + - :math:`N` is the total number of queries, - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - :math:`y_{\text{candidates},i}^{(1:k)}` is the set of top-k predicted labels for the :math:`i`-th query, @@ -301,6 +307,7 @@ def retrieval_hit_rate_intersecting( \left( y_{\text{query},i} \cdot y_{\text{candidates},i,j} \right) > 0 \right)}{N} where: + - :math:`N` is the total number of queries, - :math:`y_{\text{query},i}` is the one-hot encoded label vector for the :math:`i`-th query, - :math:`y_{\text{candidates},i,j}` is the one-hot encoded label vector of the :math:`j`-th @@ -366,6 +373,7 @@ def retrieval_precision( y_{\text{candidates},i}^{(1:k)}|}{k} where: + - :math:`N` is the total number of queries, - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - 
:math:`y_{\text{candidates},i}^{(1:k)}` is the set of top-k predicted labels for the :math:`i`-th query. @@ -407,6 +415,7 @@ def retrieval_precision_intersecting( \frac{\sum_{j=1}^k \mathbb{1} \left( y_{\text{query},i} \cdot y_{\text{candidates},i,j} > 0 \right)}{k} where: + - :math:`N` is the total number of queries, - :math:`y_{\text{query},i}` is the one-hot encoded label vector for the :math:`i`-th query, - :math:`y_{\text{candidates},i,j}` is the one-hot encoded label vector of the :math:`j`-th @@ -468,6 +477,7 @@ def _dcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: \text{DCG@k} = \sum_{i=1}^k \frac{r_i}{\log_2(i + 1)} where: + - :math:`r_i` is the relevance score of the item at rank :math:`i`, - :math:`k` is the number of top items considered. @@ -491,6 +501,7 @@ def _idcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: \text{IDCG@k} = \sum_{i=1}^k \frac{r_i^{\text{ideal}}}{\log_2(i + 1)} where: + - :math:`r_i^{\text{ideal}}` is the relevance score of the item at rank :math:`i` in the ideal (sorted) order, - :math:`k` is the number of top items considered. @@ -513,6 +524,7 @@ def retrieval_ndcg(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE \text{NDCG@k} = \frac{\text{DCG@k}}{\text{IDCG@k}} where: + - :math:`\text{DCG@k}` is the Discounted Cumulative Gain at position :math:`k`, - :math:`\text{IDCG@k}` is the Ideal Discounted Cumulative Gain at position :math:`k`. @@ -608,6 +620,7 @@ def retrieval_mrr(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_ \text{MRR@k} = \frac{1}{N} \sum_{i=1}^N \frac{1}{\text{rank}_i} where: + - :math:`\text{rank}_i` is the rank position of the first relevant item in the top-k results for query :math:`i`, - :math:`N` is the total number of queries. 
@@ -646,6 +659,7 @@ def retrieval_mrr_intersecting( \text{MRR@k}_{\text{intersecting}} = \frac{1}{N} \sum_{i=1}^N \frac{1}{\text{rank}_i} where: + - :math:`\text{rank}_i` is the rank position of the first relevant (intersecting) item in the top-k results for query :math:`i`, - :math:`N` is the total number of queries. diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index bc2cbefa5..9846801bd 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -61,7 +61,7 @@ def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, .. math:: - \\frac{1}{\\ell}\\sum_{i=1}^{\\ell}\\log(s[y[i]]) + \frac{1}{\ell}\sum_{i=1}^{\ell}\log(s[y[i]]) where ``s[y[i]]`` is the predicted score of the ``i``-th utterance having the ground truth label. @@ -70,7 +70,7 @@ def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, .. math:: - \\frac{1}{\\ell}\\sum_{i=1}^\\ell\\sum_{c=1}^C\\Big[y[i,c]\\cdot\\log(s[i,c])+(1-y[i,c])\\cdot\\log(1-s[i,c])\\Big] + \frac{1}{\ell}\sum_{i=1}^\ell\sum_{c=1}^C\Big[y[i,c]\cdot\log(s[i,c])+(1-y[i,c])\cdot\log(1-s[i,c])\Big] Args: labels: Ground truth labels for each utterance. @@ -109,7 +109,7 @@ def scoring_roc_auc(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> flo .. math:: - \\frac{1}{C}\\sum_{k=1}^C ROCAUC(scores[:, k], labels[:, k]) + \frac{1}{C}\sum_{k=1}^C ROCAUC(scores[:, k], labels[:, k]) where ``C`` is the number of classes. @@ -229,7 +229,7 @@ def scoring_hit_rate(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> fl .. math:: - \\text{Hit Rate} = \\frac{1}{N} \\sum_{i=1}^N \\mathbb{1}(y_{\\text{top},i} \\in y_{\\text{true},i}) + \text{Hit Rate} = \frac{1}{N} \sum_{i=1}^N \mathbb{1}(y_{\text{top},i} \in y_{\text{true},i}) Args: labels: Ground truth labels for each sample. 
diff --git a/autointent/modules/decision/_adaptive.py b/autointent/modules/decision/_adaptive.py index e980a3a8b..3e16fb1f9 100644 --- a/autointent/modules/decision/_adaptive.py +++ b/autointent/modules/decision/_adaptive.py @@ -25,14 +25,9 @@ class AdaptiveDecision(BaseDecision): The AdaptiveDecision calculates optimal thresholds based on the given scores and labels, ensuring the best performance on multi-label data. - Attributes: - _n_classes: Number of classes in the dataset - _r: Scaling factor for thresholds - tags: List of Tag objects for mutually exclusive classes - name: Name of the predictor, defaults to "adaptive" - supports_multilabel: Whether the module supports multilabel classification - supports_multiclass: Whether the module supports multiclass classification - supports_oos: Whether the module supports out-of-scope samples + Args: + search_space: List of threshold scaling factors to search for optimal performance. + Defaults to a range between 0 and 1 Examples: -------- @@ -62,12 +57,6 @@ class AdaptiveDecision(BaseDecision): name = "adaptive" def __init__(self, search_space: list[FloatFromZeroToOne] | None = None) -> None: - """Initialize the AdaptiveDecision. - - Args: - search_space: List of threshold scaling factors to search for optimal performance. 
- Defaults to a range between 0 and 1 - """ self.search_space = search_space if search_space is not None else default_search_space if any(val < 0 or val > 1 for val in self.search_space): @@ -81,9 +70,6 @@ def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | Args: context: Context containing configurations and utilities search_space: List of threshold scaling factors, or None for default - - Returns: - Initialized AdaptiveDecision instance """ return cls( search_space=search_space, diff --git a/autointent/modules/decision/_argmax.py b/autointent/modules/decision/_argmax.py index 526cb745a..da7f5abba 100644 --- a/autointent/modules/decision/_argmax.py +++ b/autointent/modules/decision/_argmax.py @@ -21,13 +21,6 @@ class ArgmaxDecision(BaseDecision): The ArgmaxDecision is a simple predictor that selects the class with the highest score (argmax) for single-label classification tasks. - Attributes: - name: Name of the predictor, defaults to "argmax" - supports_oos: Whether the module supports out-of-scope samples - supports_multilabel: Whether the module supports multilabel classification - supports_multiclass: Whether the module supports multiclass classification - _n_classes: Number of classes in the dataset - Examples: -------- .. testcode:: @@ -54,8 +47,7 @@ class ArgmaxDecision(BaseDecision): supports_multiclass = True _n_classes: int - def __init__(self) -> None: - """Initialize ArgmaxDecision.""" + def __init__(self) -> None: ... 
@classmethod def from_context(cls, context: Context) -> "ArgmaxDecision": @@ -63,9 +55,6 @@ def from_context(cls, context: Context) -> "ArgmaxDecision": Args: context: Context object containing configurations and utilities - - Returns: - Initialized ArgmaxDecision instance """ return cls() diff --git a/autointent/modules/decision/_jinoos.py b/autointent/modules/decision/_jinoos.py index e60248762..0fba2a4de 100644 --- a/autointent/modules/decision/_jinoos.py +++ b/autointent/modules/decision/_jinoos.py @@ -20,13 +20,8 @@ class JinoosDecision(BaseDecision): JinoosDecision predicts the best scores for single-label classification tasks and detects out-of-scope (OOS) samples based on a threshold. - Attributes: - thresh: The optimized threshold value for OOS detection - name: Name of the predictor, defaults to "jinoos" - _n_classes: Number of classes determined during fitting - supports_multilabel: Whether the module supports multilabel classification - supports_multiclass: Whether the module supports multiclass classification - supports_oos: Whether the module supports out-of-scope samples + Args: + search_space: List of threshold values to search through for OOS detection Examples: -------- @@ -60,11 +55,6 @@ def __init__( self, search_space: list[FloatFromZeroToOne] | None = None, ) -> None: - """Initialize Jinoos predictor. 
- - Args: - search_space: List of threshold values to search through for OOS detection - """ self.search_space = np.array(search_space) if search_space is not None else default_search_space if any(val < 0 or val > 1 for val in self.search_space): @@ -78,9 +68,6 @@ def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | Args: context: Context containing configurations and utilities search_space: List of threshold values to search through - - Returns: - Initialized JinoosDecision instance """ return cls( search_space=search_space, @@ -137,10 +124,10 @@ def jinoos_score(y_true: ListOfGenericLabels, y_pred: npt.NDArray[Any]) -> float .. math:: - \\frac{C_{in}}{N_{in}}+\\frac{C_{oos}}{N_{oos}} + \frac{C_{in}}{N_{in}}+\frac{C_{oos}}{N_{oos}} - where C_in is the number of correctly predicted in-domain labels - and N_in is the total number of in-domain labels. The same for OOS samples. + where :math:`C_{in}` is the number of correctly predicted in-domain labels + and :math:`N_{in}` is the total number of in-domain labels. The same for OOS samples. Args: y_true: True labels diff --git a/autointent/modules/decision/_threshold.py b/autointent/modules/decision/_threshold.py index b0ccf82a5..133ebcbec 100644 --- a/autointent/modules/decision/_threshold.py +++ b/autointent/modules/decision/_threshold.py @@ -23,14 +23,8 @@ class ThresholdDecision(BaseDecision): ThresholdDecision uses a predefined threshold (or array of thresholds) to predict labels for single-label or multi-label classification tasks. 
- Attributes: - tags: Tags for predictions (if any) - name: Name of the predictor, defaults to "threshold" - supports_oos: Whether the module supports out-of-scope samples - supports_multilabel: Whether the module supports multilabel classification - supports_multiclass: Whether the module supports multiclass classification - _multilabel: Whether the task is multilabel - _n_classes: Number of classes in the dataset + Args: + thresh: Threshold for the scores, shape (n_classes,) or float Examples: -------- @@ -82,11 +76,6 @@ def __init__( self, thresh: FloatFromZeroToOne | list[FloatFromZeroToOne] = 0.5, ) -> None: - """Initialize threshold predictor. - - Args: - thresh: Threshold for the scores, shape (n_classes,) or float - """ val_error = False self.thresh = thresh if isinstance(thresh, float) else np.array(thresh) if isinstance(thresh, float): @@ -107,9 +96,6 @@ def from_context( Args: context: Context containing configurations and utilities thresh: Threshold for classification - - Returns: - Initialized ThresholdDecision instance """ return cls( thresh=thresh, diff --git a/autointent/modules/decision/_tunable.py b/autointent/modules/decision/_tunable.py index 8e0fff36a..1c4c4b893 100644 --- a/autointent/modules/decision/_tunable.py +++ b/autointent/modules/decision/_tunable.py @@ -27,14 +27,11 @@ class TunableDecision(BaseDecision): in single-label or multi-label classification tasks. It is designed for datasets with varying score distributions and supports out-of-scope (OOS) detection. 
- Attributes: - name: Name of the predictor, defaults to "tunable" - _n_classes: Number of classes determined during fitting - _multilabel: Whether the task is multilabel + Args: + target_metric: Metric to optimize during threshold tuning + n_optuna_trials: Number of optimization trials + seed: Random seed for reproducibility tags: Tags for predictions (if any) - supports_multilabel: Whether the module supports multilabel classification - supports_multiclass: Whether the module supports multiclass classification - supports_oos: Whether the module supports out-of-scope samples Examples: -------- @@ -88,14 +85,6 @@ def __init__( seed: int | None = 0, tags: list[Tag] | None = None, ) -> None: - """Initialize tunable predictor. - - Args: - target_metric: Metric to optimize during threshold tuning - n_optuna_trials: Number of optimization trials - seed: Random seed for reproducibility - tags: Tags for predictions (if any) - """ self.target_metric = target_metric self.n_optuna_trials = n_optuna_trials self.seed = seed @@ -119,9 +108,6 @@ def from_context( context: Context containing configurations and utilities target_metric: Metric to optimize during threshold tuning n_optuna_trials: Number of optimization trials - - Returns: - Initialized TunableDecision instance """ return cls( target_metric=target_metric, diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index f55ad983c..7b08ae709 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -23,13 +23,9 @@ class LogregAimedEmbedding(BaseEmbedding): The main purpose of this module is to be used at embedding node for optimizing embedding configuration using its logreg classification quality as a sort of proxy metric. 
- Attributes: - _classifier: The trained logistic regression model - _label_encoder: Label encoder for converting labels to numerical format - name: Name of the module, defaults to "logreg" - supports_multiclass: Whether the module supports multiclass classification - supports_multilabel: Whether the module supports multilabel classification - supports_oos: Whether the module supports out-of-scope detection + Args: + embedder_config: Config of the embedder used for creating embeddings + cv: Number of folds used in LogisticRegressionCV Examples: -------- @@ -57,12 +53,6 @@ def __init__( embedder_config: EmbedderConfig | str | dict[str, Any], cv: PositiveInt = 3, ) -> None: - """Initialize the LogregAimedEmbedding. - - Args: - embedder_config: Config of the embedder used for creating embeddings - cv: Number of folds used in LogisticRegressionCV - """ self.embedder_config = EmbedderConfig.from_search_config(embedder_config) self.cv = cv @@ -83,9 +73,6 @@ def from_context( context: Context containing configurations and utilities cv: Number of folds used in LogisticRegressionCV embedder_config: Config of the embedder to use - - Returns: - Initialized LogregAimedEmbedding instance """ return cls( cv=cv, diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index a0fdfc665..04f4d04c8 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -18,12 +18,9 @@ class RetrievalAimedEmbedding(BaseEmbedding): The main purpose of this module is to be used at embedding node for optimizing embedding configuration using its retrieval quality as a sort of proxy metric. 
- Attributes: - _vector_index: The vector index used for nearest neighbor retrieval - name: Name of the module, defaults to "retrieval" - supports_multiclass: Whether the module supports multiclass classification - supports_multilabel: Whether the module supports multilabel classification - supports_oos: Whether the module supports out-of-scope detection + Args: + k: Number of nearest neighbors to retrieve + embedder_config: Config of the embedder used for creating embeddings Examples: -------- @@ -52,12 +49,6 @@ def __init__( embedder_config: EmbedderConfig | str | dict[str, Any], k: PositiveInt = 10, ) -> None: - """Initialize the RetrievalAimedEmbedding. - - Args: - k: Number of nearest neighbors to retrieve - embedder_config: Config of the embedder used for creating embeddings - """ self.k = k embedder_config = EmbedderConfig.from_search_config(embedder_config) self.embedder_config = embedder_config @@ -79,9 +70,6 @@ def from_context( context: The context containing configurations and utilities k: Number of nearest neighbors to retrieve embedder_config: Config of the embedder to use - - Returns: - Initialized RetrievalAimedEmbedding instance """ return cls( k=k, diff --git a/autointent/modules/regex/_simple.py b/autointent/modules/regex/_simple.py index 4945278ce..d83470740 100644 --- a/autointent/modules/regex/_simple.py +++ b/autointent/modules/regex/_simple.py @@ -37,9 +37,6 @@ class SimpleRegex(BaseRegex): A module that uses regular expressions to detect intents in text utterances. Supports both full and partial pattern matching. 
- - Attributes: - name: Name of the module, defaults to "regex" """ name = "simple" diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index f52e05076..bf961eb90 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -22,14 +22,9 @@ class DescriptionScorer(BaseScorer): DescriptionScorer embeds both the utterances and the intent descriptions, then computes a similarity score between the two, using either cosine similarity and softmax. - Attributes: - _embedder: The embedder used to generate embeddings for utterances and descriptions - name: Name of the scorer, defaults to "description" - _n_classes: Number of intent classes - _multilabel: Whether the task is multilabel - _description_vectors: Embedded vectors of intent descriptions - supports_multiclass: Whether multiclass classification is supported - supports_multilabel: Whether multilabel classification is supported + Args: + embedder_config: Config of the embedder model + temperature: Temperature parameter for scaling logits, defaults to 1.0 """ _embedder: Embedder @@ -45,12 +40,6 @@ def __init__( embedder_config: EmbedderConfig | str | dict[str, Any] | None = None, temperature: PositiveFloat = 1.0, ) -> None: - """Initialize the DescriptionScorer. 
- - Args: - embedder_config: Config of the embedder model - temperature: Temperature parameter for scaling logits, defaults to 1.0 - """ self.temperature = temperature self.embedder_config = EmbedderConfig.from_search_config(embedder_config) diff --git a/autointent/modules/scoring/_dnnc/dnnc.py b/autointent/modules/scoring/_dnnc/dnnc.py index 7758528b2..33bf77ca0 100644 --- a/autointent/modules/scoring/_dnnc/dnnc.py +++ b/autointent/modules/scoring/_dnnc/dnnc.py @@ -22,18 +22,10 @@ class DNNCScorer(BaseScorer): This module uses a Ranker for scoring candidate intents and can optionally train a logistic regression head on top of cross-encoder features. - Reference: - Zhang, J. G., Hashimoto, K., Liu, W., Wu, C. S., Wan, Y., Yu, P. S., ... & Xiong, C. (2020). - Discriminative Nearest Neighbor Few-Shot Intent Detection by Transferring Natural Language Inference. - arXiv preprint arXiv:2010.13009. - - Attributes: - _n_classes: Number of intent classes - _vector_index: Index for nearest neighbor search - _cross_encoder: Ranker model for scoring pairs - name: Name of the scorer, defaults to "dnnc" - supports_multilabel: Whether multilabel classification is supported - supports_multiclass: Whether multiclass classification is supported + Args: + cross_encoder_config: Config of the cross-encoder model + embedder_config: Config of the embedder model + k: Number of nearest neighbors to retrieve Examples: -------- @@ -53,6 +45,11 @@ class DNNCScorer(BaseScorer): test_utterances = ["Hello!", "What's up?"] scores = scorer.predict(test_utterances) + Reference: + Zhang, J. G., Hashimoto, K., Liu, W., Wu, C. S., Wan, Y., Yu, P. S., ... & Xiong, C. (2020). + Discriminative Nearest Neighbor Few-Shot Intent Detection by Transferring Natural Language Inference. + arXiv preprint arXiv:2010.13009. 
+ """ name = "dnnc" @@ -68,13 +65,6 @@ def __init__( cross_encoder_config: CrossEncoderConfig | str | dict[str, Any] | None = None, embedder_config: EmbedderConfig | str | dict[str, Any] | None = None, ) -> None: - """Initialize the DNNCScorer. - - Args: - cross_encoder_config: Config of the cross-encoder model - embedder_config: Config of the embedder model - k: Number of nearest neighbors to retrieve - """ self.cross_encoder_config = CrossEncoderConfig.from_search_config(cross_encoder_config) self.embedder_config = EmbedderConfig.from_search_config(embedder_config) self.k = k @@ -98,9 +88,6 @@ def from_context( cross_encoder_config: Config of the cross-encoder model k: Number of nearest neighbors to retrieve embedder_config: Config of the embedder model, or None to use the best embedder - - Returns: - Initialized DNNCScorer instance """ if embedder_config is None: embedder_config = context.resolve_embedder() diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index 9cd009798..749bbdc31 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -20,12 +20,14 @@ class KNNScorer(BaseScorer): This module uses a vector index to retrieve nearest neighbors for query utterances and applies a weighting strategy to compute class probabilities. 
- Attributes: - weights: Weighting strategy used for scoring - _vector_index: VectorIndex instance for neighbor retrieval - name: Name of the scorer, defaults to "knn" - supports_multiclass: Whether multiclass classification is supported - supports_multilabel: Whether multilabel classification is supported + Args: + embedder_config: Config of the embedder used for vectorization + k: Number of closest neighbors to consider during inference + weights: Weighting strategy: + + - "uniform": Equal weight for all neighbors + - "distance": Weight inversely proportional to distance + - "closest": Only the closest neighbor of each class is weighted Examples: -------- @@ -58,16 +60,6 @@ def __init__( embedder_config: EmbedderConfig | str | dict[str, Any] | None = None, weights: WeightType = "distance", ) -> None: - """Initialize the KNNScorer. - - Args: - embedder_config: Config of the embedder used for vectorization - k: Number of closest neighbors to consider during inference - weights: Weighting strategy: - - "uniform": Equal weight for all neighbors - - "distance": Weight inversely proportional to distance - - "closest": Only the closest neighbor of each class is weighted - """ self.embedder_config = EmbedderConfig.from_search_config(embedder_config) self.k = k self.weights = weights @@ -95,9 +87,6 @@ def from_context( k: Number of closest neighbors to consider during inference weights: Weighting strategy for scoring embedder_config: Config of the embedder, or None to use the best embedder - - Returns: - Initialized KNNScorer instance """ if embedder_config is None: embedder_config = context.resolve_embedder() diff --git a/autointent/modules/scoring/_knn/rerank_scorer.py b/autointent/modules/scoring/_knn/rerank_scorer.py index 2cf7e0aa3..64acf52c9 100644 --- a/autointent/modules/scoring/_knn/rerank_scorer.py +++ b/autointent/modules/scoring/_knn/rerank_scorer.py @@ -18,9 +18,18 @@ class RerankScorer(KNNScorer): This module uses a cross-encoder to re-rank the nearest 
neighbors retrieved by a KNN scorer. - Attributes: - name: Name of the scorer, defaults to "rerank" - _scorer: Ranker instance for re-ranking + Args: + embedder_config: Config of the embedder used for vectorization + k: Number of closest neighbors to consider during inference + weights: Weighting strategy: + + - "uniform": Equal weight for all neighbors + - "distance": Weight inversely proportional to distance + - "closest": Only the closest neighbor of each class is weighted + + cross_encoder_config: Config of the cross-encoder model used for re-ranking + m: Number of top-ranked neighbors to consider, or None to use k + rank_threshold_cutoff: Rank threshold cutoff for re-ranking, or None """ name = "rerank" @@ -35,19 +44,6 @@ def __init__( cross_encoder_config: CrossEncoderConfig | str | dict[str, Any] | None = None, embedder_config: EmbedderConfig | str | dict[str, Any] | None = None, ) -> None: - """Initialize the RerankScorer. - - Args: - embedder_config: Config of the embedder used for vectorization - k: Number of closest neighbors to consider during inference - weights: Weighting strategy: - - "uniform": Equal weight for all neighbors - - "distance": Weight inversely proportional to distance - - "closest": Only the closest neighbor of each class is weighted - cross_encoder_config: Config of the cross-encoder model used for re-ranking - m: Number of top-ranked neighbors to consider, or None to use k - rank_threshold_cutoff: Rank threshold cutoff for re-ranking, or None - """ super().__init__( embedder_config=embedder_config, k=k, @@ -91,9 +87,6 @@ def from_context( or None to use the best existing embedder m: Number of top-ranked neighbors to consider, or None to use k rank_threshold_cutoff: Rank threshold cutoff for re-ranking, or None - - Returns: - An instance of RerankScorer """ if embedder_config is None: embedder_config = context.resolve_embedder() diff --git a/autointent/modules/scoring/_linear.py b/autointent/modules/scoring/_linear.py index 
86a004bff..06e04c4dd 100644 --- a/autointent/modules/scoring/_linear.py +++ b/autointent/modules/scoring/_linear.py @@ -19,13 +19,11 @@ class LinearScorer(BaseScorer): This module uses embeddings generated from a transformer model to train a logistic regression classifier for intent classification. - Attributes: - name: Name of the scorer, defaults to "linear" - _multilabel: Whether multilabel classification is used - _clf: Trained classifier instance - _embedder: Embedder instance for feature extraction - supports_multiclass: Whether multiclass classification is supported - supports_multilabel: Whether multilabel classification is supported + Args: + embedder_config: Config of the embedder model + cv: Number of cross-validation folds, defaults to 3 + n_jobs: Number of parallel jobs for cross-validation, defaults to None + seed: Random seed for reproducibility, defaults to 0 Example: -------- @@ -62,14 +60,6 @@ def __init__( cv: int = 3, seed: int = 0, ) -> None: - """Initialize the LinearScorer. 
- - Args: - embedder_config: Config of the embedder model - cv: Number of cross-validation folds, defaults to 3 - n_jobs: Number of parallel jobs for cross-validation, defaults to None - seed: Random seed for reproducibility, defaults to 0 - """ self.cv = cv self.seed = seed self.embedder_config = EmbedderConfig.from_search_config(embedder_config) @@ -89,9 +79,6 @@ def from_context( Args: context: Context containing configurations and utilities embedder_config: Config of the embedder, or None to use the best embedder - - Returns: - Initialized LinearScorer instance """ if embedder_config is None: embedder_config = context.resolve_embedder() diff --git a/autointent/modules/scoring/_mlknn/mlknn.py b/autointent/modules/scoring/_mlknn/mlknn.py index 79b517c9f..d6e8f9057 100644 --- a/autointent/modules/scoring/_mlknn/mlknn.py +++ b/autointent/modules/scoring/_mlknn/mlknn.py @@ -19,18 +19,11 @@ class MLKnnScorer(BaseScorer): This module implements ML-KNN, a multi-label classifier that computes probabilities based on the k-nearest neighbors of a query instance. 
- Attributes: - name: Name of the scorer, defaults to "mlknn" - _n_classes: Number of classes - _vector_index: Index for nearest neighbor search - _prior_prob_true: Prior probabilities for true labels - _prior_prob_false: Prior probabilities for false labels - _cond_prob_true: Conditional probabilities for true labels - _cond_prob_false: Conditional probabilities for false labels - _features: Embedded features of training data - _labels: Labels of training data - supports_multiclass: Whether multiclass classification is supported - supports_multilabel: Whether multilabel classification is supported + Args: + k: Number of nearest neighbors to consider + embedder_config: Config of the embedder used for vectorization + s: Smoothing parameter for probability calculations, defaults to 1.0 + ignore_first_neighbours: Number of closest neighbors to ignore, defaults to 0 Example: -------- @@ -75,14 +68,6 @@ def __init__( s: float = 1.0, ignore_first_neighbours: int = 0, ) -> None: - """Initialize the MLKnnScorer. - - Args: - k: Number of nearest neighbors to consider - embedder_config: Config of the embedder used for vectorization - s: Smoothing parameter for probability calculations, defaults to 1.0 - ignore_first_neighbours: Number of closest neighbors to ignore, defaults to 0 - """ self.k = k self.embedder_config = EmbedderConfig.from_search_config(embedder_config) self.s = s diff --git a/autointent/modules/scoring/_sklearn/sklearn_scorer.py b/autointent/modules/scoring/_sklearn/sklearn_scorer.py index fbc8c4405..19e1a635f 100644 --- a/autointent/modules/scoring/_sklearn/sklearn_scorer.py +++ b/autointent/modules/scoring/_sklearn/sklearn_scorer.py @@ -35,10 +35,10 @@ class SklearnScorer(BaseScorer): This module uses embeddings generated from a transformer model to train chosen sklearn classifier for intent classification. 
- Attributes: - name: Name of the scorer, defaults to "sklearn" - supports_multilabel: Whether multilabel classification is supported - supports_multiclass: Whether multiclass classification is supported + Args: + clf_name: Name of the sklearn classifier to use + embedder_config: Config of the embedder model + **clf_args: Arguments for the chosen sklearn classifier Examples: >>> from autointent.modules.scoring import SklearnScorer @@ -65,11 +65,6 @@ def __init__( ) -> None: """Initialize the SklearnScorer. - Args: - clf_name: Name of the sklearn classifier to use - embedder_config: Config of the embedder model - **clf_args: Arguments for the chosen sklearn classifier - Raises: ValueError: If the specified classifier doesn't exist or lacks predict_proba """ @@ -99,9 +94,6 @@ def from_context( clf_name: Name of the sklearn classifier to use embedder_config: Config of the embedder, or None to use the best embedder **clf_args: Arguments for the chosen sklearn classifier - - Returns: - Initialized SklearnScorer instance """ if embedder_config is None: embedder_config = context.resolve_embedder() diff --git a/autointent/nodes/__init__.py b/autointent/nodes/__init__.py index c15db8d73..ecf236746 100644 --- a/autointent/nodes/__init__.py +++ b/autointent/nodes/__init__.py @@ -1,7 +1,7 @@ """Some core components used in AutoIntent behind the scenes.""" from ._inference_node import InferenceNode -from ._optimization import NodeOptimizer +from ._node_optimizer import NodeOptimizer __all__ = [ "InferenceNode", diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_node_optimizer.py similarity index 100% rename from autointent/nodes/_optimization/_node_optimizer.py rename to autointent/nodes/_node_optimizer.py diff --git a/autointent/nodes/_optimization/__init__.py b/autointent/nodes/_optimization/__init__.py deleted file mode 100644 index 9ec8136e9..000000000 --- a/autointent/nodes/_optimization/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from 
._node_optimizer import NodeOptimizer - -__all__ = ["NodeOptimizer"] diff --git a/tests/generation/intents/test_description_generation.py b/tests/generation/intents/test_description_generation.py index 460917864..09f83efd4 100644 --- a/tests/generation/intents/test_description_generation.py +++ b/tests/generation/intents/test_description_generation.py @@ -4,13 +4,13 @@ import pytest from autointent import Dataset -from autointent.generation.intents.description_generation import ( +from autointent.generation.chat_templates import PromptDescription +from autointent.generation.intents._description_generation import ( create_intent_description, - enhance_dataset_with_descriptions, + generate_descriptions, generate_intent_descriptions, group_utterances_by_label, ) -from autointent.generation.intents.prompt_scheme import PromptDescription from autointent.schemas import Intent, Sample @@ -271,7 +271,7 @@ async def test_generate_intent_descriptions_empty_utterances_patterns(): def test_enhance_dataset_with_descriptions_basic(): client = AsyncMock() with patch( - "autointent.generation.intents.description_generation.generate_intent_descriptions", + "autointent.generation.intents._description_generation.generate_intent_descriptions", new=AsyncMock( return_value=[ Intent(id=1, name="Greeting", description="Generated description"), @@ -294,7 +294,7 @@ def test_enhance_dataset_with_descriptions_basic(): prompt = PromptDescription( text="Describe intent {intent_name} with examples: {user_utterances} and patterns: {regex_patterns}", ) - enhanced_dataset = enhance_dataset_with_descriptions( + enhanced_dataset = generate_descriptions( dataset=dataset, client=client, prompt=prompt, @@ -319,7 +319,7 @@ def test_enhance_dataset_with_descriptions_basic(): def test_enhance_dataset_with_existing_descriptions(): client = AsyncMock() with patch( - "autointent.generation.intents.description_generation.generate_intent_descriptions", + 
"autointent.generation.intents._description_generation.generate_intent_descriptions", new=AsyncMock( return_value=[ Intent(id=0, name="Greeting", description="Existing description"), @@ -342,7 +342,7 @@ def test_enhance_dataset_with_existing_descriptions(): prompt = PromptDescription( text="Describe intent {intent_name} with examples: {user_utterances} and patterns: {regex_patterns}", ) - enhanced_dataset = enhance_dataset_with_descriptions( + enhanced_dataset = generate_descriptions( dataset=dataset, client=client, prompt=prompt, diff --git a/tests/generation/utterances/test_balancer.py b/tests/generation/utterances/test_balancer.py index 86c3e332f..58d109c3e 100644 --- a/tests/generation/utterances/test_balancer.py +++ b/tests/generation/utterances/test_balancer.py @@ -7,8 +7,8 @@ from autointent import Dataset from autointent.custom_types import Split +from autointent.generation.chat_templates import EnglishSynthesizerTemplate from autointent.generation.utterances import DatasetBalancer, Generator -from autointent.generation.utterances.basic.chat_templates._synthesizer_en import EnglishSynthesizerTemplate logger = logging.getLogger(__name__) diff --git a/tests/generation/utterances/test_basic_synthesizer.py b/tests/generation/utterances/test_basic_synthesizer.py index ed173f64c..f89ebac82 100644 --- a/tests/generation/utterances/test_basic_synthesizer.py +++ b/tests/generation/utterances/test_basic_synthesizer.py @@ -1,6 +1,7 @@ from unittest.mock import AsyncMock, Mock -from autointent.generation.utterances import EnglishSynthesizerTemplate, UtteranceGenerator +from autointent.generation.chat_templates import EnglishSynthesizerTemplate +from autointent.generation.utterances import UtteranceGenerator def has_unfilled_fields(template): diff --git a/tests/generation/utterances/test_evolver.py b/tests/generation/utterances/test_evolver.py index b85d2dac4..ee2b4f31a 100644 --- a/tests/generation/utterances/test_evolver.py +++ 
b/tests/generation/utterances/test_evolver.py @@ -2,7 +2,8 @@ import pytest -from autointent.generation.utterances import AbstractEvolution, IncrementalUtteranceEvolver, UtteranceEvolver +from autointent.generation.chat_templates import AbstractEvolution +from autointent.generation.utterances import IncrementalUtteranceEvolver, UtteranceEvolver def test_on_dataset_incremental(dataset): diff --git a/tests/generation/utterances/test_generator.py b/tests/generation/utterances/test_generator.py index ad1930b9b..e7685612d 100644 --- a/tests/generation/utterances/test_generator.py +++ b/tests/generation/utterances/test_generator.py @@ -2,8 +2,8 @@ import pytest +from autointent.generation.chat_templates import Message from autointent.generation.utterances.generator import Generator -from autointent.generation.utterances.schemas import Message @pytest.fixture(autouse=True)