diff --git a/autointent/_dataset/_validation.py b/autointent/_dataset/_validation.py index 555c5df5a..7d6ce6e12 100644 --- a/autointent/_dataset/_validation.py +++ b/autointent/_dataset/_validation.py @@ -101,7 +101,7 @@ def _validate_classes(self, splits: list[list[Sample]]) -> int: ) raise ValueError(message) if not n_classes[0]: - message = "Number of classes is zero or undefined. " "Ensure at least one class is present in the splits." + message = "Number of classes is zero or undefined. Ensure at least one class is present in the splits." raise ValueError(message) return n_classes[0] @@ -120,8 +120,7 @@ def _validate_intents(self, n_classes: int) -> "DatasetReader": intent_ids = [intent.id for intent in self.intents] if intent_ids != list(range(len(self.intents))): message = ( - f"Invalid intent IDs. Expected sequential IDs from 0 to {len(self.intents) - 1}, " - f"but got {intent_ids}." + f"Invalid intent IDs. Expected sequential IDs from 0 to {len(self.intents) - 1}, but got {intent_ids}." ) raise ValueError(message) return self diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 6cf1478f9..0a677c811 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -236,7 +236,7 @@ def _split_cv(self) -> None: random_seed=self.random_seed, allow_oos_in_train=True, ) - self.dataset[f"{Split.TRAIN}_{self.config.n_folds-1}"] = self.dataset.pop(Split.TRAIN) + self.dataset[f"{Split.TRAIN}_{self.config.n_folds - 1}"] = self.dataset.pop(Split.TRAIN) def _split_validation_from_train(self, size: float) -> None: if Split.TRAIN in self.dataset: diff --git a/autointent/generation/utterances/basic/chat_templates/_base.py b/autointent/generation/utterances/basic/chat_templates/_base.py index 0c39d89fb..7c66d716e 100644 --- a/autointent/generation/utterances/basic/chat_templates/_base.py +++ b/autointent/generation/utterances/basic/chat_templates/_base.py @@ -66,10 +66,10 @@ def __call__(self, intent_data: Intent, n_examples: int) -> list[Message]: ] def _create_final_message(self, intent_data: Intent, n_examples: int, sample_utterances: list[str]) -> Message: - content = f"{self._INTENT_NAME_LABEL}: {intent_data.name}\n\n" f"{self._EXAMPLE_UTTERANCES_LABEL}:\n" + content = f"{self._INTENT_NAME_LABEL}: {intent_data.name}\n\n{self._EXAMPLE_UTTERANCES_LABEL}:\n" if sample_utterances: - numbered_utterances = "\n".join(f"{i+1}. {utt}" for i, utt in enumerate(sample_utterances)) + numbered_utterances = "\n".join(f"{i + 1}. {utt}" for i, utt in enumerate(sample_utterances)) content += numbered_utterances + "\n\n" content += self._GENERATE_INSTRUCTION.format(n_examples=n_examples) diff --git a/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py b/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py index b305563f7..62914bd8f 100644 --- a/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py +++ b/autointent/generation/utterances/basic/chat_templates/_synthesizer_en.py @@ -70,7 +70,7 @@ class EnglishSynthesizerTemplate(BaseSynthesizerTemplate): ), Message( role=Role.ASSISTANT, - content=("1. Can you tell me the forecast for tomorrow?\n" "2. Is it going to rain this weekend?"), + content="1. Can you tell me the forecast for tomorrow?\n2. Is it going to rain this weekend?", ), Message( role=Role.USER, diff --git a/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py b/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py index 82c74daba..54416060b 100644 --- a/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py +++ b/autointent/generation/utterances/basic/chat_templates/_synthesizer_ru.py @@ -53,7 +53,7 @@ class RussianSynthesizerTemplate(BaseSynthesizerTemplate): ), Message( role=Role.ASSISTANT, - content=("1. Забронируйте люкс в Санкт-Петербурге на выходные\n" "2. Ищу номер с видом на море в Сочи"), + content=("1. Забронируйте люкс в Санкт-Петербурге на выходные\n2. Ищу номер с видом на море в Сочи"), ), Message( role=Role.USER, @@ -66,7 +66,7 @@ class RussianSynthesizerTemplate(BaseSynthesizerTemplate): ), Message( role=Role.ASSISTANT, - content=("1. Какой прогноз на завтра?\n" "2. Будет ли дождь в субботу?"), + content=("1. Какой прогноз на завтра?\n2. Будет ли дождь в субботу?"), ), Message( role=Role.USER, diff --git a/autointent/generation/utterances/basic/utterance_generator.py b/autointent/generation/utterances/basic/utterance_generator.py index ac6189f8c..a69df3c85 100644 --- a/autointent/generation/utterances/basic/utterance_generator.py +++ b/autointent/generation/utterances/basic/utterance_generator.py @@ -1,15 +1,14 @@ """Basic generation of new utterances from existing ones.""" import asyncio -from collections.abc import Callable from datasets import Dataset as HFDataset from datasets import concatenate_datasets from autointent import Dataset from autointent.custom_types import Split +from autointent.generation.utterances.basic.chat_templates import BaseSynthesizerTemplate from autointent.generation.utterances.generator import Generator -from autointent.generation.utterances.schemas import Message from autointent.schemas import Intent, Sample @@ -22,9 +21,7 @@ class UtteranceGenerator: punctuation, and length of the desired generations. """ - def __init__( - self, generator: Generator, prompt_maker: Callable[[Intent, int], list[Message]], async_mode: bool = False - ) -> None: + def __init__(self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False) -> None: """Initialize.""" self.generator = generator self.prompt_maker = prompt_maker diff --git a/autointent/generation/utterances/evolution/chat_templates/__init__.py b/autointent/generation/utterances/evolution/chat_templates/__init__.py index f6c4e2902..e38ce5df6 100644 --- a/autointent/generation/utterances/evolution/chat_templates/__init__.py +++ b/autointent/generation/utterances/evolution/chat_templates/__init__.py @@ -7,7 +7,13 @@ from .informal import InformalEvolution from .reasoning import ReasoningEvolution +EVOLUTION_NAMES = [evolution.name for evolution in EvolutionChatTemplate.__subclasses__()] + +EVOLUTION_MAPPING = {evolution.name: evolution() for evolution in EvolutionChatTemplate.__subclasses__()} + __all__ = [ + "EVOLUTION_MAPPING", + "EVOLUTION_NAMES", "AbstractEvolution", "ConcreteEvolution", "EvolutionChatTemplate", diff --git a/autointent/generation/utterances/evolution/chat_templates/abstract.py b/autointent/generation/utterances/evolution/chat_templates/abstract.py index 20c2da5b2..6698e027d 100644 --- a/autointent/generation/utterances/evolution/chat_templates/abstract.py +++ b/autointent/generation/utterances/evolution/chat_templates/abstract.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,7 @@ class AbstractEvolution(EvolutionChatTemplate): """Chat template for evolution augmentation via abstraction.""" + name = "abstract" _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -36,10 +36,3 @@ class AbstractEvolution(EvolutionChatTemplate): ), Message(role=Role.ASSISTANT, content="I'm having trouble with my laptop."), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Make chat to complete.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/base.py b/autointent/generation/utterances/evolution/chat_templates/base.py index cb119d1be..1a6c7bb1f 100644 --- a/autointent/generation/utterances/evolution/chat_templates/base.py +++ b/autointent/generation/utterances/evolution/chat_templates/base.py @@ -1,14 +1,21 @@ """Base class for chat templates for evolution augmentation.""" -from abc import ABC, abstractmethod +from typing import ClassVar -from autointent.generation.utterances.schemas import Message +from autointent.generation.utterances.schemas import Message, Role from autointent.schemas import Intent -class EvolutionChatTemplate(ABC): +class EvolutionChatTemplate: """Base class for chat templates for evolution augmentation.""" - @abstractmethod + _messages: ClassVar[list[Message]] + name: str + def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: """Make a chat to complete by LLM.""" + invoke_message = Message( + role=Role.USER, + content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}", + ) + return [*self._messages, invoke_message] diff --git a/autointent/generation/utterances/evolution/chat_templates/concrete.py b/autointent/generation/utterances/evolution/chat_templates/concrete.py index dcca78bac..b6cf984e2 100644 --- a/autointent/generation/utterances/evolution/chat_templates/concrete.py +++ b/autointent/generation/utterances/evolution/chat_templates/concrete.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,8 @@ class ConcreteEvolution(EvolutionChatTemplate): """Chat template for evolution augmentation via concretizing.""" + name = "concrete" + _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -29,14 +30,7 @@ class ConcreteEvolution(EvolutionChatTemplate): Message(role=Role.ASSISTANT, content="I want to reserve a table for 4 persons at 9 pm."), Message( role=Role.USER, - content=("Intent name: requesting technical support\n" "Utterance: I'm having trouble with my laptop."), + content="Intent name: requesting technical support\nUtterance: I'm having trouble with my laptop.", ), Message(role=Role.ASSISTANT, content="My laptop is constantly rebooting and overheating."), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Make chat to complete.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/formal.py b/autointent/generation/utterances/evolution/chat_templates/formal.py index a527826c9..9ba0d5364 100644 --- a/autointent/generation/utterances/evolution/chat_templates/formal.py +++ b/autointent/generation/utterances/evolution/chat_templates/formal.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,8 @@ class FormalEvolution(EvolutionChatTemplate): """Chat template for formal tone augmentation.""" + name: str = "formal" + _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -39,10 +40,3 @@ class FormalEvolution(EvolutionChatTemplate): content="My Lenovo laptop frequently restarts and experiences overheating issues. Kindly assist.", ), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Generate chat for formal tone adaptation.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/funny.py b/autointent/generation/utterances/evolution/chat_templates/funny.py index 0f401b464..2b799a3d6 100644 --- a/autointent/generation/utterances/evolution/chat_templates/funny.py +++ b/autointent/generation/utterances/evolution/chat_templates/funny.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,7 @@ class FunnyEvolution(EvolutionChatTemplate): """Chat template for humorous tone augmentation.""" + name: str = "funny" _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -37,10 +37,3 @@ class FunnyEvolution(EvolutionChatTemplate): ), Message(role=Role.ASSISTANT, content="My Lenovo thinks it's a phoenix—keeps dying and rising in flames."), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Generate chat for humorous tone adaptation.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/goofy.py b/autointent/generation/utterances/evolution/chat_templates/goofy.py index c53156054..5650eba4f 100644 --- a/autointent/generation/utterances/evolution/chat_templates/goofy.py +++ b/autointent/generation/utterances/evolution/chat_templates/goofy.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,7 @@ class GoofyEvolution(EvolutionChatTemplate): """Chat template for goofy tone augmentation.""" + name: str = "goofy" _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -39,10 +39,3 @@ class GoofyEvolution(EvolutionChatTemplate): role=Role.ASSISTANT, content="My laptop's having an existential crisis—keeps rebooting and melting. Help!" ), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Generate chat for goofy tone adaptation.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/informal.py b/autointent/generation/utterances/evolution/chat_templates/informal.py index 7d9243f20..ee3debeb3 100644 --- a/autointent/generation/utterances/evolution/chat_templates/informal.py +++ b/autointent/generation/utterances/evolution/chat_templates/informal.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,7 @@ class InformalEvolution(EvolutionChatTemplate): """Chat template for informal tone augmentation.""" + name: str = "informal" _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -36,10 +36,3 @@ class InformalEvolution(EvolutionChatTemplate): ), Message(role=Role.ASSISTANT, content="My Lenovo keeps crashing and getting super hot. Any ideas?"), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Generate chat for informal tone adaptation.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/chat_templates/reasoning.py b/autointent/generation/utterances/evolution/chat_templates/reasoning.py index 39791acc9..283c24515 100644 --- a/autointent/generation/utterances/evolution/chat_templates/reasoning.py +++ b/autointent/generation/utterances/evolution/chat_templates/reasoning.py @@ -3,7 +3,6 @@ from typing import ClassVar from autointent.generation.utterances.schemas import Message, Role -from autointent.schemas import Intent from .base import EvolutionChatTemplate @@ -11,6 +10,8 @@ class ReasoningEvolution(EvolutionChatTemplate): """Chat template for evolution augmentation via reasoning.""" + name = "reasoning" + _messages: ClassVar[list[Message]] = [ Message( role=Role.USER, @@ -36,10 +37,3 @@ class ReasoningEvolution(EvolutionChatTemplate): ), Message(role=Role.ASSISTANT, content="I don't know what's happening with my laptop."), ] - - def __call__(self, utterance: str, intent_data: Intent) -> list[Message]: - """Make chat to complete.""" - return [ - *self._messages, - Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"), - ] diff --git a/autointent/generation/utterances/evolution/cli.py b/autointent/generation/utterances/evolution/cli.py index b5d0fd7c8..b7afe84c8 100644 --- a/autointent/generation/utterances/evolution/cli.py +++ b/autointent/generation/utterances/evolution/cli.py @@ -8,13 +8,8 @@ from autointent.generation.utterances.generator import Generator from .chat_templates import ( - AbstractEvolution, - ConcreteEvolution, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, + EVOLUTION_MAPPING, + EVOLUTION_NAMES, ) logging.basicConfig(level="INFO") @@ -44,14 +39,8 @@ def _parse_args() -> Namespace: ) parser.add_argument("--private", action="store_true", help="Publish privately if --output-repo option is used") parser.add_argument("--n-evolutions", type=int, default=1, help="Number of utterances to generate for each intent") - parser.add_argument("--decide-for-me", action="store_true") - parser.add_argument("--reasoning", action="store_true", help="Whether to use `Reasoning` evolution") - parser.add_argument("--concretizing", action="store_true", help="Whether to use `Concretizing` evolution") - parser.add_argument("--abstract", action="store_true", help="Whether to use `Abstract` evolution") - parser.add_argument("--formal", action="store_true", help="Whether to use `Formal` evolution") - parser.add_argument("--funny", action="store_true", help="Whether to use `Funny` evolution") - parser.add_argument("--goofy", action="store_true", help="Whether to use `Goofy` evolution") - parser.add_argument("--informal", action="store_true", help="Whether to use `Informal` evolution") + parser.add_argument("--decide-for-me", action="store_true", help="Enable incremental evolution") + parser.add_argument("--template", type=str, choices=EVOLUTION_NAMES, help="Template to use", nargs="+") parser.add_argument("--async-mode", action="store_true", help="Enable asynchronous generation") parser.add_argument("--seed", type=int, default=0) parser.add_argument("--batch-size", type=int, default=4) @@ -62,25 +51,8 @@ def _parse_args() -> Namespace: def main() -> None: """CLI endpoint.""" - mapping = { - "reasoning": ReasoningEvolution, - "concretizing": ConcreteEvolution, - "abstract": AbstractEvolution, - "formal": FormalEvolution, - "funny": FunnyEvolution, - "goofy": GoofyEvolution, - "informal": InformalEvolution, - } args = _parse_args() - evolutions = [] - - for arg_name, evolution_cls in mapping.items(): - if getattr(args, arg_name): - evolutions.append(evolution_cls()) # type: ignore[abstract] - - if not evolutions: - logger.warning("No evolutions selected. Exiting.") - return + evolutions = [EVOLUTION_MAPPING[template_name] for template_name in args.template] utterance_evolver: UtteranceEvolver if args.decide_for_me: @@ -103,7 +75,7 @@ def main() -> None: dataset.to_json(args.output_path) if args.output_repo is not None: - dataset.push_to_hub(args.output_repo) + dataset.push_to_hub(args.output_repo, args.private) if __name__ == "__main__": diff --git a/autointent/generation/utterances/evolution/evolver.py b/autointent/generation/utterances/evolution/evolver.py index 0de075f60..98424b682 100644 --- a/autointent/generation/utterances/evolution/evolver.py +++ b/autointent/generation/utterances/evolution/evolver.py @@ -6,15 +6,15 @@ import asyncio import random -from collections.abc import Callable, Sequence +from collections.abc import Sequence from datasets import Dataset as HFDataset from datasets import concatenate_datasets from autointent import Dataset from autointent.custom_types import Split +from autointent.generation.utterances.evolution.chat_templates import EvolutionChatTemplate from autointent.generation.utterances.generator import Generator -from autointent.generation.utterances.schemas import Message from autointent.schemas import Intent @@ -29,7 +29,7 @@ class UtteranceEvolver: def __init__( self, generator: Generator, - prompt_makers: Sequence[Callable[[str, Intent], list[Message]]], + prompt_makers: Sequence[EvolutionChatTemplate], seed: int = 0, async_mode: bool = False, ) -> None: diff --git a/autointent/generation/utterances/evolution/incremental_evolver.py b/autointent/generation/utterances/evolution/incremental_evolver.py index c8ac4fe95..4fb3b774b 100644 --- a/autointent/generation/utterances/evolution/incremental_evolver.py +++ b/autointent/generation/utterances/evolution/incremental_evolver.py @@ -5,7 +5,7 @@ """ import copy -from collections.abc import Callable, Sequence +from collections.abc import Sequence from pathlib import Path from typing import Any @@ -14,10 +14,9 @@ from autointent import Dataset, Pipeline from autointent.custom_types import Split +from autointent.generation.utterances.evolution.chat_templates import EvolutionChatTemplate from autointent.generation.utterances.evolution.evolver import UtteranceEvolver from autointent.generation.utterances.generator import Generator -from autointent.generation.utterances.schemas import Message -from autointent.schemas import Intent SEARCH_SPACE = [ { @@ -47,7 +46,7 @@ class IncrementalUtteranceEvolver(UtteranceEvolver): def __init__( self, generator: Generator, - prompt_makers: Sequence[Callable[[str, Intent], list[Message]]], + prompt_makers: Sequence[EvolutionChatTemplate], seed: int = 0, async_mode: bool = False, search_space: str | None = None, diff --git a/pyproject.toml b/pyproject.toml index a810de550..e9dd1069e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers=[ 'Framework :: Sphinx', 'Typing :: Typed', ] -requires-python = ">=3.10,<=4.0" +requires-python = ">=3.10,<4.0" dependencies = [ "sentence-transformers (>=3,<4)", "scikit-learn (>=1.5,<2.0)",