deeppavlov · voorhs · Feb 10, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/autointent/generation/intents/prompt_scheme.py b/autointent/generation/intents/prompt_scheme.py
@@ -2,7 +2,7 @@
 
 from pydantic import BaseModel, field_validator
 
-from autointent.generation.utterances.prompts import PROMPT_DESCRIPTION
+from autointent.generation.intents.prompts import PROMPT_DESCRIPTION
 
 
 class PromptDescription(BaseModel):

diff --git a/autointent/generation/utterances/prompts.py → autointent/generation/intents/prompts.py b/autointent/generation/utterances/prompts.py → autointent/generation/intents/prompts.py
diff --git a/autointent/generation/utterances/__init__.py b/autointent/generation/utterances/__init__.py
@@ -1,14 +1,28 @@
 from .basic import SynthesizerChatTemplate, UtteranceGenerator
-from .evolution import AbstractEvolution, ConcreteEvolution, EvolutionChatTemplate, ReasoningEvolution, UtteranceEvolver
+from .evolution import (
+    AbstractEvolution,
+    ConcreteEvolution,
+    EvolutionChatTemplate,
+    FormalEvolution,
+    FunnyEvolution,
+    GoofyEvolution,
+    InformalEvolution,
+    ReasoningEvolution,
+    UtteranceEvolver,
+)
 from .generator import Generator
 
 __all__ = [
-    "AbstractEvolution",
-    "ConcreteEvolution",
-    "EvolutionChatTemplate",
-    "Generator",
-    "ReasoningEvolution",
-    "SynthesizerChatTemplate",
-    "UtteranceEvolver",
-    "UtteranceGenerator",
+    "AbstractEvolution",
+    "ConcreteEvolution",
+    "EvolutionChatTemplate",
+    "FormalEvolution",
+    "FunnyEvolution",
+    "Generator",
+    "GoofyEvolution",
+    "InformalEvolution",
+    "ReasoningEvolution",
+    "SynthesizerChatTemplate",
+    "UtteranceEvolver",
+    "UtteranceGenerator",
 ]
diff --git a/autointent/generation/utterances/basic/cli.py b/autointent/generation/utterances/basic/cli.py
@@ -14,7 +14,7 @@
 
 
 def main() -> None:
-    """ClI endpoint."""
+    """CLI endpoint."""
     parser = ArgumentParser()
     parser.add_argument(
         "--input-path",
@@ -48,11 +48,12 @@ def main() -> None:
         default=5,
         help="Number of utterances to use as an example for augmentation",
     )
+    parser.add_argument("--async-mode", action="store_true", help="Enable asynchronous generation")
     args = parser.parse_args()
 
     dataset = load_dataset(args.input_path)
     template = SynthesizerChatTemplate(dataset, args.split, max_sample_utterances=args.n_sample_utterances)
-    generator = UtteranceGenerator(Generator(), template)
+    generator = UtteranceGenerator(Generator(), template, async_mode=args.async_mode)
 
     n_before = len(dataset[args.split])
     new_samples = generator.augment(dataset, split_name=args.split, n_generations=args.n_generations)

diff --git a/autointent/generation/utterances/basic/utterance_generator.py b/autointent/generation/utterances/basic/utterance_generator.py
@@ -1,5 +1,6 @@
 """Basic generation of new utterances from existing ones."""
 
+import asyncio
 from collections.abc import Callable
 
 from datasets import Dataset as HFDataset
@@ -17,46 +18,105 @@ class UtteranceGenerator:
     Basic generation of new utterances from existing ones.
 
     This augmentation method simply prompts LLM to look at existing examples
-    and generate similar. Additionaly it can consider some aspects of style,
-    punctuation and length of the desired generations.
+    and generate similar. Additionally, it can consider some aspects of style,
+    punctuation, and length of the desired generations.
     """
 
-    def __init__(self, generator: Generator, prompt_maker: Callable[[Intent, int], list[Message]]) -> None:
+    def __init__(
+        self, generator: Generator, prompt_maker: Callable[[Intent, int], list[Message]], async_mode: bool = False
+    ) -> None:
         """Initialize."""
         self.generator = generator
         self.prompt_maker = prompt_maker
+        self.async_mode = async_mode
 
     def __call__(self, intent_data: Intent, n_generations: int) -> list[str]:
         """Generate new utterances."""
         messages = self.prompt_maker(intent_data, n_generations)
         response_text = self.generator.get_chat_completion(messages)
         return _extract_utterances(response_text)
 
+    async def _call_async(self, intent_data: Intent, n_generations: int) -> list[str]:
+        """Asynchronous counterpart of ``__call__``: build the prompt, await the LLM, parse its reply."""
+        prompt = self.prompt_maker(intent_data, n_generations)
+        completion = await self.generator.get_chat_completion_async(prompt)
+        return _extract_utterances(completion)
+
     def augment(
         self,
         dataset: Dataset,
         split_name: str = Split.TRAIN,
         n_generations: int = 5,
         update_split: bool = True,
+        batch_size: int = 4,
     ) -> list[Sample]:
         """
         Augment some split of dataset.
 
-        TODO Note that for now it supports only single-label datasets.
+        :param dataset: Dataset object
+        :param split_name: Dataset split (default is TRAIN)
+        :param n_generations: Number of utterances to generate per intent
+        :param update_split: Whether to update the dataset split
+        :param batch_size: Number of intents requested concurrently per batch (used only in async mode)
+        :return: List of generated samples
         """
+        if self.async_mode:
+            # asyncio.run() owns the loop lifecycle; asyncio.get_event_loop() is deprecated
+            # when no loop is running and raises RuntimeError on newer Python versions.
+            return asyncio.run(self._augment_async(dataset, split_name, n_generations, update_split, batch_size))
+
         original_split = dataset[split_name]
         new_samples = []
         for intent in dataset.intents:
-            generated_utterances = self(
-                intent_data=intent,
-                n_generations=n_generations,
+            generated_utterances = self(intent_data=intent, n_generations=n_generations)
+            new_samples.extend(
+                [{Dataset.label_feature: intent.id, Dataset.utterance_feature: ut} for ut in generated_utterances]
             )
+
+        if update_split:
+            generated_split = HFDataset.from_list(new_samples)
+            dataset[split_name] = concatenate_datasets([original_split, generated_split])
+
+        return [Sample(**sample) for sample in new_samples]
+
+    async def _augment_async(
+        self,
+        dataset: Dataset,
+        split_name: str = Split.TRAIN,
+        n_generations: int = 5,
+        update_split: bool = True,
+        batch_size: int = 4,
+    ) -> list[Sample]:
+        """
+        Augment some split of dataset asynchronously in batches.
+
+        :param dataset: Dataset object
+        :param split_name: Dataset split (default is TRAIN)
+        :param n_generations: Number of utterances to generate per intent
+        :param update_split: Whether to update the dataset split
+        :param batch_size: Batch size for async generation
+        :return: List of generated samples
+        """
+        original_split = dataset[split_name]
+        new_samples = []
+
+        results = []
+        for start_idx in range(0, len(dataset.intents), batch_size):
+            batch_intents = dataset.intents[start_idx : start_idx + batch_size]
+            tasks = [self._call_async(intent_data=intent, n_generations=n_generations) for intent in batch_intents]
+            batch_results = await asyncio.gather(*tasks)
+            results.extend(batch_results)
+
+        for i, generated_utterances in enumerate(results):
+            intent = dataset.intents[i]
             new_samples.extend(
                 [{Dataset.label_feature: intent.id, Dataset.utterance_feature: ut} for ut in generated_utterances]
             )
+
         if update_split:
             generated_split = HFDataset.from_list(new_samples)
             dataset[split_name] = concatenate_datasets([original_split, generated_split])
+
         return [Sample(**sample) for sample in new_samples]
 
 
@@ -68,4 +128,4 @@ def _extract_utterances(response_text: str) -> list[str]:
     """
     raw_utterances = response_text.split("\n")
     # remove enumeration
-    return [ut[ut.find(" ") + 1 :] for ut in raw_utterances]
+    return [ut[ut.find(" ") + 1 :] if " " in ut else ut for ut in raw_utterances]
diff --git a/autointent/generation/utterances/evolution/__init__.py b/autointent/generation/utterances/evolution/__init__.py
@@ -1,4 +1,23 @@
-from .chat_templates import AbstractEvolution, ConcreteEvolution, EvolutionChatTemplate, ReasoningEvolution
+from .chat_templates import (
+    AbstractEvolution,
+    ConcreteEvolution,
+    EvolutionChatTemplate,
+    FormalEvolution,
+    FunnyEvolution,
+    GoofyEvolution,
+    InformalEvolution,
+    ReasoningEvolution,
+)
 from .evolver import UtteranceEvolver
 
-__all__ = ["AbstractEvolution", "ConcreteEvolution", "EvolutionChatTemplate", "ReasoningEvolution", "UtteranceEvolver"]
+__all__ = [
+    "AbstractEvolution",
+    "ConcreteEvolution",
+    "EvolutionChatTemplate",
+    "FormalEvolution",
+    "FunnyEvolution",
+    "GoofyEvolution",
+    "InformalEvolution",
+    "ReasoningEvolution",
+    "UtteranceEvolver",
+]
diff --git a/autointent/generation/utterances/evolution/chat_templates/__init__.py b/autointent/generation/utterances/evolution/chat_templates/__init__.py
@@ -1,6 +1,19 @@
 from .abstract import AbstractEvolution
 from .base import EvolutionChatTemplate
 from .concrete import ConcreteEvolution
+from .formal import FormalEvolution
+from .funny import FunnyEvolution
+from .goofy import GoofyEvolution
+from .informal import InformalEvolution
 from .reasoning import ReasoningEvolution
 
-__all__ = ["AbstractEvolution", "ConcreteEvolution", "EvolutionChatTemplate", "ReasoningEvolution"]
+__all__ = [
+    "AbstractEvolution",
+    "ConcreteEvolution",
+    "EvolutionChatTemplate",
+    "FormalEvolution",
+    "FunnyEvolution",
+    "GoofyEvolution",
+    "InformalEvolution",
+    "ReasoningEvolution",
+]
diff --git a/autointent/generation/utterances/evolution/chat_templates/abstract.py b/autointent/generation/utterances/evolution/chat_templates/abstract.py
@@ -28,9 +28,9 @@ class AbstractEvolution(EvolutionChatTemplate):
         ),
         Message(role=Role.ASSISTANT, content="Please, reserve a table for me."),
         Message(
-            role=Role.ASSISTANT,
+            role=Role.USER,
             content=(
-                "Intent name: requesting technical support"
+                "Intent name: requesting technical support\n"
                 "Utterance: My Lenovo laptop is constantly rebooting and overheating."
             ),
         ),

diff --git a/autointent/generation/utterances/evolution/chat_templates/concrete.py b/autointent/generation/utterances/evolution/chat_templates/concrete.py
@@ -29,7 +29,10 @@ class ConcreteEvolution(EvolutionChatTemplate):
         Message(role=Role.ASSISTANT, content="I want to reserve a table for 4 persons at 9 pm."),
         Message(
             role=Role.USER,
-            content=("Intent name: requesting technical support\n" "Utterance: I'm having trouble with my laptop."),
+            content=(
+                "Intent name: requesting technical support\n"
+                "Utterance: I'm having trouble with my laptop."
+            ),
         ),
         Message(role=Role.ASSISTANT, content="My laptop is constantly rebooting and overheating."),
     ]

diff --git a/autointent/generation/utterances/evolution/chat_templates/formal.py b/autointent/generation/utterances/evolution/chat_templates/formal.py
@@ -0,0 +1,48 @@
+"""Chat template for formal tone augmentation."""
+
+from typing import ClassVar
+
+from autointent.generation.utterances.schemas import Message, Role
+from autointent.schemas import Intent
+
+from .base import EvolutionChatTemplate
+
+
+class FormalEvolution(EvolutionChatTemplate):
+    """Few-shot chat template that asks an LLM to rewrite an utterance in a more formal tone."""
+
+    _messages: ClassVar[list[Message]] = [
+        Message(
+            role=Role.USER,
+            content=(
+                "I want you to act as a rewriter. "
+                "You will be provided with an utterance and the topic (name of intent class) of the utterance. "
+                "You need to rewrite the utterance in a more formal tone using the following method:\n"
+                "1. Rewrite the utterance in a more formal tone.\n"
+                "2. Use polite and professional language while maintaining clarity.\n"
+                "3. The rewritten utterance should be grammatically correct and complete.\n"
+                "4. Keep the rewritten utterance within 15 words.\n\n"
+                "Intent name: Reserve Restaurant\n"
+                "Utterance: I want to reserve a table for 4 persons at 9 pm."
+            ),
+        ),
+        Message(role=Role.ASSISTANT, content="I would like to make a reservation for four guests at 9 pm."),
+        Message(
+            role=Role.USER,
+            content=(
+                "Intent name: requesting technical support\n"
+                "Utterance: My Lenovo laptop is constantly rebooting and overheating."
+            ),
+        ),
+        Message(
+            role=Role.ASSISTANT,
+            content="My Lenovo laptop frequently restarts and experiences overheating issues. Kindly assist.",
+        ),
+    ]
+
+    def __call__(self, utterance: str, intent_data: Intent) -> list[Message]:
+        """Return the few-shot examples followed by a user message for ``utterance`` and its intent name."""
+        return [
+            *self._messages,
+            Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"),
+        ]
diff --git a/autointent/generation/utterances/evolution/chat_templates/funny.py b/autointent/generation/utterances/evolution/chat_templates/funny.py
@@ -0,0 +1,46 @@
+"""Chat template for humorous tone augmentation."""
+
+from typing import ClassVar
+
+from autointent.generation.utterances.schemas import Message, Role
+from autointent.schemas import Intent
+
+from .base import EvolutionChatTemplate
+
+
+class FunnyEvolution(EvolutionChatTemplate):
+    """Few-shot chat template that asks an LLM to rewrite an utterance in a humorous tone."""
+
+    _messages: ClassVar[list[Message]] = [
+        Message(
+            role=Role.USER,
+            content=(
+                "I want you to act as a rewriter. "
+                "You will be provided with an utterance and the topic (name of intent class) of the utterance. "
+                "You need to rewrite the utterance in a humorous way while maintaining its original meaning using "
+                "the following method:\n"
+                "1. Rewrite the utterance in a humorous way while maintaining its original meaning.\n"
+                "2. Use wordplay, exaggeration, or lighthearted phrasing.\n"
+                "3. The rewritten utterance should still be understandable and relevant.\n"
+                "4. Keep it within 15 words.\n\n"
+                "Intent name: Reserve Restaurant\n"
+                "Utterance: I want to reserve a table for 4 persons at 9 pm."
+            ),
+        ),
+        Message(role=Role.ASSISTANT, content="Gotta feed my squad at 9 pm. Got a table for us?"),
+        Message(
+            role=Role.USER,
+            content=(
+                "Intent name: requesting technical support\n"
+                "Utterance: My Lenovo laptop is constantly rebooting and overheating."
+            ),
+        ),
+        Message(role=Role.ASSISTANT, content="My Lenovo thinks it's a phoenix—keeps dying and rising in flames."),
+    ]
+
+    def __call__(self, utterance: str, intent_data: Intent) -> list[Message]:
+        """Return the few-shot examples followed by a user message for ``utterance`` and its intent name."""
+        return [
+            *self._messages,
+            Message(role=Role.USER, content=f"Intent name: {intent_data.name or ''}\nUtterance: {utterance}"),
+        ]