
Commit 40d2986

adversarial augmentation (#251)

* adversarial augmentation
* async mode, lint, typing, etc
* async update (aiometer); roughly 2-3x speedup in Google Colab
* async
* mypy fix
* mypy again and init
* fix
* move to proper directory
* run formatter
* tests for adversarial augmentation
* bug fixes
* another bug
* I won't give up
* add disclaimer

---------

Co-authored-by: voorhs <[email protected]>
1 parent 13d8c40 commit 40d2986

File tree

5 files changed: +359 −0 lines changed

src/autointent/generation/utterances/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -1,10 +1,13 @@
 """Generative methods for enriching dataset with synthetic samples."""

+from ._adversarial import CriticHumanLike, HumanUtteranceGenerator
 from ._basic import DatasetBalancer, UtteranceGenerator
 from ._evolution import IncrementalUtteranceEvolver, UtteranceEvolver

 __all__ = [
+    "CriticHumanLike",
     "DatasetBalancer",
+    "HumanUtteranceGenerator",
     "IncrementalUtteranceEvolver",
     "UtteranceEvolver",
     "UtteranceGenerator",

src/autointent/generation/utterances/_adversarial/__init__.py (new file)

Lines changed: 4 additions & 0 deletions

from .critic_human_like import CriticHumanLike
from .human_utterance_generator import HumanUtteranceGenerator

__all__ = ["CriticHumanLike", "HumanUtteranceGenerator"]

src/autointent/generation/utterances/_adversarial/critic_human_like.py (new file)

Lines changed: 83 additions & 0 deletions

"""CriticHumanLike class for distinguishing human vs generated utterances."""

from typing import Literal

from pydantic import BaseModel

from autointent.generation import Generator
from autointent.generation.chat_templates import Message, Role


class CriticResponse(BaseModel):
    """Structured answer."""

    reasoning: str
    label: Literal["human", "generated"]


class CriticHumanLike:
    """A simple critic that classifies user utterances as either 'human' or 'generated'.

    It uses an LLM-based binary classification prompt.
    """

    def __init__(self, generator: Generator, max_retries: int = 3) -> None:
        """Initialize the CriticHumanLike.

        Args:
            generator: Wrapper for the LLM API to generate classification responses.
            max_retries: Maximum number of attempts to retry classification if the response is invalid.
        """
        self.generator = generator
        self.max_retries = max_retries

    def build_classification_prompt(self, example: str, intent_name: str) -> Message:
        """Build the prompt asking the LLM to classify the utterance.

        Args:
            example: The user utterance to classify.
            intent_name: The name of the intent associated with the utterance.

        Returns:
            Message: A formatted message prompt for classification.
        """
        content = (
            "You are a critic that determines whether a user utterance was written by a human or "
            "generated by a language model.\n\n"
            f"Intent: {intent_name}\n"
            f'Utterance: "{example}"\n\n'
            "Here is an example of a human-written utterance for this intent:\n"
            '"Could you please help me find the nearest coffee shop?"\n\n'
            "Respond in **JSON format** with two keys:\n"
            "- `reasoning`: a short chain-of-thought where you explain your logic\n"
            "- `label`: must be either `human` or `generated`\n"
            "Example:\n"
            "{\n"
            '  "reasoning": "The phrasing includes casual contractions and natural hesitation. The utterance '
            'flows similarly to how a human would speak spontaneously.",\n'
            '  "label": "human"\n'
            "}"
        )
        return Message(role=Role.USER, content=content)

    def is_human(self, utterance: str, intent_name: str) -> bool:
        """Classify an utterance synchronously.

        Args:
            utterance: The utterance to evaluate.
            intent_name: The associated intent.

        Returns:
            bool: True if classified as human, False otherwise.
        """
        message = self.build_classification_prompt(utterance, intent_name)
        response = self.generator.get_structured_output_sync(
            messages=[message], output_model=CriticResponse, max_retries=self.max_retries
        )
        return response.label == "human"

    async def is_human_async(self, utterance: str, intent_name: str) -> bool:
        """Asynchronous counterpart of `is_human`."""
        message = self.build_classification_prompt(utterance, intent_name)
        response = await self.generator.get_structured_output_async(
            messages=[message], output_model=CriticResponse, max_retries=self.max_retries
        )
        return response.label == "human"
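
For orientation, here is a minimal usage sketch of the critic. The Generator construction below is an illustrative assumption; this commit only defines CriticHumanLike itself, and the real Generator setup (model name, API credentials) lives elsewhere in autointent.

    from autointent.generation import Generator
    from autointent.generation.utterances import CriticHumanLike

    # Hypothetical setup: Generator arguments are assumptions, not part of this commit.
    generator = Generator()

    critic = CriticHumanLike(generator, max_retries=3)

    # The critic prompts the LLM for a structured {"reasoning", "label"} response
    # and returns True when the label is "human".
    print(critic.is_human("i want pizza", intent_name="OrderFood"))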

src/autointent/generation/utterances/_adversarial/human_utterance_generator.py (new file)

Lines changed: 194 additions & 0 deletions

import asyncio
import logging
import random
from collections import defaultdict
from functools import partial
from typing import Any

import aiometer
from datasets import Dataset as HFDataset
from datasets import concatenate_datasets

from autointent import Dataset
from autointent.custom_types import Split
from autointent.generation import Generator
from autointent.generation.chat_templates._evolution_templates_schemas import Message, Role
from autointent.schemas import Sample

from .critic_human_like import CriticHumanLike

logger = logging.getLogger(__name__)


class HumanUtteranceGenerator:
    """Generator of human-like utterances.

    This class generates new user utterances from seed examples, aiming for natural,
    human-sounding phrasing while preserving the original intent. The generation process
    is iterative and attempts to bypass a critic that identifies machine-generated text.

    .. warning:: This method is experimental and can yield inferior data quality.

    """

    def __init__(
        self,
        generator: Generator,
        critic: CriticHumanLike,
        async_mode: bool = False,
        max_at_once: int = 5,
        max_per_second: int = 10,
    ) -> None:
        """Initialize the HumanUtteranceGenerator.

        Args:
            generator: Wrapper for the LLM API used to generate utterances.
            critic: Critic to determine whether the generated utterance sounds human-like.
            async_mode: Whether to use asynchronous mode for generation.
            max_at_once: Maximum number of concurrent async tasks.
            max_per_second: Maximum number of tasks per second.
        """
        self.generator = generator
        self.critic = critic
        self.async_mode = async_mode
        self.max_at_once = max_at_once
        self.max_per_second = max_per_second

    def augment(
        self, dataset: Dataset, split_name: str = Split.TRAIN, update_split: bool = True, n_final_per_class: int = 5
    ) -> list[Sample]:
        """Generate human-like utterances for each intent by iteratively refining machine-generated candidates.

        Args:
            dataset: The dataset to augment.
            split_name: The name of the split to augment (e.g., 'train').
            update_split: Whether to update the dataset split with the new utterances.
            n_final_per_class: Number of successful utterances to generate per intent.

        Returns:
            list[Sample]: List of newly generated samples.
        """
        if self.async_mode:
            return asyncio.run(
                self.augment_async(
                    dataset=dataset,
                    split_name=split_name,
                    update_split=update_split,
                    n_final_per_class=n_final_per_class,
                )
            )
        original_split = dataset[split_name]
        id_to_name = {intent.id: intent.name for intent in dataset.intents}
        new_samples = []

        class_to_samples = defaultdict(list)
        for sample in original_split:
            class_to_samples[sample["label"]].append(sample["utterance"])

        for intent_id, intent_name in id_to_name.items():
            if intent_name is None:
                logger.warning("Intent with id %s has no name! Skipping it...", intent_id)
                continue
            generated_count = 0
            attempt = 0

            seed_utterances = class_to_samples.get(intent_id, [])
            if not seed_utterances:
                continue

            # Allow up to three attempts per requested sample before giving up on this class.
            while generated_count < n_final_per_class and attempt < n_final_per_class * 3:
                attempt += 1
                n_seeds = min(3, len(seed_utterances))
                seed_examples = random.sample(seed_utterances, k=n_seeds)
                rejected: list[str] = []

                # Up to three adversarial rounds: each rejected candidate is fed back into the prompt.
                for _ in range(3):
                    prompt = self._build_adversarial_prompt(intent_name, seed_examples, rejected)
                    generated = self.generator.get_chat_completion([prompt]).strip()
                    if self.critic.is_human(generated, intent_name):
                        new_samples.append({Dataset.label_feature: intent_id, Dataset.utterance_feature: generated})
                        generated_count += 1
                        break
                    rejected.append(generated)
        if update_split:
            generated_split = HFDataset.from_list(new_samples)
            dataset[split_name] = concatenate_datasets([original_split, generated_split])

        return [Sample(**sample) for sample in new_samples]

    async def augment_async(
        self, dataset: Dataset, split_name: str = Split.TRAIN, update_split: bool = True, n_final_per_class: int = 5
    ) -> list[Sample]:
        """Asynchronous variant of `augment`; tasks are rate-limited via aiometer."""
        original_split = dataset[split_name]
        id_to_name = {intent.id: intent.name for intent in dataset.intents}
        new_samples = []

        class_to_samples = defaultdict(list)
        for sample in original_split:
            class_to_samples[sample["label"]].append(sample["utterance"])

        async def generate_one(intent_id: int, intent_name: str) -> list[dict[str, Any]]:
            generated: list[dict[str, Any]] = []
            attempts = 0
            seed_utterances = class_to_samples[intent_id]
            while len(generated) < n_final_per_class and attempts < n_final_per_class * 3:
                attempts += 1
                seed_examples = random.sample(seed_utterances, k=min(3, len(seed_utterances)))
                rejected: list[str] = []

                for _ in range(3):
                    prompt = self._build_adversarial_prompt(intent_name, seed_examples, rejected)
                    utterance = (await self.generator.get_chat_completion_async([prompt])).strip()
                    if await self.critic.is_human_async(utterance, intent_name):
                        generated.append({Dataset.label_feature: intent_id, Dataset.utterance_feature: utterance})
                        break
                    rejected.append(utterance)
            return generated

        # Keep intent ids as ints so lookups hit the int-keyed class_to_samples mapping.
        tasks = [
            partial(generate_one, intent_id, intent_name)
            for intent_id, intent_name in id_to_name.items()
            if class_to_samples.get(intent_id) and intent_name is not None
        ]

        results = await aiometer.run_all(
            tasks,
            max_at_once=self.max_at_once,
            max_per_second=self.max_per_second,
        )

        for result in results:
            new_samples.extend(result)
        if update_split:
            generated_split = HFDataset.from_list(new_samples)
            dataset[split_name] = concatenate_datasets([original_split, generated_split])

        return [Sample(**sample) for sample in new_samples]

    def _build_adversarial_prompt(self, intent_name: str, seed_examples: list[str], rejected: list[str]) -> Message:
        """Build a few-shot prompt.

        Build a few-shot prompt to guide the generator to create a new human-like utterance
        from scratch based on the intent name and example utterances.

        Args:
            intent_name: The intent of the utterance.
            seed_examples: List of 1-3 example utterances for the intent.
            rejected: List of previously rejected generations.

        Returns:
            Message: A formatted prompt instructing the generator to produce a new natural-sounding utterance.
        """
        rejected_block = "\n".join(f"- {r}" for r in rejected) if rejected else "None"
        examples_block = "\n".join(f'- "{ex}"' for ex in seed_examples)
        content = (
            f"Your task is to generate a new user utterance that fits the intent '{intent_name}'.\n\n"
            "Here are some examples of utterances for this intent:\n"
            f"{examples_block}\n\n"
            f"Preserve the original intent: '{intent_name}'.\n\n"
            f"The following previous attempts were classified as machine-generated and rejected:\n{rejected_block}\n\n"
            "Try to write something that would pass as written by a real human. Output a single version only.\n"
            "IMPORTANT: You must not repeat the example utterances verbatim."
        )
        return Message(role=Role.USER, content=content)
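
Putting both pieces together, a minimal end-to-end sketch. The Generator() call is an illustrative assumption (its real construction is outside this commit), and the toy dataset follows the shape of the test fixture below:

    from autointent import Dataset
    from autointent.generation import Generator
    from autointent.generation.utterances import CriticHumanLike, HumanUtteranceGenerator

    # Hypothetical setup: Generator construction is not defined by this commit.
    generator = Generator()

    dataset = Dataset.from_dict(
        {
            "intents": [{"id": 0, "name": "Greeting"}],
            "train": [
                {"utterance": "hello", "label": 0},
                {"utterance": "hi there", "label": 0},
            ],
        }
    )

    critic = CriticHumanLike(generator)
    augmenter = HumanUtteranceGenerator(generator, critic, async_mode=True, max_at_once=5, max_per_second=10)

    # Generates up to 5 critic-approved utterances per intent and appends them to the train split.
    new_samples = augmenter.augment(dataset, split_name="train", update_split=True, n_final_per_class=5)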

Tests for adversarial augmentation (new file)

Lines changed: 75 additions & 0 deletions

from unittest.mock import AsyncMock, Mock

import pytest

from autointent import Dataset
from autointent.generation.utterances import CriticHumanLike, HumanUtteranceGenerator
from autointent.schemas import Sample


@pytest.fixture
def dataset():
    return Dataset.from_dict(
        {
            "intents": [
                {"id": 0, "name": "Greeting"},
                {"id": 1, "name": "OrderFood"},
            ],
            "train": [
                {"utterance": "hello", "label": 0},
                {"utterance": "hi there", "label": 0},
                {"utterance": "i want pizza", "label": 1},
            ],
        }
    )


def test_human_utterance_generator_sync(dataset):
    mock_llm = Mock()
    mock_llm.get_chat_completion.return_value = "Human-like utterance"

    mock_critic = Mock(spec=CriticHumanLike)
    mock_critic.is_human.return_value = True

    generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=False)

    n_before = len(dataset["train"])
    new_samples = generator.augment(dataset, split_name="train", update_split=False, n_final_per_class=2)
    n_after = len(dataset["train"])

    assert n_before == n_after
    assert len(new_samples) > 0
    assert all(isinstance(sample, Sample) for sample in new_samples)
    assert all("utterance" in sample.dict() for sample in new_samples)
    assert all("label" in sample.dict() for sample in new_samples)


def test_human_utterance_generator_async(dataset):
    mock_llm = AsyncMock()
    mock_llm.get_chat_completion_async.return_value = "Human-like utterance"

    mock_critic = AsyncMock(spec=CriticHumanLike)
    mock_critic.is_human_async.return_value = True

    generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=True)

    n_before = len(dataset["train"])
    new_samples = generator.augment(dataset, split_name="train", update_split=False, n_final_per_class=2)
    n_after = len(dataset["train"])

    assert n_before == n_after
    assert len(new_samples) > 0
    assert all(isinstance(sample, Sample) for sample in new_samples)
    assert all("utterance" in sample.dict() for sample in new_samples)
    assert all("label" in sample.dict() for sample in new_samples)


def test_human_utterance_generator_respects_critic(dataset):
    mock_llm = Mock()
    mock_llm.get_chat_completion.return_value = "Generated utterance"

    mock_critic = Mock(spec=CriticHumanLike)
    mock_critic.is_human.return_value = True
    generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=False)
    new_samples = generator.augment(dataset, split_name="train", update_split=False, n_final_per_class=1)
    assert len(new_samples) > 0
    assert mock_critic.is_human.call_count >= 1
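
The tests above only exercise a critic that accepts every candidate. As a complementary sketch (not part of this commit), one could also pin down the retry bound in augment when the critic rejects everything; the call count follows from the loop structure shown in the generator file:

    def test_human_utterance_generator_critic_rejects_all(dataset):
        mock_llm = Mock()
        mock_llm.get_chat_completion.return_value = "Generated utterance"

        # The critic never accepts, so every adversarial round is exhausted.
        mock_critic = Mock(spec=CriticHumanLike)
        mock_critic.is_human.return_value = False

        generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=False)
        new_samples = generator.augment(dataset, split_name="train", update_split=False, n_final_per_class=1)

        # Nothing passes the critic, so no samples are produced.
        assert len(new_samples) == 0
        # 2 intents x (n_final_per_class * 3) attempts x 3 rounds per attempt.
        assert mock_critic.is_human.call_count == 2 * 3 * 3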
