
Commit 3c5ed18

adversarial augmentation
1 parent c6681e7 commit 3c5ed18

File tree

3 files changed: +190 −0 lines changed

Lines changed: 4 additions & 0 deletions (package __init__.py)
@@ -0,0 +1,4 @@
from .critic_human_like import CriticHumanLike
from .human_utterance_generator import HumanUtteranceGenerator

__all__ = ["CriticHumanLike", "HumanUtteranceGenerator"]
Lines changed: 74 additions & 0 deletions (critic_human_like.py)
@@ -0,0 +1,74 @@
"""CriticHumanLike class for distinguishing human vs generated utterances."""

from typing import Literal

from pydantic import BaseModel

from autointent.generation import Generator
from autointent.generation.chat_templates import Message, Role


class CriticResponse(BaseModel):
    """Structured answer."""

    reasoning: str
    label: Literal["human", "generated"]
    explanation: str


class CriticHumanLike:
    """A simple critic class that classifies user utterances as either 'human' or 'generated',
    using an LLM-based binary classifier prompt.
    """

    def __init__(self, generator: Generator) -> None:
        """Initialize the CriticHumanLike.

        Args:
            generator: Wrapper for the LLM API to generate classification responses.
        """
        self.generator = generator

    def build_classification_prompt(self, example: str, intent_name: str) -> Message:
        """Build the classification prompt for a single utterance.

        Args:
            example: The user utterance to classify.
            intent_name: The name of the intent associated with the utterance.

        Returns:
            Message: A formatted message prompt for classification.
        """
        content = (
            "You are a critic that determines whether a user utterance was written by a human or "
            "generated by a language model.\n\n"
            f"Intent: {intent_name}\n"
            f'Utterance: "{example}"\n\n'
            "Respond in **JSON format** with three keys:\n"
            "- `reasoning`: a short chain-of-thought where you explain your logic\n"
            "- `label`: must be either `human` or `generated`\n"
            "- `explanation`: a concise summary of your decision\n\n"
            "Example:\n"
            "{\n"
            '    "reasoning": "The phrasing includes casual contractions and natural hesitation. The utterance '
            'flows similarly to how a human would speak spontaneously.",\n'
            '    "label": "human",\n'
            '    "explanation": "The utterance includes natural hesitation and informal phrasing '
            'typical of human speech."\n'
            "}"
        )
        return Message(role=Role.USER, content=content)

    def is_human(self, utterance: str, intent_name: str) -> bool:
        """Classify an utterance as human-written or machine-generated.

        Args:
            utterance: The utterance to evaluate.
            intent_name: The associated intent.

        Returns:
            bool: True if classified as human, False otherwise.
        """
        messages = self.build_classification_prompt(utterance, intent_name)
        response: CriticResponse = self.generator.get_structured_output_sync(
            messages=messages,
            output_model=CriticResponse,
            max_retries=3,
        )
        return response.label == "human"
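
As a quick sanity check, the critic can be exercised on its own. A minimal sketch follows; the import prefix, the `Generator()` construction, and the sample utterance/intent are assumptions not shown in this commit.

# Minimal sketch (assumptions: the import prefix and the Generator construction
# are placeholders; adapt them to your autointent setup).
from autointent.generation import Generator

from critic_human_like import CriticHumanLike  # adjust the package prefix to where this module lives

generator = Generator()  # assumption: construction depends on your LLM backend configuration
critic = CriticHumanLike(generator)

verdict = critic.is_human("uhh can you check if my card got blocked?", intent_name="card_blocked")
print("human" if verdict else "generated")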
Lines changed: 112 additions & 0 deletions (human_utterance_generator.py)
@@ -0,0 +1,112 @@
from collections import defaultdict

from datasets import Dataset as HFDataset
from datasets import concatenate_datasets

from autointent import Dataset
from autointent.custom_types import Split
from autointent.generation import Generator
from autointent.generation.chat_templates._evolution_templates_schemas import Message, Role
from autointent.schemas import Sample

from .critic_human_like import CriticHumanLike


class HumanUtteranceGenerator:
    """Generator of human-like utterances.

    This class rewrites given user utterances to make them sound more natural and human-like,
    while preserving their original intent. The generation process is iterative and attempts
    to bypass a critic that identifies machine-generated text.
    """

    def __init__(self, generator: Generator, critic: CriticHumanLike) -> None:
        """Initialize the HumanUtteranceGenerator.

        Args:
            generator: Wrapper for the LLM API used to generate utterances.
            critic: Critic to determine whether the generated utterance sounds human-like.
        """
        self.generator = generator
        self.critic = critic

    def augment(
        self,
        dataset: Dataset,
        split_name: str = Split.TRAIN,
        update_split: bool = True,
        n_final_per_class: int = 5,
    ) -> list[Sample]:
        """Generate human-like utterances for each intent by iteratively refining machine-generated candidates.

        Args:
            dataset: The dataset to augment.
            split_name: The name of the split to augment (e.g., 'train').
            update_split: Whether to update the dataset split with the new utterances.
            n_final_per_class: Number of successful utterances to generate per intent.

        Returns:
            list[Sample]: List of newly generated samples.
        """
        original_split = dataset[split_name]
        id_to_name = {intent.id: intent.name for intent in dataset.intents}
        new_samples = []

        class_to_samples = defaultdict(list)
        for sample in original_split:
            class_to_samples[sample["label"]].append(sample["utterance"])

        for intent_id, intent_name in id_to_name.items():
            generated_count = 0
            attempt = 0

            seed_utterances = class_to_samples.get(intent_id, [])
            if not seed_utterances:
                continue

            while generated_count < n_final_per_class and attempt < n_final_per_class * 3:
                attempt += 1
                seed = seed_utterances[attempt % len(seed_utterances)]
                rejected = []

                for _ in range(3):
                    prompt = self._build_adversarial_prompt(seed, intent_name, rejected)
                    generated = self.generator.get_chat_completion([prompt]).strip()

                    if self.critic.is_human(generated, intent_name):
                        new_samples.append({
                            Dataset.label_feature: intent_id,
                            Dataset.utterance_feature: generated,
                        })
                        generated_count += 1
                        break
                    rejected.append(generated)

        if update_split:
            generated_split = HFDataset.from_list(new_samples)
            dataset[split_name] = concatenate_datasets([original_split, generated_split])

        return [Sample(**sample) for sample in new_samples]

    def _build_adversarial_prompt(self, example: str, intent_name: str, rejected: list[str]) -> Message:
        """Build an adversarial prompt to guide the model in generating more human-like utterances.

        Args:
            example: The original utterance to be modified.
            intent_name: The intent of the utterance.
            rejected: List of previously rejected generations.

        Returns:
            Message: A formatted prompt guiding the generator to improve naturalness.
        """
        rejected_block = "\n".join(f"- {r}" for r in rejected) if rejected else "None"
        content = (
            "Your task is to rewrite the following user utterance so that it sounds as natural "
            "and human-like as possible, while preserving its original intent: "
            f"'{intent_name}'.\n\n"
            f'Original utterance: "{example}"\n\n'
            f"The following previous attempts were classified as machine-generated and rejected:\n{rejected_block}\n\n"
            "Try to write something that would pass as written by a real human. Output a single version only.\n"
            "IMPORTANT: You must modify the original utterance."
        )
        return Message(role=Role.USER, content=content)
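
For completeness, a minimal end-to-end sketch of how the two new classes are meant to be wired together. The import prefixes, the `Generator()` construction, and the dataset loading are assumptions; only the class APIs themselves come from this commit.

# End-to-end sketch of the adversarial augmentation loop (assumptions: import
# prefixes, Generator construction, and dataset loading are placeholders).
from autointent import Dataset
from autointent.generation import Generator

from critic_human_like import CriticHumanLike                   # adjust prefixes to the
from human_utterance_generator import HumanUtteranceGenerator   # package location

dataset: Dataset = ...  # assumption: an autointent Dataset with a train split, loaded elsewhere
generator = Generator()  # assumption: backend-specific construction
critic = CriticHumanLike(generator)
augmenter = HumanUtteranceGenerator(generator, critic)

# Up to n_final_per_class critic-approved, human-sounding utterances are generated per
# intent and, with update_split=True (the default), appended to the train split in place.
new_samples = augmenter.augment(dataset, n_final_per_class=5)
print(f"added {len(new_samples)} augmented utterances")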
