Skip to content

Commit cc647ee

Browse files
исправление ошибок (fix errors)
1 parent 45c8fc8 commit cc647ee

File tree

4 files changed

+15
-18
lines changed
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
"""Generative methods for enriching dataset with synthetic samples."""
22

3+
from ._adversarial import CriticHumanLike, HumanUtteranceGenerator
34
from ._basic import DatasetBalancer, UtteranceGenerator
45
from ._evolution import IncrementalUtteranceEvolver, UtteranceEvolver
5-
from ._adversarial import HumanUtteranceGenerator, CriticHumanLike
66

77
__all__ = [
8+
"CriticHumanLike",
89
"DatasetBalancer",
10+
"HumanUtteranceGenerator",
911
"IncrementalUtteranceEvolver",
1012
"UtteranceEvolver",
1113
"UtteranceGenerator",
12-
"HumanUtteranceGenerator",
13-
"CriticHumanLike"
1414
]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .critic_human_like import CriticHumanLike
22
from .human_utterance_generator import HumanUtteranceGenerator
33

4-
__all__ = ["HumanUtteranceGenerator", "CriticHumanLike"]
4+
__all__ = ["CriticHumanLike", "HumanUtteranceGenerator"]

src/autointent/generation/utterances/_adversarial/human_utterance_generator.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import random
44
from collections import defaultdict
55
from functools import partial
6+
from typing import Any
67

78
import aiometer
89
from datasets import Dataset as HFDataset
@@ -123,8 +124,8 @@ async def augment_async(
123124
for sample in original_split:
124125
class_to_samples[sample["label"]].append(sample["utterance"])
125126

126-
async def generate_one(intent_id: str, intent_name: str) -> list[dict[str, str]]:
127-
generated: list[dict[str, str]] = []
127+
async def generate_one(intent_id: str, intent_name: str) -> list[dict[str, Any]]:
128+
generated: list[dict[str, Any]] = []
128129
attempts = 0
129130
seed_utterances = class_to_samples[intent_id]
130131
while len(generated) < n_final_per_class and attempts < n_final_per_class * 3:
@@ -136,7 +137,7 @@ async def generate_one(intent_id: str, intent_name: str) -> list[dict[str, str]]
136137
prompt = self._build_adversarial_prompt(intent_name, seed_examples, rejected)
137138
utterance = (await self.generator.get_chat_completion_async([prompt])).strip()
138139
if await self.critic.is_human_async(utterance, intent_name):
139-
generated.append({Dataset.label_feature: intent_id, Dataset.utterance_feature: utterance})
140+
generated.append({Dataset.label_feature: int(intent_id), Dataset.utterance_feature: utterance})
140141
break
141142
rejected.append(utterance)
142143
return generated
@@ -155,8 +156,6 @@ async def generate_one(intent_id: str, intent_name: str) -> list[dict[str, str]]
155156

156157
for result in results:
157158
new_samples.extend(result)
158-
for s in new_samples:
159-
s['label'] = int(s['label'])
160159
if update_split:
161160
generated_split = HFDataset.from_list(new_samples)
162161
dataset[split_name] = concatenate_datasets([original_split, generated_split])

tests/generation/utterances/test_adversarial.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
1-
from unittest.mock import Mock, AsyncMock
2-
import pytest
3-
from autointent.generation import Generator
1+
from unittest.mock import AsyncMock, Mock
42

5-
from autointent.generation.utterances import HumanUtteranceGenerator, CriticHumanLike
6-
from autointent import Dataset, Sample
3+
from autointent import Sample
4+
from autointent.generation.utterances import CriticHumanLike, HumanUtteranceGenerator
75

86

97
def test_human_utterance_generator_sync(dataset):
108
mock_llm = Mock()
119
mock_llm.get_chat_completion.return_value = "Human-like utterance"
12-
10+
1311
mock_critic = Mock(spec=CriticHumanLike)
1412
mock_critic.is_human.return_value = True
1513

1614
generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=False)
17-
15+
1816
n_before = len(dataset["train_0"])
1917
new_samples = generator.augment(dataset, split_name="train_0", update_split=False, n_final_per_class=2)
2018
n_after = len(dataset["train_0"])
@@ -29,7 +27,7 @@ def test_human_utterance_generator_sync(dataset):
2927
def test_human_utterance_generator_async(dataset):
3028
mock_llm = AsyncMock()
3129
mock_llm.get_chat_completion_async.return_value = "Human-like utterance"
32-
30+
3331
mock_critic = AsyncMock(spec=CriticHumanLike)
3432
mock_critic.is_human_async.return_value = True
3533
generator = HumanUtteranceGenerator(mock_llm, mock_critic, async_mode=True)
@@ -56,4 +54,4 @@ def test_human_utterance_generator_respects_critic(dataset):
5654

5755
new_samples = generator.augment(dataset, split_name="train_0", update_split=False, n_final_per_class=1)
5856
assert len(new_samples) > 0
59-
assert all(mock_critic.is_human.call_count >= 1 for _ in range(len(new_samples)))
57+
assert all(mock_critic.is_human.call_count >= 1 for _ in range(len(new_samples)))

0 commit comments

Comments (0)